From 2683b58b537e12b23993c99f3a4e38b4106b2b34 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 17 Oct 2015 21:43:03 +0100 Subject: [PATCH] clean up comparison functions for Words and Phrases --- moses/ChartCellLabel.h | 9 --------- moses/FF/BleuScoreFeature.cpp | 6 +----- moses/FF/ControlRecombination.cpp | 3 +-- moses/FF/NieceTerminal.cpp | 7 +++---- moses/FF/NieceTerminal.h | 4 ++-- moses/FF/TargetNgramFeature.cpp | 14 +++++++------- moses/FF/TargetNgramFeature.h | 6 ++---- moses/GenerationDictionary.h | 4 ++-- moses/Syntax/F2S/Manager.h | 2 +- moses/Syntax/Manager.cpp | 2 +- moses/Syntax/Manager.h | 3 ++- moses/Syntax/S2T/Manager-inl.h | 2 +- moses/Syntax/S2T/Manager.h | 2 +- moses/TargetPhrase.h | 23 ----------------------- moses/Word.h | 18 +++++------------- 15 files changed, 29 insertions(+), 76 deletions(-) diff --git a/moses/ChartCellLabel.h b/moses/ChartCellLabel.h index c67d985b2..18fa8b850 100644 --- a/moses/ChartCellLabel.h +++ b/moses/ChartCellLabel.h @@ -78,15 +78,6 @@ public: return m_bestScore; } - bool operator<(const ChartCellLabel &other) const { - // m_coverage and m_label uniquely identify a ChartCellLabel, so don't - // need to compare m_stack. - if (m_coverage == other.m_coverage) { - return m_label < other.m_label; - } - return m_coverage < other.m_coverage; - } - private: const WordsRange &m_coverage; const Word &m_label; diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index cbf891ccf..2cbf26efc 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -40,11 +40,7 @@ bool BleuScoreState::operator==(const FFState& o) const return true; const BleuScoreState& other = static_cast(o); - int c = m_words.Compare(other.m_words); - if (c == 0) - return true; - - return false; + return m_words == other.m_words; } std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp index 5b7d2eb08..10c2898b1 100644 --- a/moses/FF/ControlRecombination.cpp +++ b/moses/FF/ControlRecombination.cpp @@ -50,8 +50,7 @@ bool ControlRecombinationState::operator==(const FFState& other) const const ControlRecombinationState &otherFF = static_cast(other); if (m_ff.GetType() == SameOutput) { - int ret = m_outputPhrase.Compare(otherFF.m_outputPhrase); - return ret == 0; + return m_outputPhrase == otherFF.m_outputPhrase; } else { // compare hypo address. Won't be equal unless they're actually the same hypo if (m_hypo == otherFF.m_hypo) diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp index 6bd65f37c..a467ce2b1 100644 --- a/moses/FF/NieceTerminal.cpp +++ b/moses/FF/NieceTerminal.cpp @@ -1,5 +1,4 @@ #include -#include #include "NieceTerminal.h" #include "moses/ScoreComponentCollection.h" #include "moses/TargetPhrase.h" @@ -45,7 +44,7 @@ void NieceTerminal::EvaluateWithSourceContext(const InputType &input const Phrase *ruleSource = targetPhrase.GetRuleSource(); assert(ruleSource); - std::set terms; + boost::unordered_set terms; for (size_t i = 0; i < ruleSource->GetSize(); ++i) { const Word &word = ruleSource->GetWord(i); if (!word.IsNonTerminal()) { @@ -81,9 +80,9 @@ void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo, bool NieceTerminal::ContainTerm(const InputType &input, const WordsRange &ntRange, - const std::set &terms) const + const boost::unordered_set &terms) const { - std::set::const_iterator iter; + boost::unordered_set::const_iterator iter; for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) { const Word &word = input.GetWord(pos); diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h index 2ee019443..008e34212 100644 --- a/moses/FF/NieceTerminal.h +++ b/moses/FF/NieceTerminal.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include "StatelessFeatureFunction.h" @@ -46,7 +46,7 @@ protected: bool m_hardConstraint; bool ContainTerm(const InputType &input, const WordsRange &ntRange, - const std::set &terms) const; + const boost::unordered_set &terms) const; }; } diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp index 7f8da1979..ee2b46554 100644 --- a/moses/FF/TargetNgramFeature.cpp +++ b/moses/FF/TargetNgramFeature.cpp @@ -21,23 +21,23 @@ size_t TargetNgramState::hash() const bool TargetNgramState::operator==(const FFState& other) const { const TargetNgramState& rhs = dynamic_cast(other); - int result; + bool result; if (m_words.size() == rhs.m_words.size()) { for (size_t i = 0; i < m_words.size(); ++i) { - result = Word::Compare(m_words[i],rhs.m_words[i]); - if (result != 0) return false; + result = m_words[i] == rhs.m_words[i]; + if (!result) return false; } return true; } else if (m_words.size() < rhs.m_words.size()) { for (size_t i = 0; i < m_words.size(); ++i) { - result = Word::Compare(m_words[i],rhs.m_words[i]); - if (result != 0) return false; + result = m_words[i] == rhs.m_words[i]; + if (!result) return false; } return true; } else { for (size_t i = 0; i < rhs.m_words.size(); ++i) { - result = Word::Compare(m_words[i],rhs.m_words[i]); - if (result != 0) return false; + result = m_words[i] == rhs.m_words[i]; + if (!result) return false; } return true; } diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 7826cdf4f..7c694a65e 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -182,14 +182,12 @@ public: // prefix if (m_startPos > 0) { // not for " ..." - int ret = GetPrefix().Compare(other.GetPrefix()); - if (ret != 0) + if (GetPrefix() != other.GetPrefix()) return false; } if (m_endPos < m_inputSize - 1) { // not for "... " - int ret = GetSuffix().Compare(other.GetSuffix()); - if (ret != 0) + if (GetSuffix() != other.GetSuffix()) return false; } return true; diff --git a/moses/GenerationDictionary.h b/moses/GenerationDictionary.h index 33121315d..c77fa6fa9 100644 --- a/moses/GenerationDictionary.h +++ b/moses/GenerationDictionary.h @@ -23,9 +23,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #define moses_GenerationDictionary_h #include -#include #include #include +#include #include "ScoreComponentCollection.h" #include "Phrase.h" #include "TypeDef.h" @@ -36,7 +36,7 @@ namespace Moses class FactorCollection; -typedef std::map < Word , ScoreComponentCollection > OutputWordCollection; +typedef boost::unordered_map < Word , ScoreComponentCollection > OutputWordCollection; // 1st = output phrase // 2nd = log probability (score) diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h index bcf1ff2bd..7514338f7 100644 --- a/moses/Syntax/F2S/Manager.h +++ b/moses/Syntax/F2S/Manager.h @@ -1,10 +1,10 @@ #pragma once -#include #include #include #include +#include #include "moses/InputType.h" #include "moses/Syntax/KBestExtractor.h" diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp index 5dd4c3b64..10b0d25c9 100644 --- a/moses/Syntax/Manager.cpp +++ b/moses/Syntax/Manager.cpp @@ -65,7 +65,7 @@ void Manager::OutputUnknowns(OutputCollector *collector) const long translationId = m_source.GetTranslationId(); std::ostringstream out; - for (std::set::const_iterator p = m_oovs.begin(); + for (boost::unordered_set::const_iterator p = m_oovs.begin(); p != m_oovs.end(); ++p) { out << *p; } diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h index d62e2f72a..a80c756a8 100644 --- a/moses/Syntax/Manager.h +++ b/moses/Syntax/Manager.h @@ -1,5 +1,6 @@ #pragma once +#include #include "moses/InputType.h" #include "moses/BaseManager.h" @@ -50,7 +51,7 @@ public: virtual const SHyperedge *GetBestSHyperedge() const = 0; protected: - std::set m_oovs; + boost::unordered_set m_oovs; private: // Syntax-specific helper functions used to implement OutputNBest. diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index 67588eb94..422f14915 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -108,7 +108,7 @@ void Manager::InitializeParsers(PChart &pchart, // Find the set of OOVs for this input. This function assumes that the // PChart argument has already been initialized from the input. template -void Manager::FindOovs(const PChart &pchart, std::set &oovs, +void Manager::FindOovs(const PChart &pchart, boost::unordered_set &oovs, std::size_t maxOovWidth) { // Get the set of RuleTries. diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h index 711d6f9d8..b0e6555cf 100644 --- a/moses/Syntax/S2T/Manager.h +++ b/moses/Syntax/S2T/Manager.h @@ -45,7 +45,7 @@ public: void OutputDetailedTranslationReport(OutputCollector *collector) const; private: - void FindOovs(const PChart &, std::set &, std::size_t); + void FindOovs(const PChart &, boost::unordered_set &, std::size_t); void InitializeCharts(); diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h index 56ed27af3..252afefc1 100644 --- a/moses/TargetPhrase.h +++ b/moses/TargetPhrase.h @@ -230,29 +230,6 @@ void swap(TargetPhrase &first, TargetPhrase &second); std::ostream& operator<<(std::ostream&, const TargetPhrase&); -/** - * Hasher that looks at source and target phrase. - **/ -struct TargetPhraseHasher { - inline size_t operator()(const TargetPhrase& targetPhrase) const { - size_t seed = 0; - boost::hash_combine(seed, targetPhrase); - boost::hash_combine(seed, targetPhrase.GetAlignTerm()); - boost::hash_combine(seed, targetPhrase.GetAlignNonTerm()); - - return seed; - } -}; - -struct TargetPhraseComparator { - inline bool operator()(const TargetPhrase& lhs, const TargetPhrase& rhs) const { - return lhs.Compare(rhs) == 0 && - lhs.GetAlignTerm() == rhs.GetAlignTerm() && - lhs.GetAlignNonTerm() == rhs.GetAlignNonTerm(); - } - -}; - } #endif diff --git a/moses/Word.h b/moses/Word.h index d2b85296b..59f514445 100644 --- a/moses/Word.h +++ b/moses/Word.h @@ -116,14 +116,6 @@ public: StringPiece GetString(FactorType factorType) const; TO_STRING(); - //! transitive comparison of Word objects - inline bool operator< (const Word &compare) const { - // needed to store word in GenerationDictionary map - // uses comparison of FactorKey - // 'proper' comparison, not address/id comparison - return Compare(*this, compare) < 0; - } - bool operator== (const Word &compare) const; inline bool operator!= (const Word &compare) const { @@ -153,6 +145,11 @@ public: } }; +inline size_t hash_value(const Word& word) +{ + return word.hash(); +} + struct WordComparer { size_t operator()(const Word* word) const { return word->hash(); @@ -165,11 +162,6 @@ struct WordComparer { }; -inline size_t hash_value(const Word& word) -{ - return word.hash(); -} - } #endif