From ebae7ce52063612b18ec169b397376f6b876f4d0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 28 Jun 2016 11:15:40 +0100 Subject: [PATCH] move ConvertFromMoses() to Moses --- OnDiskPt/OnDiskWrapper.cpp | 39 +----------------- OnDiskPt/OnDiskWrapper.h | 4 -- OnDiskPt/Vocab.cpp | 1 + .../ChartRuleLookupManagerOnDisk.cpp | 8 ++-- .../RuleTable/PhraseDictionaryOnDisk.cpp | 40 ++++++++++++++++++- .../RuleTable/PhraseDictionaryOnDisk.h | 3 ++ 6 files changed, 48 insertions(+), 47 deletions(-) diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp index 57fae5162..c132d2c4a 100644 --- a/OnDiskPt/OnDiskWrapper.cpp +++ b/OnDiskPt/OnDiskWrapper.cpp @@ -23,7 +23,7 @@ #include #include #include "OnDiskWrapper.h" -#include "moses/Factor.h" +#include "moses/Util.h" #include "util/exception.hh" #include "util/string_stream.hh" @@ -219,42 +219,5 @@ uint64_t OnDiskWrapper::GetMisc(const std::string &key) const return iter->second; } -Word *OnDiskWrapper::ConvertFromMoses(const std::vector &factorsVec - , const Moses::Word &origWord) const -{ - bool isNonTerminal = origWord.IsNonTerminal(); - Word *newWord = new Word(isNonTerminal); - - util::StringStream strme; - - size_t factorType = factorsVec[0]; - const Moses::Factor *factor = origWord.GetFactor(factorType); - UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType); - strme << factor->GetString(); - - for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) { - size_t factorType = factorsVec[ind]; - const Moses::Factor *factor = origWord.GetFactor(factorType); - if (factor == NULL) { - // can have less factors than factorType.size() - break; - } - UTIL_THROW_IF2(factor == NULL, - "Expecting factor " << factorType << " at position " << ind); - strme << "|" << factor->GetString(); - } // for (size_t factorType - - bool found; - uint64_t vocabId = m_vocab.GetVocabId(strme.str(), found); - if (!found) { - // factor not in phrase table -> phrse definately not in. 
exit - delete newWord; - return NULL; - } else { - newWord->SetVocabId(vocabId); - return newWord; - } -} - } diff --git a/OnDiskPt/OnDiskWrapper.h b/OnDiskPt/OnDiskWrapper.h index bf398506c..445357fe2 100644 --- a/OnDiskPt/OnDiskWrapper.h +++ b/OnDiskPt/OnDiskWrapper.h @@ -22,7 +22,6 @@ #include #include "Vocab.h" #include "PhraseNode.h" -#include "moses/Word.h" namespace OnDiskPt { @@ -107,9 +106,6 @@ public: uint64_t GetMisc(const std::string &key) const; - Word *ConvertFromMoses(const std::vector &factorsVec - , const Moses::Word &origWord) const; - }; } diff --git a/OnDiskPt/Vocab.cpp b/OnDiskPt/Vocab.cpp index dbe7b2e2f..0a95f5180 100644 --- a/OnDiskPt/Vocab.cpp +++ b/OnDiskPt/Vocab.cpp @@ -21,6 +21,7 @@ #include #include "OnDiskWrapper.h" #include "Vocab.h" +#include "moses/Util.h" #include "util/exception.hh" using namespace std; diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp index 5a5a1368a..c4959ab38 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp @@ -108,7 +108,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( // search for terminal symbol if (startPos == absEndPos) { - OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceWordLabel.GetLabel()); + OnDiskPt::Word *sourceWordBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceWordLabel.GetLabel()); if (sourceWordBerkeleyDb != NULL) { const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper); @@ -154,7 +154,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) { const Word &sourceLHS = *iterSourceLHS; - OnDiskPt::Word *sourceLHSBerkeleyDb = 
m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS); + OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS); if (sourceLHSBerkeleyDb == NULL) { delete sourceLHSBerkeleyDb; @@ -190,7 +190,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( if (doSearch) { - OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_outputFactorsVec, cellLabel.GetLabel()); + OnDiskPt::Word *chartNonTermBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_outputFactorsVec, cellLabel.GetLabel()); if (chartNonTermBerkeleyDb == NULL) continue; @@ -234,7 +234,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) { const Word &sourceLHS = *iterLabelSet; - OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(m_inputFactorsVec, sourceLHS); + OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS); if (sourceLHSBerkeleyDb == NULL) continue; diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index 04e401080..e331a8a99 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -150,7 +150,7 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath if (prevPtNode) { Word lastWord = phrase.GetWord(phrase.GetSize() - 1); lastWord.OnlyTheseFactors(m_inputFactors); - OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord); + OnDiskPt::Word *lastWordOnDisk = ConvertFromMoses(wrapper, m_input, lastWord); TargetPhraseCollection::shared_ptr tpc; if (lastWordOnDisk == NULL) { @@ -344,6 +344,44 @@ void PhraseDictionaryOnDisk::ConvertToMoses( } } +OnDiskPt::Word *PhraseDictionaryOnDisk::ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const 
std::vector &factorsVec + , const Moses::Word &origWord) const +{ + bool isNonTerminal = origWord.IsNonTerminal(); + OnDiskPt::Word *newWord = new OnDiskPt::Word(isNonTerminal); + + util::StringStream strme; + + size_t factorType = factorsVec[0]; + const Moses::Factor *factor = origWord.GetFactor(factorType); + UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType); + strme << factor->GetString(); + + for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) { + size_t factorType = factorsVec[ind]; + const Moses::Factor *factor = origWord.GetFactor(factorType); + if (factor == NULL) { + // can have fewer factors than factorsVec.size() + break; + } + UTIL_THROW_IF2(factor == NULL, + "Expecting factor " << factorType << " at position " << ind); + strme << "|" << factor->GetString(); + } // for (size_t factorType + + bool found; + uint64_t vocabId = wrapper.GetVocab().GetVocabId(strme.str(), found); + if (!found) { + // factor not in phrase table -> phrase definitely not in. exit + delete newWord; + return NULL; + } else { + newWord->SetVocabId(vocabId); + return newWord; + } + +} + void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value) { if (key == "max-span-default") { diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h index dc47936f6..03af73d08 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h @@ -107,6 +107,9 @@ public: , OnDiskPt::Vocab &vocab , bool isSyntax) const; + OnDiskPt::Word *ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector &factorsVec + , const Moses::Word &origWord) const; + void SetParameter(const std::string& key, const std::string& value); };