diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index 14c3e3e26..1326e5c35 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -189,16 +189,17 @@ void ChartParser::CreateInputPaths(const InputType &input) size_t endPos = startPos + phaseSize -1; vector &vec = m_targetPhrasesfromPt[startPos]; - Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); WordsRange range(startPos, endPos); + Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); + const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos); InputPath *node; if (range.GetNumWordsCovered() == 1) { - node = new InputPath(subphrase, range, NULL, NULL); + node = new InputPath(subphrase, labels, range, NULL, NULL); vec.push_back(node); } else { const InputPath &prevNode = GetInputPath(startPos, endPos - 1); - node = new InputPath(subphrase, range, &prevNode, NULL); + node = new InputPath(subphrase, labels, range, &prevNode, NULL); vec.push_back(node); } @@ -207,6 +208,13 @@ void ChartParser::CreateInputPaths(const InputType &input) } } +const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const +{ + size_t offset = endPos - startPos; + CHECK(offset < m_targetPhrasesfromPt[startPos].size()); + return *m_targetPhrasesfromPt[startPos][offset]; +} + InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) { size_t offset = endPos - startPos; @@ -219,4 +227,10 @@ const Sentence &ChartParser::GetSentence() const { return sentence; } +size_t ChartParser::GetSize() const +{ + return m_source.GetSize(); +} + + } // namespace Moses diff --git a/moses/ChartParser.h b/moses/ChartParser.h index fefbbd8b7..c47c1d8d2 100644 --- a/moses/ChartParser.h +++ b/moses/ChartParser.h @@ -63,6 +63,8 @@ public: //! the sentence being decoded const Sentence &GetSentence() const; + size_t GetSize() const; + const InputPath &GetInputPath(size_t startPos, size_t endPos) const; private: ChartParserUnknown m_unknown; diff --git a/moses/ChartRuleLookupManager.h b/moses/ChartRuleLookupManager.h index 0d26bd7ee..6c088fda6 100644 --- a/moses/ChartRuleLookupManager.h +++ b/moses/ChartRuleLookupManager.h @@ -51,6 +51,8 @@ public: return m_cellCollection.GetBase(WordsRange(begin, end)).GetTargetLabelSet(); } + const ChartParser &GetParser() const + { return m_parser; } const Sentence &GetSentence() const; const ChartCellLabel &GetSourceAt(size_t at) const { diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 3884fb4c2..6d895aadd 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -64,6 +64,11 @@ ConfusionNet::ConfusionNet() : InputType() { stats.createOne(); + + const StaticData& staticData = StaticData::Instance(); + if (staticData.IsChart()) { + m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal()); + } } ConfusionNet::~ConfusionNet() { diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h index c9c83e154..ea68ce570 100644 --- a/moses/ConfusionNet.h +++ b/moses/ConfusionNet.h @@ -7,6 +7,7 @@ #include #include "Word.h" #include "InputType.h" +#include "NonTerminal.h" namespace Moses { @@ -25,6 +26,7 @@ public: protected: std::vector data; + NonTerminalSet m_defaultLabelSet; bool ReadFormat0(std::istream&,const std::vector& factorOrder); bool ReadFormat1(std::istream&,const std::vector& factorOrder); @@ -71,8 +73,7 @@ public: TranslationOptionCollection* CreateTranslationOptionCollection() const; const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const { - CHECK(false); - return *(new NonTerminalSet()); + return m_defaultLabelSet; } }; diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index 2c87a131e..fad2b3fc0 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -10,10 +10,11 @@ using namespace std; namespace Moses { -InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode +InputPath::InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, const WordsRange &range, const InputPath *prevNode ,const ScoreComponentCollection *inputScore) :m_prevNode(prevNode) ,m_phrase(phrase) + ,m_sourceNonTerms(sourceNonTerms) ,m_range(range) ,m_inputScore(inputScore) { diff --git a/moses/InputPath.h b/moses/InputPath.h index 7997cfe5f..c6d022c9e 100644 --- a/moses/InputPath.h +++ b/moses/InputPath.h @@ -5,6 +5,7 @@ #include #include "Phrase.h" #include "WordsRange.h" +#include "NonTerminal.h" namespace Moses { @@ -33,6 +34,7 @@ protected: WordsRange m_range; const ScoreComponentCollection *m_inputScore; std::map > m_targetPhrases; + const NonTerminalSet m_sourceNonTerms; std::vector m_placeholders; @@ -44,13 +46,16 @@ public: , m_inputScore(NULL) { } - InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode + InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, const WordsRange &range, const InputPath *prevNode ,const ScoreComponentCollection *inputScore); ~InputPath(); const Phrase &GetPhrase() const { return m_phrase; } + const NonTerminalSet &GetNonTerminalSet() const { + return m_sourceNonTerms; + } const WordsRange &GetWordsRange() const { return m_range; } diff --git a/moses/NonTerminal.cpp b/moses/NonTerminal.cpp index 37ebc9b68..032a50678 100644 --- a/moses/NonTerminal.cpp +++ b/moses/NonTerminal.cpp @@ -1,3 +1,20 @@ #include "NonTerminal.h" +using namespace std; + +namespace Moses { + std::ostream& operator<<(std::ostream &out, const NonTerminalSet &obj) + { + NonTerminalSet::const_iterator iter; + for (iter = obj.begin(); iter != obj.end(); ++iter) { + const Word &word = *iter; + out << word << " "; + } + + + return out; + } + + +} diff --git a/moses/NonTerminal.h b/moses/NonTerminal.h index 393e32fa1..a7201da9e 100644 --- a/moses/NonTerminal.h +++ b/moses/NonTerminal.h @@ -22,6 +22,7 @@ #include "Factor.h" #include "Word.h" +#include #include #include @@ -61,4 +62,6 @@ typedef boost::unordered_set NonTerminalSet; +std::ostream& operator<<(std::ostream&, const NonTerminalSet&); + } // namespace Moses diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp index 22218b37b..265a99d27 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp @@ -17,6 +17,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ +#include #include "ChartRuleLookupManagerMemory.h" #include "DotChartInMemory.h" @@ -28,6 +29,8 @@ #include "moses/ChartCellCollection.h" #include "moses/TranslationModel/PhraseDictionaryMemory.h" +using namespace std; + namespace Moses { @@ -40,8 +43,7 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory( { CHECK(m_dottedRuleColls.size() == 0); - const Sentence &src = parser.GetSentence(); - size_t sourceSize = src.GetSize(); + size_t sourceSize = parser.GetSize(); m_dottedRuleColls.resize(sourceSize); const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode(); @@ -178,8 +180,8 @@ void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication( DottedRuleColl & dottedRuleColl) { // source non-terminal labels for the remainder - const NonTerminalSet &sourceNonTerms = - GetSentence().GetLabelSet(startPos, endPos); + const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos); + const NonTerminalSet &sourceNonTerms = inputPath.GetNonTerminalSet(); // target non-terminal labels for the remainder const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos); diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index b7bbe2a03..5d15ea67a 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -33,6 +33,7 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet InputPathList &list = vec.back(); WordsRange range(startPos, startPos); + const NonTerminalSet &labels = input.GetLabelSet(startPos, startPos); const ConfusionNet::Column &col = input.GetColumn(startPos); for (size_t i = 0; i < col.size(); ++i) { @@ -44,7 +45,7 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet ScoreComponentCollection *inputScore = new ScoreComponentCollection(); inputScore->Assign(inputFeature, scores); - InputPath *node = new InputPath(subphrase, range, NULL, inputScore); + InputPath *node = new InputPath(subphrase, labels, range, NULL, inputScore); list.push_back(node); m_phraseDictionaryQueue.push_back(node); @@ -55,13 +56,14 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet for (size_t phaseSize = 2; phaseSize <= size; ++phaseSize) { for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) { size_t endPos = startPos + phaseSize -1; + WordsRange range(startPos, endPos); + const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos); vector &vec = m_targetPhrasesfromPt[startPos]; vec.push_back(InputPathList()); InputPathList &list = vec.back(); - // loop thru every previous path const InputPathList &prevNodes = GetInputPathList(startPos, endPos - 1); @@ -88,7 +90,7 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet ScoreComponentCollection *inputScore = new ScoreComponentCollection(*prevInputScore); inputScore->PlusEquals(inputFeature, scores); - InputPath *node = new InputPath(subphrase, range, &prevNode, inputScore); + InputPath *node = new InputPath(subphrase, labels, range, &prevNode, inputScore); list.push_back(node); m_phraseDictionaryQueue.push_back(node); diff --git a/moses/TranslationOptionCollectionText.cpp b/moses/TranslationOptionCollectionText.cpp index cd112e369..c0803a777 100644 --- a/moses/TranslationOptionCollectionText.cpp +++ b/moses/TranslationOptionCollectionText.cpp @@ -42,16 +42,17 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const size_t endPos = startPos + phaseSize -1; vector &vec = m_targetPhrasesfromPt[startPos]; - Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); WordsRange range(startPos, endPos); + Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); + const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos); InputPath *node; if (range.GetNumWordsCovered() == 1) { - node = new InputPath(subphrase, range, NULL, NULL); + node = new InputPath(subphrase, labels, range, NULL, NULL); vec.push_back(node); } else { const InputPath &prevNode = GetInputPath(startPos, endPos - 1); - node = new InputPath(subphrase, range, &prevNode, NULL); + node = new InputPath(subphrase, labels, range, &prevNode, NULL); vec.push_back(node); } diff --git a/moses/WordLattice.h b/moses/WordLattice.h index 3fb3beba8..f9b20fc8e 100644 --- a/moses/WordLattice.h +++ b/moses/WordLattice.h @@ -40,11 +40,6 @@ public: */ void GetAsEdgeMatrix(std::vector >& edges) const; - const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const { - CHECK(false); - return *(new NonTerminalSet()); - } - }; }