diff --git a/contrib/other-builds/moses2/LM/KENLM.cpp b/contrib/other-builds/moses2/LM/KENLM.cpp index 570f45a14..c4f001f4a 100644 --- a/contrib/other-builds/moses2/LM/KENLM.cpp +++ b/contrib/other-builds/moses2/LM/KENLM.cpp @@ -62,7 +62,7 @@ public: void Add(lm::WordIndex index, const StringPiece &str) { - std::size_t factorId = m_factorCollection.AddFactor(str, m_system)->GetId(); + std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId(); if (m_mapping.size() <= factorId) { // 0 is :-) m_mapping.resize(factorId + 1); diff --git a/contrib/other-builds/moses2/SCFG/Word.cpp b/contrib/other-builds/moses2/SCFG/Word.cpp index cdb7f917f..b5d538c56 100644 --- a/contrib/other-builds/moses2/SCFG/Word.cpp +++ b/contrib/other-builds/moses2/SCFG/Word.cpp @@ -14,6 +14,12 @@ namespace Moses2 { namespace SCFG { +Word::Word(const Word &copy) +:Moses2::Word(copy) +,isNonTerminal(copy.isNonTerminal) +{ +} + void Word::CreateFromString(FactorCollection &vocab, const System &system, const std::string &str) @@ -46,7 +52,7 @@ void Word::CreateFromString(FactorCollection &vocab, const string &tok = toks[i]; //cerr << "tok=" << tok << endl; - const Factor *factor = vocab.AddFactor(tok, system, false); + const Factor *factor = vocab.AddFactor(tok, system, isNonTerminal); m_factors[i] = factor; } } diff --git a/contrib/other-builds/moses2/SCFG/Word.h b/contrib/other-builds/moses2/SCFG/Word.h index bc42a4da4..3b1cbc62a 100644 --- a/contrib/other-builds/moses2/SCFG/Word.h +++ b/contrib/other-builds/moses2/SCFG/Word.h @@ -19,6 +19,9 @@ class Word: public Moses2::Word public: bool isNonTerminal; + Word() {} + Word(const Word &copy); + void CreateFromString(FactorCollection &vocab, const System &system, const std::string &str); diff --git a/contrib/other-builds/moses2/TranslationModel/Memory/Node.h b/contrib/other-builds/moses2/TranslationModel/Memory/Node.h index afb42a8ee..32f1bbac7 100644 --- a/contrib/other-builds/moses2/TranslationModel/Memory/Node.h +++ 
b/contrib/other-builds/moses2/TranslationModel/Memory/Node.h @@ -99,6 +99,12 @@ public: const Children &GetChildren() const { return m_children; } + void Debug() const { + BOOST_FOREACH(const typename Children::value_type &valPair, m_children) { + const WORD &word = valPair.first; + std::cerr << word << "(" << word.hash() << ") "; + } + } protected: Children m_children; TPS *m_targetPhrases; @@ -119,6 +125,8 @@ protected: else { const WORD &word = source[pos]; Node &child = m_children[word]; + std::cerr << "added " << word << " " << &child << " from " << this << std::endl; + return child.AddRule(source, target, pos + 1); } } diff --git a/contrib/other-builds/moses2/TranslationModel/Memory/PhraseTableMemory.cpp b/contrib/other-builds/moses2/TranslationModel/Memory/PhraseTableMemory.cpp index ce7a796f8..357177bbb 100644 --- a/contrib/other-builds/moses2/TranslationModel/Memory/PhraseTableMemory.cpp +++ b/contrib/other-builds/moses2/TranslationModel/Memory/PhraseTableMemory.cpp @@ -75,6 +75,7 @@ void PhraseTableMemory::Load(System &system) TokenizeMultiCharSeparator(toks, line, "|||"); UTIL_THROW_IF2(toks.size() < 3, "Wrong format"); //cerr << "line=" << line << endl; + //cerr << "system.isPb=" << system.isPb << endl; if (system.isPb) { PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, @@ -101,7 +102,7 @@ void PhraseTableMemory::Load(System &system) else { SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, toks[0]); - //cerr << "created soure" << endl; + //cerr << "created source:" << *source << endl; SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this, system, toks[1]); target->SetAlignmentInfo(toks[3]); @@ -273,7 +274,17 @@ void PhraseTableMemory::LookupGivenNode(const SCFGNODE &node, { size_t ptInd = GetPtInd(); const SCFGNODE *nextNode = node.Find(wordSought); - cerr << " nextNode=" << nextNode << endl; + + cerr << " finding " << wordSought + << " from " 
<< &node << " found " << nextNode << endl; + + if (nextNode == NULL) { + cerr << " " << wordSought << "(" << wordSought.hash() << ")" + << " node contains:"; + node.Debug(); + cerr << endl; + } if (nextNode) { // new entries diff --git a/contrib/other-builds/moses2/TranslationModel/ProbingPT.cpp b/contrib/other-builds/moses2/TranslationModel/ProbingPT.cpp index daa78dba2..288268baf 100644 --- a/contrib/other-builds/moses2/TranslationModel/ProbingPT.cpp +++ b/contrib/other-builds/moses2/TranslationModel/ProbingPT.cpp @@ -51,7 +51,7 @@ void ProbingPT::Load(System &system) for (iterSource = sourceVocab.begin(); iterSource != sourceVocab.end(); ++iterSource) { const string &wordStr = iterSource->second; - const Factor *factor = vocab.AddFactor(wordStr, system); + const Factor *factor = vocab.AddFactor(wordStr, system, false); uint64_t probingId = iterSource->first; size_t factorId = factor->GetId(); @@ -68,7 +68,7 @@ void ProbingPT::Load(System &system) while (getline(targetVocabStrme, line)) { vector<string> toks = Tokenize(line, "\t"); assert(toks.size()); - const Factor *factor = vocab.AddFactor(toks[0], system); + const Factor *factor = vocab.AddFactor(toks[0], system, false); uint32_t probingId = Scan<uint32_t>(toks[1]); if (probingId >= m_targetVocab.size()) { diff --git a/contrib/other-builds/moses2/legacy/FactorCollection.h b/contrib/other-builds/moses2/legacy/FactorCollection.h index bb9b1e137..0430e5cde 100644 --- a/contrib/other-builds/moses2/legacy/FactorCollection.h +++ b/contrib/other-builds/moses2/legacy/FactorCollection.h @@ -114,7 +114,7 @@ public: * If a factor already exist in the collection, return the existing factor, if not create a new 1 */ const Factor *AddFactor(const StringPiece &factorString, const System &system, - bool isNonTerminal = false); + bool isNonTerminal); size_t GetNumNonTerminals() {