From cd96c02748e78b032b857d8e39bb5da6af73150f Mon Sep 17 00:00:00 2001 From: leven101 Date: Wed, 6 Jul 2011 17:25:54 +0000 Subject: [PATCH] bug fixes git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4068 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/BilingualDynSuffixArray.cpp | 17 +++++++++++++---- moses/src/DynSuffixArray.cpp | 11 +++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/moses/src/BilingualDynSuffixArray.cpp b/moses/src/BilingualDynSuffixArray.cpp index daa7bd6de..436ee0382 100644 --- a/moses/src/BilingualDynSuffixArray.cpp +++ b/moses/src/BilingualDynSuffixArray.cpp @@ -463,8 +463,10 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string& m_srcVocab->MakeOpen(); wordID_t sIDs[sphrase.GetSize()]; // store words in vocabulary and corpus + for(int i = sphrase.GetSize()-1; i >= 0; --i) { + sIDs[i] = m_srcVocab->GetWordID(sphrase.GetWord(i)); // get vocab id backwards + } for(size_t i = 0; i < sphrase.GetSize(); ++i) { - sIDs[i] = m_srcVocab->GetWordID(sphrase.GetWord(i)); // get vocab id srcFactor.push_back(sIDs[i]); cerr << "srcFactor[" << (srcFactor.size() - 1) << "] = " << srcFactor.back() << endl; m_srcCorpus->push_back(srcFactor.back()); // add word to corpus @@ -474,18 +476,25 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string& Phrase tphrase(Output, ARRAY_SIZE_INCR); tphrase.CreateFromString(m_outputFactors, target, factorDelimiter); m_trgVocab->MakeOpen(); + wordID_t tIDs[tphrase.GetSize()]; + for(int i = tphrase.GetSize()-1; i >= 0; --i) { + tIDs[i] = m_trgVocab->GetWordID(tphrase.GetWord(i)); // get vocab id + } for(size_t i = 0; i < tphrase.GetSize(); ++i) { - trgFactor.push_back(m_trgVocab->GetWordID(tphrase.GetWord(i))); // get vocab id + trgFactor.push_back(tIDs[i]); cerr << "trgFactor[" << (trgFactor.size() - 1) << "] = " << trgFactor.back() << endl; m_trgCorpus->push_back(trgFactor.back()); } + cerr << "gets to 1\n"; m_trgSntBreaks.push_back(oldTrgCrpSize); + cerr << "gets to 2\n"; m_srcSA->Insert(&srcFactor, oldSrcCrpSize); + cerr << "gets to 3\n"; //m_trgSA->Insert(&trgFactor, oldTrgCrpSize); LoadRawAlignments(alignment); m_trgVocab->MakeClosed(); - for(size_t i=0; i < sphrase.GetSize(); ++i) - ClearWordInCache(sIDs[i]); + //for(size_t i=0; i < sphrase.GetSize(); ++i) + //ClearWordInCache(sIDs[i]); } void BilingualDynSuffixArray::ClearWordInCache(wordID_t srcWord) { diff --git a/moses/src/DynSuffixArray.cpp b/moses/src/DynSuffixArray.cpp index a1896ec7d..5e70b7fdd 100644 --- a/moses/src/DynSuffixArray.cpp +++ b/moses/src/DynSuffixArray.cpp @@ -72,6 +72,14 @@ int DynSuffixArray::Rank(unsigned word, unsigned idx) int DynSuffixArray::F_firstIdx(unsigned word) { // return index of first row where word is found in m_F + /*for(int i=0; i < m_F->size(); ++i) { + if(m_F->at(i) == word) { + return i; + } + } + return -1;*/ + //NOTE: lower_bound is faster than linear search above but may cause issues + // if ordering of vocab is not consecutive (ie..after deletions) int low = std::lower_bound(m_F->begin(), m_F->end(), word) - m_F->begin(); //cerr << "in F_firstIdx with word = " << word << " and low = " << low << " and F->size() =" << m_F->size() << endl; if(low >= m_F->size()) @@ -132,6 +140,7 @@ void DynSuffixArray::Insert(vuint_t* newSent, unsigned newIndex) } // Begin stage 4 Reorder(true_pos, LastFirstFunc(kprime)); // actual position vs computed position of cycle (newIndex-1) + cerr << "GETS HERE 13\n"; } void DynSuffixArray::Reorder(unsigned j, unsigned jprime) @@ -141,6 +150,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) //cerr << "j=" << j << "\tj'=" << jprime << endl; int tmp, isaIdx(-1); int new_j = LastFirstFunc(j); + cerr << "new_j = " << new_j << endl; // for SA, L, and F, the element at pos j is moved to j' tmp = m_L->at(j); // L m_L->at(j) = m_L->at(jprime); @@ -161,6 +171,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) j = new_j; jprime = LastFirstFunc(jprime); } + //cerr << "j=" << j << "\tj'=" << jprime << endl; } void DynSuffixArray::Delete(unsigned index, unsigned num2del)