bug fixes

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4068 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
leven101 2011-07-06 17:25:54 +00:00
parent a57e71a13f
commit cd96c02748
2 changed files with 24 additions and 4 deletions

View File

@ -463,8 +463,10 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
m_srcVocab->MakeOpen();
wordID_t sIDs[sphrase.GetSize()];
// store words in vocabulary and corpus
for(int i = sphrase.GetSize()-1; i >= 0; --i) {
sIDs[i] = m_srcVocab->GetWordID(sphrase.GetWord(i)); // get vocab id backwards
}
for(size_t i = 0; i < sphrase.GetSize(); ++i) {
sIDs[i] = m_srcVocab->GetWordID(sphrase.GetWord(i)); // get vocab id
srcFactor.push_back(sIDs[i]);
cerr << "srcFactor[" << (srcFactor.size() - 1) << "] = " << srcFactor.back() << endl;
m_srcCorpus->push_back(srcFactor.back()); // add word to corpus
@ -474,18 +476,25 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
Phrase tphrase(Output, ARRAY_SIZE_INCR);
tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
m_trgVocab->MakeOpen();
wordID_t tIDs[tphrase.GetSize()];
for(int i = tphrase.GetSize()-1; i >= 0; --i) {
tIDs[i] = m_trgVocab->GetWordID(tphrase.GetWord(i)); // get vocab id
}
for(size_t i = 0; i < tphrase.GetSize(); ++i) {
trgFactor.push_back(m_trgVocab->GetWordID(tphrase.GetWord(i))); // get vocab id
trgFactor.push_back(tIDs[i]);
cerr << "trgFactor[" << (trgFactor.size() - 1) << "] = " << trgFactor.back() << endl;
m_trgCorpus->push_back(trgFactor.back());
}
cerr << "gets to 1\n";
m_trgSntBreaks.push_back(oldTrgCrpSize);
cerr << "gets to 2\n";
m_srcSA->Insert(&srcFactor, oldSrcCrpSize);
cerr << "gets to 3\n";
//m_trgSA->Insert(&trgFactor, oldTrgCrpSize);
LoadRawAlignments(alignment);
m_trgVocab->MakeClosed();
for(size_t i=0; i < sphrase.GetSize(); ++i)
ClearWordInCache(sIDs[i]);
//for(size_t i=0; i < sphrase.GetSize(); ++i)
//ClearWordInCache(sIDs[i]);
}
void BilingualDynSuffixArray::ClearWordInCache(wordID_t srcWord) {

View File

@ -72,6 +72,14 @@ int DynSuffixArray::Rank(unsigned word, unsigned idx)
int DynSuffixArray::F_firstIdx(unsigned word)
{
// return index of first row where word is found in m_F
/*for(int i=0; i < m_F->size(); ++i) {
if(m_F->at(i) == word) {
return i;
}
}
return -1;*/
//NOTE: lower_bound is faster than the linear search above but may cause issues
// if ordering of vocab is not consecutive (i.e., after deletions)
int low = std::lower_bound(m_F->begin(), m_F->end(), word) - m_F->begin();
//cerr << "in F_firstIdx with word = " << word << " and low = " << low << " and F->size() =" << m_F->size() << endl;
if(low >= m_F->size())
@ -132,6 +140,7 @@ void DynSuffixArray::Insert(vuint_t* newSent, unsigned newIndex)
}
// Begin stage 4
Reorder(true_pos, LastFirstFunc(kprime)); // actual position vs computed position of cycle (newIndex-1)
cerr << "GETS HERE 13\n";
}
void DynSuffixArray::Reorder(unsigned j, unsigned jprime)
@ -141,6 +150,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime)
//cerr << "j=" << j << "\tj'=" << jprime << endl;
int tmp, isaIdx(-1);
int new_j = LastFirstFunc(j);
cerr << "new_j = " << new_j << endl;
// for SA, L, and F, the element at pos j is moved to j'
tmp = m_L->at(j); // L
m_L->at(j) = m_L->at(jprime);
@ -161,6 +171,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime)
j = new_j;
jprime = LastFirstFunc(jprime);
}
//cerr << "j=" << j << "\tj'=" << jprime << endl;
}
void DynSuffixArray::Delete(unsigned index, unsigned num2del)