Redo creation of input paths for lattices. OOV handling should also be redone as a phrase-table.

This commit is contained in:
Hieu Hoang 2014-05-01 15:10:16 +01:00
parent 1465cf94b7
commit d4b270e796
4 changed files with 58 additions and 58 deletions

View File

@ -102,6 +102,14 @@ void Manager::ProcessSentence()
}
m_transOptColl->CreateTranslationOptions();
for (size_t i = 0; i < m_transOptColl->GetInputPaths().size(); ++i) {
const InputPath &path = *m_transOptColl->GetInputPaths()[i];
if (path.GetTotalRuleSize()) {
cerr << "path=" << path << endl;
}
}
// some reporting on how long this took
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();

View File

@ -450,15 +450,11 @@ public:
stack.back().src=newSrc;
}
std::cerr << "newSrc=" << newSrc << std::endl;
std::vector<StringTgtCand> tcands;
// now, look up the target candidates (aprx. TargetPhraseCollection) for
// the current path through the CN
m_dict->GetTargetCandidates(nextP,tcands);
std::cerr << "tcands=" << tcands.size() << std::endl;
if(newRange.second>=exploredPaths.size()+newRange.first)
exploredPaths.resize(newRange.second-newRange.first+1,0);
++exploredPaths[newRange.second-newRange.first];

View File

@ -58,63 +58,58 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
path->SetNextNode(nextNode);
m_inputPathQueue.push_back(path);
// recursive
Extend(*path, input);
}
}
// iteratively extend all paths
for (size_t endPos = 1; endPos < size; ++endPos) {
const std::vector<size_t> &nextNodes = input.GetNextNodes(endPos);
// loop thru every previous paths
size_t numPrevPaths = m_inputPathQueue.size();
for (size_t i = 0; i < numPrevPaths; ++i) {
//for (size_t pathInd = 0; pathInd < prevPaths.size(); ++pathInd) {
const InputPath &prevPath = *m_inputPathQueue[i];
size_t nextNode = prevPath.GetNextNode();
if (prevPath.GetWordsRange().GetEndPos() + nextNode != endPos) {
continue;
}
size_t startPos = prevPath.GetWordsRange().GetStartPos();
if (endPos - startPos + 1 > maxPhraseLength) {
continue;
}
WordsRange range(startPos, endPos);
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
const Phrase &prevPhrase = prevPath.GetPhrase();
const ScorePair *prevInputScore = prevPath.GetInputScore();
UTIL_THROW_IF2(prevInputScore == NULL,
"Null previous score");
// loop thru every word at this position
const ConfusionNet::Column &col = input.GetColumn(endPos);
for (size_t i = 0; i < col.size(); ++i) {
const Word &word = col[i].first;
Phrase subphrase(prevPhrase);
subphrase.AddWord(word);
const ScorePair &scores = col[i].second;
ScorePair *inputScore = new ScorePair(*prevInputScore);
inputScore->PlusEquals(scores);
InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
size_t nextNode = nextNodes[i];
path->SetNextNode(nextNode);
m_inputPathQueue.push_back(path);
} // for (size_t i = 0; i < col.size(); ++i) {
} // for (size_t i = 0; i < numPrevPaths; ++i) {
}
}
void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
{
size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
if (nextPos >= input.GetSize()) {
return;
}
size_t startPos = prevPath.GetWordsRange().GetStartPos();
const Phrase &prevPhrase = prevPath.GetPhrase();
const ScorePair *prevInputScore = prevPath.GetInputScore();
UTIL_THROW_IF2(prevInputScore == NULL,
"Null previous score");
const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
const ConfusionNet::Column &col = input.GetColumn(nextPos);
for (size_t i = 0; i < col.size(); ++i) {
const Word &word = col[i].first;
UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
size_t nextNode = nextNodes[i];
size_t endPos = nextPos + nextNode - 1;
WordsRange range(startPos, endPos);
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
Phrase subphrase(prevPhrase);
subphrase.AddWord(word);
const ScorePair &scores = col[i].second;
ScorePair *inputScore = new ScorePair(*prevInputScore);
inputScore->PlusEquals(scores);
InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
path->SetNextNode(nextNode);
m_inputPathQueue.push_back(path);
// recursive
Extend(*path, input);
}
}
void TranslationOptionCollectionLattice::CreateTranslationOptions()
{
@ -148,7 +143,7 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
}
else if (path.GetPhrase().GetSize() == 1) {
// unknown word processing
ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
}
}

View File

@ -33,6 +33,7 @@ public:
, size_t graphInd); // do not implement
protected:
void Extend(const InputPath &prevPath, const WordLattice &input);
};