mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
redo creation of input paths for lattices. Should redo OOV handling as phrase-table
This commit is contained in:
parent
1465cf94b7
commit
d4b270e796
@ -102,6 +102,14 @@ void Manager::ProcessSentence()
|
||||
}
|
||||
m_transOptColl->CreateTranslationOptions();
|
||||
|
||||
for (size_t i = 0; i < m_transOptColl->GetInputPaths().size(); ++i) {
|
||||
const InputPath &path = *m_transOptColl->GetInputPaths()[i];
|
||||
|
||||
if (path.GetTotalRuleSize()) {
|
||||
cerr << "path=" << path << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// some reporting on how long this took
|
||||
IFVERBOSE(1) {
|
||||
GetSentenceStats().StopTimeCollectOpts();
|
||||
|
@ -450,15 +450,11 @@ public:
|
||||
stack.back().src=newSrc;
|
||||
}
|
||||
|
||||
std::cerr << "newSrc=" << newSrc << std::endl;
|
||||
|
||||
std::vector<StringTgtCand> tcands;
|
||||
// now, look up the target candidates (aprx. TargetPhraseCollection) for
|
||||
// the current path through the CN
|
||||
m_dict->GetTargetCandidates(nextP,tcands);
|
||||
|
||||
std::cerr << "tcands=" << tcands.size() << std::endl;
|
||||
|
||||
if(newRange.second>=exploredPaths.size()+newRange.first)
|
||||
exploredPaths.resize(newRange.second-newRange.first+1,0);
|
||||
++exploredPaths[newRange.second-newRange.first];
|
||||
|
@ -58,63 +58,58 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
|
||||
|
||||
path->SetNextNode(nextNode);
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
||||
// recursive
|
||||
Extend(*path, input);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// iteratively extend all paths
|
||||
for (size_t endPos = 1; endPos < size; ++endPos) {
|
||||
const std::vector<size_t> &nextNodes = input.GetNextNodes(endPos);
|
||||
|
||||
// loop thru every previous paths
|
||||
size_t numPrevPaths = m_inputPathQueue.size();
|
||||
|
||||
for (size_t i = 0; i < numPrevPaths; ++i) {
|
||||
//for (size_t pathInd = 0; pathInd < prevPaths.size(); ++pathInd) {
|
||||
const InputPath &prevPath = *m_inputPathQueue[i];
|
||||
|
||||
size_t nextNode = prevPath.GetNextNode();
|
||||
if (prevPath.GetWordsRange().GetEndPos() + nextNode != endPos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t startPos = prevPath.GetWordsRange().GetStartPos();
|
||||
|
||||
if (endPos - startPos + 1 > maxPhraseLength) {
|
||||
continue;
|
||||
}
|
||||
|
||||
WordsRange range(startPos, endPos);
|
||||
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
||||
|
||||
const Phrase &prevPhrase = prevPath.GetPhrase();
|
||||
const ScorePair *prevInputScore = prevPath.GetInputScore();
|
||||
UTIL_THROW_IF2(prevInputScore == NULL,
|
||||
"Null previous score");
|
||||
|
||||
// loop thru every word at this position
|
||||
const ConfusionNet::Column &col = input.GetColumn(endPos);
|
||||
|
||||
for (size_t i = 0; i < col.size(); ++i) {
|
||||
const Word &word = col[i].first;
|
||||
Phrase subphrase(prevPhrase);
|
||||
subphrase.AddWord(word);
|
||||
|
||||
const ScorePair &scores = col[i].second;
|
||||
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
||||
inputScore->PlusEquals(scores);
|
||||
|
||||
InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
|
||||
|
||||
size_t nextNode = nextNodes[i];
|
||||
path->SetNextNode(nextNode);
|
||||
|
||||
m_inputPathQueue.push_back(path);
|
||||
} // for (size_t i = 0; i < col.size(); ++i) {
|
||||
|
||||
} // for (size_t i = 0; i < numPrevPaths; ++i) {
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively extends an existing input path through the word lattice.
//
// For every arc (word, score) stored in the lattice column that follows the
// end of prevPath, a new InputPath is built whose phrase is prevPath's phrase
// plus that word and whose input score is prevPath's score plus the arc's
// score. Each new path is appended to m_inputPathQueue and then extended in
// turn, so the queue is filled depth-first.
//
// prevPath : path to extend; must carry a non-null input score.
// input    : lattice supplying the columns, next-node offsets and label sets.
//
// Raises via UTIL_THROW_IF2 when prevPath has no input score or when an
// epsilon word is found in the column.
//
// NOTE(review): the recursion stops only when the next position reaches
// input.GetSize(); this function itself applies no maximum phrase length
// check -- confirm whether one is expected here.
void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
|
||||
{
|
||||
// first lattice position after the range covered by prevPath
size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
|
||||
// base case: nothing left in the lattice to extend into
if (nextPos >= input.GetSize()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// every extension keeps the start position of the path it grows from
size_t startPos = prevPath.GetWordsRange().GetStartPos();
|
||||
const Phrase &prevPhrase = prevPath.GetPhrase();
|
||||
const ScorePair *prevInputScore = prevPath.GetInputScore();
|
||||
UTIL_THROW_IF2(prevInputScore == NULL,
|
||||
"Null previous score");
|
||||
|
||||
|
||||
// nextNodes is indexed in parallel with col below
// (assumes nextNodes.size() >= col.size() -- TODO confirm)
const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
|
||||
|
||||
const ConfusionNet::Column &col = input.GetColumn(nextPos);
|
||||
// one new path per entry in the column at nextPos
for (size_t i = 0; i < col.size(); ++i) {
|
||||
const Word &word = col[i].first;
|
||||
UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
|
||||
|
||||
size_t nextNode = nextNodes[i];
|
||||
// the new path's range ends at nextPos + nextNode - 1
// (presumably nextNode is the arc's jump distance in positions -- verify)
size_t endPos = nextPos + nextNode - 1;
|
||||
|
||||
WordsRange range(startPos, endPos);
|
||||
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
||||
|
||||
// phrase of the new path = previous phrase + this arc's word
Phrase subphrase(prevPhrase);
|
||||
subphrase.AddWord(word);
|
||||
|
||||
// input score of the new path = previous score + this arc's score
const ScorePair &scores = col[i].second;
|
||||
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
||||
inputScore->PlusEquals(scores);
|
||||
|
||||
// NOTE(review): path and inputScore are heap-allocated and not freed here;
// presumably the InputPath / m_inputPathQueue owner releases them -- confirm
InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
|
||||
|
||||
path->SetNextNode(nextNode);
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
||||
// recurse: keep extending from the path that was just queued
|
||||
Extend(*path, input);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void TranslationOptionCollectionLattice::CreateTranslationOptions()
|
||||
{
|
||||
@ -148,7 +143,7 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
|
||||
}
|
||||
else if (path.GetPhrase().GetSize() == 1) {
|
||||
// unknown word processing
|
||||
ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
|
||||
ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,7 @@ public:
|
||||
, size_t graphInd); // do not implement
|
||||
|
||||
protected:
|
||||
void Extend(const InputPath &prevPath, const WordLattice &input);
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user