2010-07-18 03:23:09 +04:00
|
|
|
// $Id$
|
2010-04-12 14:15:49 +04:00
|
|
|
// vim:tabstop=2
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2010 Hieu Hoang
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2010-07-18 02:29:06 +04:00
|
|
|
#include "PhraseDictionarySCFG.h"
|
2010-04-08 21:16:10 +04:00
|
|
|
#include "FactorCollection.h"
|
|
|
|
#include "InputType.h"
|
|
|
|
#include "ChartRuleCollection.h"
|
|
|
|
#include "CellCollection.h"
|
|
|
|
#include "DotChart.h"
|
|
|
|
#include "StaticData.h"
|
|
|
|
#include "TreeInput.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace Moses;
|
|
|
|
|
2010-07-18 02:29:06 +04:00
|
|
|
// Builds a Word whose factor 0 is the string
//   <factor 0 of origSourceLabel>_<w1>_<w2>..._<wn>
// where w1..wn are the factor 0 strings of the input words at positions
// range.GetStartPos() .. range.GetEndPos() (both ends included).
// The string is registered with the FactorCollection singleton as an
// Input factor of type 0 and the resulting factor is stored in the
// returned Word.
Word PhraseDictionarySCFG::CreateCoveredWord(const Word &origSourceLabel, const InputType &src, const WordsRange &range) const
{
  // start with the label text, then append "_" + word for each covered position
  string labelWithWords = origSourceLabel.GetFactor(0)->GetString();

  size_t wordPos = range.GetStartPos();
  while (wordPos <= range.GetEndPos()) {
    const Word &covered = src.GetWord(wordPos);
    labelWithWords += "_" + covered.GetFactor(0)->GetString();
    ++wordPos;
  }

  Word coveredWord;
  const Factor *coveredFactor =
    FactorCollection::Instance().AddFactor(Input, 0, labelWithWords);
  coveredWord.SetFactor(0, coveredFactor);

  return coveredWord;
}
|
|
|
|
|
2010-09-23 19:18:11 +04:00
|
|
|
// Fills outColl with the chart rules that apply to the source span `range`.
//
// Works on the ProcessedRuleColl stored in m_processedRuleColls for the
// span's start position. Each running (partially matched) rule is extended
// either by the terminal found at the last position of the span, or by a
// non-terminal (a source label from src paired with a head word from
// cellColl) covering a sub-span that ends at or just before the span end.
// Afterwards, every rule stored under index (span width) whose trie node
// carries a target phrase collection is handed to outColl, and
// outColl.CreateChartRules() is called with the configured rule limit.
//
//   outColl          - receives the target phrase collections / chart rules
//   src              - input sentence; supplies words and span labels
//   range            - source span being processed
//   adhereTableLimit - forwarded unchanged to outColl.Add()
//   cellColl         - supplies the head words available for a sub-span
void PhraseDictionarySCFG::GetChartRuleCollection(ChartRuleCollection &outColl
    ,InputType const& src
    ,WordsRange const& range
    ,bool adhereTableLimit
    ,const CellCollection &cellColl) const
{
  const size_t spanStart = range.GetStartPos();
  const size_t spanEnd = range.GetEndPos();
  // number of words in the span; also the stack index of rules covering all of it
  const size_t spanWidth = spanEnd - spanStart + 1;

  // MAIN LOOP. create list of nodes of target phrases
  ProcessedRuleColl &ruleStacks = *m_processedRuleColls[spanStart];
  const ProcessedRuleList &pending = ruleStacks.GetRunningNodes();

  // Note that pending can be expanded as the loop runs (through calls to
  // ruleStacks.Add()), so it is indexed and size() is re-read on every pass.
  for (size_t ruleInd = 0; ruleInd < pending.size(); ++ruleInd) {
    const ProcessedRule &partialRule = *pending[ruleInd];
    const PhraseDictionaryNodeSCFG &parentNode = partialRule.GetLastNode();
    const WordConsumed *lastConsumed = partialRule.GetLastWordConsumed();

    // first source position not yet consumed by this partial rule
    size_t nextPos;
    if (lastConsumed == NULL) {
      nextPos = spanStart;
    } else {
      nextPos = lastConsumed->GetWordsRange().GetEndPos() + 1;
    }

    // search for terminal symbol: only the last word of the span is left
    if (nextPos == spanEnd) {
      const Word &terminal = src.GetWord(spanEnd);
      const PhraseDictionaryNodeSCFG *child = parentNode.GetChild(terminal);

      if (child != NULL) {
        WordConsumed *consumed = new WordConsumed(spanEnd, spanEnd
            , terminal
            , lastConsumed);
        ruleStacks.Add(spanWidth, new ProcessedRule(*child, consumed));
      }
    }

    // search for non-terminals
    if (nextPos > spanEnd) {
      // nothing of the span remains for a non-terminal
      continue;
    }

    // When the rule has consumed nothing yet and the span has more than one
    // word, the non-terminal sub-span stops one word short of the span end
    // and the extended rule goes on the stack one below the full width.
    const bool atSpanStart = (nextPos == spanStart) && (spanEnd > spanStart);
    const size_t subSpanEnd = atSpanStart ? spanEnd - 1 : spanEnd;
    const size_t stackIndex = atSpanStart ? spanWidth - 1 : spanWidth;

    // get headwords in this span from chart
    const vector<Word> &chartHeadWords = cellColl.GetHeadwords(WordsRange(nextPos, subSpanEnd));

    // go thru each source span label
    const LabelList &spanLabels = src.GetLabelList(nextPos, subSpanEnd);

    for (LabelList::const_iterator labelIt = spanLabels.begin(); labelIt != spanLabels.end(); ++labelIt) {
      const Word &spanLabel = *labelIt;

      // go thru each headword & see if in phrase table
      for (vector<Word>::const_iterator headIt = chartHeadWords.begin(); headIt != chartHeadWords.end(); ++headIt) {
        const Word &chartHead = *headIt;

        const PhraseDictionaryNodeSCFG *child = parentNode.GetChild(spanLabel, chartHead);
        if (child == NULL) {
          continue;
        }

        WordConsumed *consumed = new WordConsumed(nextPos, subSpanEnd
            , chartHead
            , lastConsumed);
        ruleStacks.Add(stackIndex, new ProcessedRule(*child, consumed));
      } // for (headIt
    } // for (labelIt
  }

  // return list of target phrases
  ProcessedRuleList &fullSpanRules = ruleStacks.Get(spanWidth);

  size_t rulesLimit = StaticData::Instance().GetRuleLimit();

  ProcessedRuleList::const_iterator ruleIt;
  for (ruleIt = fullSpanRules.begin(); ruleIt != fullSpanRules.end(); ++ruleIt) {
    const ProcessedRule &fullRule = **ruleIt;
    const PhraseDictionaryNodeSCFG &ruleNode = fullRule.GetLastNode();
    const WordConsumed *ruleConsumed = fullRule.GetLastWordConsumed();
    assert(ruleConsumed);

    const TargetPhraseCollection *targetPhrases = ruleNode.GetTargetPhraseCollection();
    if (targetPhrases == NULL) {
      // this node holds no translations for the rule matched so far
      continue;
    }

    outColl.Add(*targetPhrases, *ruleConsumed, adhereTableLimit, rulesLimit);
  }

  outColl.CreateChartRules(rulesLimit);
}
|