add oovpt

This commit is contained in:
Hieu Hoang 2015-01-07 10:26:12 +04:00
parent 3b3f11365d
commit b9bef2fc44
5 changed files with 220 additions and 18 deletions

View File

@ -2455,6 +2455,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
<type>1</type>

View File

@ -0,0 +1,102 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include "ChartRuleLookupManagerOOVPT.h"
#include "DotChartInMemory.h"
#include "moses/Util.h"
#include "moses/ChartParser.h"
#include "moses/InputType.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/OOVPT.h"
using namespace std;
namespace Moses
{
// Creates a lookup manager that draws its rules from an OOV phrase table.
//
// parser, cellColl : forwarded unchanged to the ChartRuleLookupManager base
// oovPt            : phrase table later used to create target phrases; only a
//                    reference is kept, so it has to outlive this object
ChartRuleLookupManagerOOVPT::ChartRuleLookupManagerOOVPT(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellColl,
  const OOVPT &oovPt)
  : ChartRuleLookupManager(parser, cellColl), m_oovPt(oovPt)
{
  // Trace every construction on stderr.
  std::cerr << "starting ChartRuleLookupManagerOOVPT" << std::endl;
}
// Cleans up the target phrase collections that GetChartRuleCollection()
// allocated with new and stored in m_tpColl.
ChartRuleLookupManagerOOVPT::~ChartRuleLookupManagerOOVPT()
{
// presumably deletes every pointer in the container and empties it -- confirm
// against the RemoveAllInColl definition
RemoveAllInColl(m_tpColl);
}
void ChartRuleLookupManagerOOVPT::GetChartRuleCollection(
const WordsRange &range,
size_t last,
ChartParserCallback &outColl)
{
//m_tpColl.push_back(TargetPhraseCollection());
//TargetPhraseCollection &tpColl = m_tpColl.back();
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
m_tpColl.push_back(tpColl);
if (range.GetNumWordsCovered() == 1) {
const ChartCellLabel &sourceWordLabel = GetSourceAt(range.GetStartPos());
const Word &sourceWord = sourceWordLabel.GetLabel();
CreateTargetPhrases(sourceWord, *tpColl);
}
outColl.Add(*tpColl, m_stackVec, range);
}
void ChartRuleLookupManagerOOVPT::CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const
{
const StaticData &staticData = StaticData::Instance();
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
float prob = iterLHS->second;
// lhs
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
TargetPhrase *targetPhrase = m_oovPt.CreateTargetPhrase(sourceWord);
//targetPhrase->EvaluateInIsolation(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}
// chart rule
tpColl.Add(targetPhrase);
}
}
} // namespace Moses

View File

@ -0,0 +1,57 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <vector>
#include "moses/ChartRuleLookupManager.h"
#include "moses/StackVec.h"
namespace Moses
{
class TargetPhraseCollection;
class ChartParserCallback;
class DottedRuleColl;
class WordsRange;
class OOVPT;
// Chart rule lookup manager whose rules are produced on the fly by an OOVPT
// phrase table rather than read from a stored rule collection.
class ChartRuleLookupManagerOOVPT : public ChartRuleLookupManager
{
public:
// parser and cellColl are passed on to the ChartRuleLookupManager base;
// oovPt is kept by reference (see m_oovPt) and must outlive this object.
ChartRuleLookupManagerOOVPT(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const OOVPT &oovPt);
// Cleans up the collections stored in m_tpColl.
~ChartRuleLookupManagerOOVPT();
// Creates target phrases for a span that covers exactly one word and passes
// the resulting collection to outColl. The `last` argument is not used.
virtual void GetChartRuleCollection(
const WordsRange &range,
size_t last,
ChartParserCallback &outColl);
private:
// Adds one target phrase per unknown-word LHS (taken from StaticData) for
// sourceWord to tpColl.
void CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const;
// Stack vector handed to outColl.Add(); this class never fills it.
StackVec m_stackVec;
// Collections allocated with new by GetChartRuleCollection(); cleaned up in
// the destructor.
std::vector<TargetPhraseCollection*> m_tpColl;
// Phrase table used to create the target phrases.
const OOVPT &m_oovPt;
};
} // namespace Moses

View File

@ -1,5 +1,7 @@
// vim:tabstop=2
#include "OOVPT.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h"
using namespace std;
@ -29,8 +31,11 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue)
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
const Phrase &sourcePhrase = inputPath.GetPhrase();
const Word &sourceWord = sourcePhrase.GetWord(0);
TargetPhrase *tp = CreateTargetPhrase(sourceWord);
tp->EvaluateInIsolation(sourcePhrase);
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
tpColl->Add(tp);
@ -43,34 +48,62 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue)
}
}
TargetPhrase *OOVPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
TargetPhrase *OOVPT::CreateTargetPhrase(const Word &sourceWord) const
{
// create a target phrase from the 1st word of the source, prefix with 'OOVPT:'
assert(sourcePhrase.GetSize());
assert(m_output.size() == 1);
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
FactorCollection &factorCollection = FactorCollection::Instance();
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
size_t isDigit = 0;
TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, str, false);
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const StringPiece s = f->GetString();
bool isEpsilon = (s=="" || s==EPSILON);
if (staticData.GetDropUnknown()) {
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)
isDigit = 0;
else
isDigit = 1;
// modify the starting bitmap
}
// score for this phrase table
vector<float> scores(m_numScoreComponents, 1.3);
tp->GetScoreBreakdown().PlusEquals(this, scores);
TargetPhrase *targetPhrase = new TargetPhrase(this);
// score of all other ff when this rule is being loaded
tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
// add to dictionary
return tp;
Word &targetWord = targetPhrase->AddWord();
targetWord.SetIsOOV(true);
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *sourceFactor = sourceWord[currFactor];
if (sourceFactor == NULL)
targetWord[factorType] = factorCollection.AddFactor(UNKNOWN_FACTOR);
else
targetWord[factorType] = factorCollection.AddFactor(sourceFactor->GetString());
}
//create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
targetPhrase->SetAlignmentInfo("0-0");
} else {
// drop source word. create blank target phrase
}
float unknownScore = FloorScore(TransformScore(0));
targetPhrase->GetScoreBreakdown().Assign(this, unknownScore);
return targetPhrase;
}
ChartRuleLookupManager* OOVPT::CreateRuleLookupManager(const ChartParser &parser,
const ChartCellCollectionBase &cellCollection,
std::size_t /*maxChartSpan*/)
{
assert(false);
return NULL;
return new ChartRuleLookupManagerOOVPT(parser, cellCollection, *this);
}
TO_STRING_BODY(OOVPT);

View File

@ -28,9 +28,9 @@ public:
TO_STRING();
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
protected:
TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
};
} // namespace Moses