mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-24 20:32:59 +03:00
add oovpt
This commit is contained in:
parent
3b3f11365d
commit
b9bef2fc44
@ -2455,6 +2455,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -0,0 +1,102 @@
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2011 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include "ChartRuleLookupManagerOOVPT.h"
|
||||
#include "DotChartInMemory.h"
|
||||
|
||||
#include "moses/Util.h"
|
||||
#include "moses/ChartParser.h"
|
||||
#include "moses/InputType.h"
|
||||
#include "moses/ChartParserCallback.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/NonTerminal.h"
|
||||
#include "moses/ChartCellCollection.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
|
||||
#include "moses/TranslationModel/OOVPT.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/**
 * Construct a lookup manager bound to one parser / chart pair.
 *
 * \param parser    forwarded unchanged to the ChartRuleLookupManager base
 * \param cellColl  forwarded unchanged to the ChartRuleLookupManager base
 * \param oovPt     phrase table used later to build target phrases; only a
 *                  reference is stored, so it must outlive this object
 */
ChartRuleLookupManagerOOVPT::ChartRuleLookupManagerOOVPT(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellColl,
  const OOVPT &oovPt)
  : ChartRuleLookupManager(parser, cellColl),
    m_oovPt(oovPt)
{
  // Trace message emitted on every construction.
  std::cerr << "starting ChartRuleLookupManagerOOVPT" << std::endl;
}
|
||||
|
||||
/**
 * Hand every collection accumulated in m_tpColl to RemoveAllInColl for
 * clean-up (presumably deleting each element -- confirm in moses/Util.h).
 */
ChartRuleLookupManagerOOVPT::~ChartRuleLookupManagerOOVPT()
{
  RemoveAllInColl(m_tpColl);
}
|
||||
|
||||
void ChartRuleLookupManagerOOVPT::GetChartRuleCollection(
|
||||
const WordsRange &range,
|
||||
size_t last,
|
||||
ChartParserCallback &outColl)
|
||||
{
|
||||
//m_tpColl.push_back(TargetPhraseCollection());
|
||||
//TargetPhraseCollection &tpColl = m_tpColl.back();
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
m_tpColl.push_back(tpColl);
|
||||
|
||||
if (range.GetNumWordsCovered() == 1) {
|
||||
const ChartCellLabel &sourceWordLabel = GetSourceAt(range.GetStartPos());
|
||||
const Word &sourceWord = sourceWordLabel.GetLabel();
|
||||
CreateTargetPhrases(sourceWord, *tpColl);
|
||||
}
|
||||
|
||||
outColl.Add(*tpColl, m_stackVec, range);
|
||||
}
|
||||
|
||||
void ChartRuleLookupManagerOOVPT::CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
|
||||
UnknownLHSList::const_iterator iterLHS;
|
||||
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
||||
const string &targetLHSStr = iterLHS->first;
|
||||
float prob = iterLHS->second;
|
||||
|
||||
// lhs
|
||||
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
|
||||
Word *targetLHS = new Word(true);
|
||||
|
||||
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
|
||||
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
||||
|
||||
// add to dictionary
|
||||
TargetPhrase *targetPhrase = m_oovPt.CreateTargetPhrase(sourceWord);
|
||||
|
||||
//targetPhrase->EvaluateInIsolation(*unksrc);
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
|
||||
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
|
||||
}
|
||||
|
||||
// chart rule
|
||||
tpColl.Add(targetPhrase);
|
||||
}
|
||||
}
|
||||
} // namespace Moses
|
@ -0,0 +1,57 @@
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2011 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "moses/ChartRuleLookupManager.h"
|
||||
#include "moses/StackVec.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class TargetPhraseCollection;
|
||||
class ChartParserCallback;
|
||||
class DottedRuleColl;
|
||||
class WordsRange;
|
||||
class OOVPT;
|
||||
|
||||
class ChartRuleLookupManagerOOVPT : public ChartRuleLookupManager
|
||||
{
|
||||
public:
|
||||
ChartRuleLookupManagerOOVPT(const ChartParser &parser,
|
||||
const ChartCellCollectionBase &cellColl,
|
||||
const OOVPT &oovPt);
|
||||
|
||||
~ChartRuleLookupManagerOOVPT();
|
||||
|
||||
virtual void GetChartRuleCollection(
|
||||
const WordsRange &range,
|
||||
size_t last,
|
||||
ChartParserCallback &outColl);
|
||||
|
||||
private:
|
||||
void CreateTargetPhrases(const Word &sourceWord, TargetPhraseCollection &tpColl) const;
|
||||
|
||||
StackVec m_stackVec;
|
||||
std::vector<TargetPhraseCollection*> m_tpColl;
|
||||
const OOVPT &m_oovPt;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -1,5 +1,7 @@
|
||||
// vim:tabstop=2
|
||||
#include "OOVPT.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOOVPT.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -29,8 +31,11 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue)
|
||||
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
||||
InputPath &inputPath = **iter;
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
const Word &sourceWord = sourcePhrase.GetWord(0);
|
||||
|
||||
TargetPhrase *tp = CreateTargetPhrase(sourceWord);
|
||||
tp->EvaluateInIsolation(sourcePhrase);
|
||||
|
||||
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
tpColl->Add(tp);
|
||||
|
||||
@ -43,34 +48,62 @@ void OOVPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the target phrase for an unknown source word.
 *
 * Unless the word is to be dropped, the phrase contains a single word marked
 * as OOV whose factors are copied from \p sourceWord (UNKNOWN_FACTOR where
 * the source factor is missing), with a one-to-one "0-0" alignment.
 * The word is dropped -- leaving the phrase empty -- when its factor 0 is
 * empty or EPSILON, or when the drop-unknown option is enabled and the word
 * contains no digit.
 *
 * \param sourceWord  source word; factor 0 is treated as the surface form
 * \return newly allocated TargetPhrase; the caller is responsible for it
 */
TargetPhrase *OOVPT::CreateTargetPhrase(const Word &sourceWord) const
{
  // unknown word, add as trans opt
  const StaticData &staticData = StaticData::Instance();
  FactorCollection &factorCollection = FactorCollection::Instance();

  const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
  const StringPiece s = f->GetString();
  const bool isEpsilon = (s=="" || s==EPSILON);
  const bool dropUnknown = staticData.GetDropUnknown();

  // Digits are only looked for when unknown words are being dropped:
  // words containing a digit are kept regardless.
  bool containsDigit = false;
  if (dropUnknown) {
    containsDigit = (s.find_first_of("0123456789") != std::string::npos);
  }

  TargetPhrase *targetPhrase = new TargetPhrase(this);

  if (!(dropUnknown || isEpsilon) || containsDigit) {
    // add to dictionary
    Word &targetWord = targetPhrase->AddWord();
    targetWord.SetIsOOV(true);

    for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
      FactorType factorType = static_cast<FactorType>(currFactor);

      const Factor *sourceFactor = sourceWord[currFactor];
      if (sourceFactor == NULL)
        targetWord[factorType] = factorCollection.AddFactor(UNKNOWN_FACTOR);
      else
        targetWord[factorType] = factorCollection.AddFactor(sourceFactor->GetString());
    }

    // create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
    targetPhrase->SetAlignmentInfo("0-0");
  }
  // else: drop source word, leaving a blank target phrase

  float unknownScore = FloorScore(TransformScore(0));
  targetPhrase->GetScoreBreakdown().Assign(this, unknownScore);

  return targetPhrase;
}
|
||||
|
||||
/**
 * Create the chart rule lookup manager for this phrase table.
 *
 * \param parser          forwarded to the lookup manager
 * \param cellCollection  forwarded to the lookup manager
 * \return newly allocated ChartRuleLookupManagerOOVPT bound to this table;
 *         the caller is responsible for it
 *
 * The third argument (maximum chart span) is ignored.
 */
ChartRuleLookupManager* OOVPT::CreateRuleLookupManager(const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection,
    std::size_t /*maxChartSpan*/)
{
  return new ChartRuleLookupManagerOOVPT(parser, cellCollection, *this);
}
|
||||
|
||||
TO_STRING_BODY(OOVPT);
|
||||
|
@ -28,9 +28,9 @@ public:
|
||||
|
||||
TO_STRING();
|
||||
|
||||
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
|
||||
|
||||
protected:
|
||||
TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
Loading…
Reference in New Issue
Block a user