mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
2e53bda77a
@ -1,5 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
@ -85,7 +87,6 @@
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.128214028" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
|
@ -1,5 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
@ -81,7 +83,6 @@
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
|
@ -1076,16 +1076,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/BleuScoreFeature.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ChartBasedFeatureContext.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ChartBasedFeatureContext.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ChartBasedFeatureContext.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ChartBasedFeatureContext.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/ControlRecombination.cpp</name>
|
||||
<type>1</type>
|
||||
@ -1171,16 +1161,6 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/PhraseBasedFeatureContext.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhraseBasedFeatureContext.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/PhraseBasedFeatureContext.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhraseBasedFeatureContext.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/PhraseBoundaryFeature.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -217,12 +217,11 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
|
||||
//preface surface form with UNK if marking unknowns
|
||||
const Word &word = phrase.GetWord(pos);
|
||||
if(markUnknown && word.IsOOV()) {
|
||||
out << "UNK" << *factor;
|
||||
out << "UNK" << *factor;
|
||||
} else {
|
||||
out << *factor;
|
||||
}
|
||||
else {
|
||||
out << *factor;
|
||||
}
|
||||
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
CHECK(factor);
|
||||
|
@ -162,7 +162,7 @@ void ChartHypothesis::Evaluate()
|
||||
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for (unsigned i = 0; i < sfs.size(); ++i) {
|
||||
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
|
||||
sfs[i]->EvaluateChart(ChartBasedFeatureContext(this),&m_scoreBreakdown);
|
||||
sfs[i]->EvaluateChart(*this,&m_scoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeStepTranslation::ProcessInitialTranslationLegacy(
|
||||
void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
|
||||
const InputType &source
|
||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
@ -147,7 +147,7 @@ void DecodeStepTranslation::ProcessInitialTranslationLegacy(
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
|
||||
const WordsRange wordsRange(startPos, endPos);
|
||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLegacy(source,wordsRange);
|
||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
IFVERBOSE(3) {
|
||||
@ -171,7 +171,7 @@ void DecodeStepTranslation::ProcessInitialTranslationLegacy(
|
||||
const TargetPhrase &targetPhrase = **iterTargetPhrase;
|
||||
const Phrase &sourcePhrase = *iterSourcePhrase;
|
||||
|
||||
const InputPath &inputPath = GetInputPathLegacy(targetPhrase, sourcePhrase, inputPathList);
|
||||
const InputPath &inputPath = GetInputPathLEGACY(targetPhrase, sourcePhrase, inputPathList);
|
||||
|
||||
TranslationOption *transOpt = new TranslationOption(wordsRange, targetPhrase);
|
||||
transOpt->SetInputPath(inputPath);
|
||||
@ -184,7 +184,7 @@ void DecodeStepTranslation::ProcessInitialTranslationLegacy(
|
||||
}
|
||||
}
|
||||
|
||||
const InputPath &DecodeStepTranslation::GetInputPathLegacy(
|
||||
const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
|
||||
const TargetPhrase targetPhrase,
|
||||
const Phrase sourcePhrase,
|
||||
const InputPathList &inputPathList) const
|
||||
@ -207,7 +207,7 @@ const InputPath &DecodeStepTranslation::GetInputPathLegacy(
|
||||
UTIL_THROW(util::Exception, "Input path not found");
|
||||
}
|
||||
|
||||
void DecodeStepTranslation::ProcessLegacy(const TranslationOption &inputPartialTranslOpt
|
||||
void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, TranslationOptionCollection *toc
|
||||
@ -229,7 +229,7 @@ void DecodeStepTranslation::ProcessLegacy(const TranslationOption &inputPartialT
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
|
||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl
|
||||
= phraseDictionary->GetTargetPhraseCollectionLegacy(toc->GetSource(),sourceWordsRange);
|
||||
= phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);
|
||||
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
|
@ -61,11 +61,11 @@ public:
|
||||
, const TargetPhraseCollection *phraseColl) const;
|
||||
|
||||
// legacy
|
||||
void ProcessInitialTranslationLegacy(const InputType &source
|
||||
void ProcessInitialTranslationLEGACY(const InputType &source
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPathList &inputPathList) const;
|
||||
void ProcessLegacy(const TranslationOption &inputPartialTranslOpt
|
||||
void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, TranslationOptionCollection *toc
|
||||
@ -75,7 +75,7 @@ private:
|
||||
// I'm not sure whether this actually works or not for binary phrase table.
|
||||
// The source phrase only appears to contain the 1st word, therefore, this function
|
||||
// only compares the 1st word
|
||||
const InputPath &GetInputPathLegacy(const TargetPhrase targetPhrase,
|
||||
const InputPath &GetInputPathLEGACY(const TargetPhrase targetPhrase,
|
||||
const Phrase sourcePhrase,
|
||||
const InputPathList &inputPathList) const;
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
#include "ChartBasedFeatureContext.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "moses/ChartManager.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
ChartBasedFeatureContext::ChartBasedFeatureContext
|
||||
(const ChartHypothesis* hypothesis):
|
||||
m_hypothesis(hypothesis),
|
||||
m_targetPhrase(hypothesis->GetCurrTargetPhrase()),
|
||||
m_source(hypothesis->GetManager().GetSource())
|
||||
{}
|
||||
|
||||
ChartBasedFeatureContext::ChartBasedFeatureContext(
|
||||
const TargetPhrase& targetPhrase,
|
||||
const InputType& source):
|
||||
m_hypothesis(NULL),
|
||||
m_targetPhrase(targetPhrase),
|
||||
m_source(source)
|
||||
{}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class ChartHypothesis;
|
||||
class InputType;
|
||||
class TargetPhrase;
|
||||
|
||||
/**
|
||||
* Same as PhraseBasedFeatureContext, but for chart-based Moses.
|
||||
**/
|
||||
class ChartBasedFeatureContext
|
||||
{
|
||||
//The context either has a hypothesis (during search) or a
|
||||
//TargetPhrase and source sentence (during pre-calculation)
|
||||
//TODO: should the context also include some info on where the TargetPhrase
|
||||
//is anchored (assuming it's lexicalised), which is available at pre-calc?
|
||||
const ChartHypothesis* m_hypothesis;
|
||||
const TargetPhrase& m_targetPhrase;
|
||||
const InputType& m_source;
|
||||
|
||||
public:
|
||||
ChartBasedFeatureContext(const ChartHypothesis* hypothesis);
|
||||
ChartBasedFeatureContext(const TargetPhrase& targetPhrase,
|
||||
const InputType& source);
|
||||
|
||||
const InputType& GetSource() const {
|
||||
return m_source;
|
||||
}
|
||||
|
||||
const TargetPhrase& GetTargetPhrase() const {
|
||||
return m_targetPhrase;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "PhraseBasedFeatureContext.h"
|
||||
#include "ChartBasedFeatureContext.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
namespace Moses
|
||||
|
@ -167,11 +167,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
|
||||
}
|
||||
|
||||
void GlobalLexicalModel::Evaluate
|
||||
(const PhraseBasedFeatureContext& context,
|
||||
(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
accumulator->PlusEquals( this,
|
||||
GetFromCacheOrScorePhrase(context.GetTargetPhrase()) );
|
||||
GetFromCacheOrScorePhrase(hypo.GetCurrTargetPhrase()) );
|
||||
}
|
||||
|
||||
bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const
|
||||
|
@ -68,12 +68,12 @@ public:
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
void Evaluate(const PhraseBasedFeatureContext& context,
|
||||
void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
|
||||
void EvaluateChart(
|
||||
const ChartBasedFeatureContext& context,
|
||||
const ChartHypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet");
|
||||
}
|
||||
|
@ -13,6 +13,9 @@ namespace Moses
|
||||
OpSequenceModel::OpSequenceModel(const std::string &line)
|
||||
:StatefulFeatureFunction("OpSequenceModel", 5, line )
|
||||
{
|
||||
sFactor = 0;
|
||||
tFactor = 0;
|
||||
numFeatures = 5;
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
@ -47,7 +50,7 @@ void OpSequenceModel:: Evaluate(const Phrase &source
|
||||
WordsBitmap myBitmap(source.GetSize());
|
||||
vector <string> mySourcePhrase;
|
||||
vector <string> myTargetPhrase;
|
||||
vector<float> scores(5);
|
||||
vector<float> scores;
|
||||
vector <int> alignments;
|
||||
int startIndex = 0;
|
||||
int endIndex = source.GetSize();
|
||||
@ -55,28 +58,27 @@ void OpSequenceModel:: Evaluate(const Phrase &source
|
||||
const AlignmentInfo &align = targetPhrase.GetAlignTerm();
|
||||
AlignmentInfo::const_iterator iter;
|
||||
|
||||
|
||||
for (iter = align.begin(); iter != align.end(); ++iter) {
|
||||
alignments.push_back(iter->first);
|
||||
alignments.push_back(iter->second);
|
||||
}
|
||||
|
||||
for (int i = 0; i < targetPhrase.GetSize(); i++) {
|
||||
if (targetPhrase.GetWord(i).IsOOV())
|
||||
if (targetPhrase.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0)
|
||||
myTargetPhrase.push_back("_TRANS_SLF_");
|
||||
else
|
||||
myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
|
||||
myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
|
||||
}
|
||||
|
||||
for (int i = 0; i < source.GetSize(); i++) {
|
||||
mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
|
||||
mySourcePhrase.push_back(source.GetWord(i).GetFactor(sFactor)->GetString().as_string());
|
||||
}
|
||||
|
||||
obj.setPhrases(mySourcePhrase , myTargetPhrase);
|
||||
obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
|
||||
obj.computeOSMFeature(startIndex,myBitmap);
|
||||
obj.calculateOSMProb(*OSM);
|
||||
obj.populateScores(scores);
|
||||
obj.populateScores(scores,numFeatures);
|
||||
estimatedFutureScore.PlusEquals(this, scores);
|
||||
|
||||
}
|
||||
@ -96,7 +98,7 @@ FFState* OpSequenceModel::Evaluate(
|
||||
osmHypothesis obj;
|
||||
vector <string> mySourcePhrase;
|
||||
vector <string> myTargetPhrase;
|
||||
vector<float> scores(5);
|
||||
vector<float> scores;
|
||||
|
||||
|
||||
//target.GetWord(0)
|
||||
@ -140,16 +142,16 @@ FFState* OpSequenceModel::Evaluate(
|
||||
|
||||
for (int i = startIndex; i <= endIndex; i++) {
|
||||
myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
|
||||
mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
|
||||
mySourcePhrase.push_back(source.GetWord(i).GetFactor(sFactor)->GetString().as_string());
|
||||
// cerr<<mySourcePhrase[i]<<endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < target.GetSize(); i++) {
|
||||
|
||||
if (target.GetWord(i).IsOOV())
|
||||
if (target.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0)
|
||||
myTargetPhrase.push_back("_TRANS_SLF_");
|
||||
else
|
||||
myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
|
||||
myTargetPhrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string());
|
||||
|
||||
}
|
||||
|
||||
@ -161,7 +163,8 @@ FFState* OpSequenceModel::Evaluate(
|
||||
obj.setPhrases(mySourcePhrase , myTargetPhrase);
|
||||
obj.computeOSMFeature(startIndex,myBitmap);
|
||||
obj.calculateOSMProb(*OSM);
|
||||
obj.populateScores(scores);
|
||||
obj.populateScores(scores,numFeatures);
|
||||
//obj.print();
|
||||
|
||||
/*
|
||||
if (bitmap.GetFirstGapPos() == NOT_FOUND)
|
||||
@ -175,14 +178,7 @@ FFState* OpSequenceModel::Evaluate(
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
vector<float> scores(5);
|
||||
scores[0] = 0.343423f;
|
||||
scores[1] = 1.343423f;
|
||||
scores[2] = 2.343423f;
|
||||
scores[3] = 3.343423f;
|
||||
scores[4] = 4.343423f;
|
||||
*/
|
||||
|
||||
|
||||
accumulator->PlusEquals(this, scores);
|
||||
|
||||
@ -225,10 +221,10 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const
|
||||
iter = m_futureCost.find(pp);
|
||||
//iter = m_coll.find(pp);
|
||||
if (iter == m_futureCost.end()) {
|
||||
vector<float> scores(5, 0);
|
||||
vector<float> scores(numFeatures, 0);
|
||||
scores[0] = unkOpProb;
|
||||
return scores;
|
||||
} else {
|
||||
} else{
|
||||
const vector<float> &scores = iter->second;
|
||||
return scores;
|
||||
}
|
||||
@ -239,9 +235,16 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& va
|
||||
|
||||
if (key == "path") {
|
||||
m_lmPath = value;
|
||||
} else if (key == "order") {
|
||||
}else if (key == "numFeatures"){
|
||||
numFeatures = Scan<int>(value);
|
||||
}else if (key == "order") {
|
||||
lmOrder = Scan<int>(value);
|
||||
} else {
|
||||
}else if (key == "sFactor"){
|
||||
sFactor = Scan<int>(value);
|
||||
}else if (key == "tFactor"){
|
||||
tFactor = Scan<int>(value);
|
||||
}
|
||||
else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
@ -21,6 +21,9 @@ public:
|
||||
|
||||
int lmOrder;
|
||||
float unkOpProb;
|
||||
int sFactor; // Source Factor ...
|
||||
int tFactor; // Target Factor ...
|
||||
int numFeatures; // Number of features used ...
|
||||
|
||||
OpSequenceModel(const std::string &line);
|
||||
|
||||
|
@ -105,7 +105,6 @@ void osmHypothesis :: removeReorderingOperations()
|
||||
deletionCount = 0;
|
||||
openGapCount = 0;
|
||||
gapWidth = 0;
|
||||
//cout<<"I came here"<<endl;
|
||||
|
||||
std::vector <std::string> tupleSequence;
|
||||
|
||||
@ -581,10 +580,14 @@ void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int
|
||||
|
||||
}
|
||||
|
||||
void osmHypothesis :: populateScores(vector <float> & scores)
|
||||
void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
|
||||
{
|
||||
scores.clear();
|
||||
scores.push_back(opProb);
|
||||
scores.push_back(opProb);
|
||||
|
||||
if (numFeatures == 1)
|
||||
return;
|
||||
|
||||
scores.push_back(gapWidth);
|
||||
scores.push_back(gapCount);
|
||||
scores.push_back(openGapCount);
|
||||
|
@ -89,7 +89,7 @@ public:
|
||||
void setState(const FFState* prev_state);
|
||||
osmState * saveState();
|
||||
void print();
|
||||
void populateScores(std::vector <float> & scores);
|
||||
void populateScores(std::vector <float> & scores , const int numFeatures);
|
||||
void setState(const lm::ngram::State & val) {
|
||||
lmState = val;
|
||||
}
|
||||
|
@ -1,33 +0,0 @@
|
||||
#include "PhraseBasedFeatureContext.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/Manager.h"
|
||||
#include "moses/TranslationOption.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
PhraseBasedFeatureContext::PhraseBasedFeatureContext(const Hypothesis* hypothesis) :
|
||||
m_hypothesis(hypothesis),
|
||||
m_translationOption(m_hypothesis->GetTranslationOption()),
|
||||
m_source(m_hypothesis->GetManager().GetSource()) {}
|
||||
|
||||
PhraseBasedFeatureContext::PhraseBasedFeatureContext
|
||||
(const TranslationOption& translationOption, const InputType& source) :
|
||||
m_hypothesis(NULL),
|
||||
m_translationOption(translationOption),
|
||||
m_source(source)
|
||||
{}
|
||||
|
||||
const TargetPhrase& PhraseBasedFeatureContext::GetTargetPhrase() const
|
||||
{
|
||||
return m_translationOption.GetTargetPhrase();
|
||||
}
|
||||
|
||||
const WordsBitmap& PhraseBasedFeatureContext::GetWordsBitmap() const
|
||||
{
|
||||
if (!m_hypothesis) {
|
||||
throw std::logic_error("Coverage vector not available during pre-calculation");
|
||||
}
|
||||
return m_hypothesis->GetWordsBitmap();
|
||||
}
|
||||
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class Hypothesis;
|
||||
class TranslationOption;
|
||||
class InputType;
|
||||
class TargetPhrase;
|
||||
class WordsBitmap;
|
||||
|
||||
/**
|
||||
* Contains all that a feature function can access without affecting recombination.
|
||||
* For stateless features, this is all that it can access. Currently this is not
|
||||
* used for stateful features, as it would need to be retro-fitted to the LM feature.
|
||||
* TODO: Expose source segmentation,lattice path.
|
||||
* XXX Don't add anything to the context that would break recombination XXX
|
||||
**/
|
||||
class PhraseBasedFeatureContext
|
||||
{
|
||||
// The context either has a hypothesis (during search), or a TranslationOption and
|
||||
// source sentence (during pre-calculation).
|
||||
const Hypothesis* m_hypothesis;
|
||||
const TranslationOption& m_translationOption;
|
||||
const InputType& m_source;
|
||||
|
||||
public:
|
||||
PhraseBasedFeatureContext(const Hypothesis* hypothesis);
|
||||
PhraseBasedFeatureContext(const TranslationOption& translationOption,
|
||||
const InputType& source);
|
||||
|
||||
const TranslationOption& GetTranslationOption() const {
|
||||
return m_translationOption;
|
||||
}
|
||||
const InputType& GetSource() const {
|
||||
return m_source;
|
||||
}
|
||||
const TargetPhrase& GetTargetPhrase() const; //convenience method
|
||||
const WordsBitmap& GetWordsBitmap() const;
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -24,7 +24,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
void EvaluateChart(const ChartHypothesis& hypo,
|
||||
ScoreComponentCollection*) const {
|
||||
throw std::logic_error("PhraseLengthFeature not valid in chart decoder");
|
||||
}
|
||||
|
@ -106,11 +106,11 @@ void PhrasePairFeature::Load()
|
||||
}
|
||||
|
||||
void PhrasePairFeature::Evaluate(
|
||||
const PhraseBasedFeatureContext& context,
|
||||
const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetPhrase& target = context.GetTargetPhrase();
|
||||
const Phrase& source = context.GetTranslationOption().GetInputPath().GetPhrase();
|
||||
const TargetPhrase& target = hypo.GetCurrTargetPhrase();
|
||||
const Phrase& source = hypo.GetTranslationOption().GetInputPath().GetPhrase();
|
||||
if (m_simple) {
|
||||
ostringstream namestr;
|
||||
namestr << "pp_";
|
||||
@ -131,7 +131,7 @@ void PhrasePairFeature::Evaluate(
|
||||
accumulator->SparsePlusEquals(namestr.str(),1);
|
||||
}
|
||||
if (m_domainTrigger) {
|
||||
const Sentence& input = static_cast<const Sentence&>(context.GetSource());
|
||||
const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
|
||||
const bool use_topicid = input.GetUseTopicId();
|
||||
const bool use_topicid_prob = input.GetUseTopicIdAndProb();
|
||||
|
||||
@ -199,7 +199,7 @@ void PhrasePairFeature::Evaluate(
|
||||
}
|
||||
}
|
||||
if (m_sourceContext) {
|
||||
const Sentence& input = static_cast<const Sentence&>(context.GetSource());
|
||||
const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
|
||||
|
||||
// range over source words to get context
|
||||
for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
|
||||
|
@ -37,10 +37,10 @@ public:
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
void Evaluate(const PhraseBasedFeatureContext& context,
|
||||
void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
void EvaluateChart(const ChartHypothesis& hypo,
|
||||
ScoreComponentCollection*) const {
|
||||
throw std::logic_error("PhrasePairFeature not valid in chart decoder");
|
||||
}
|
||||
|
@ -23,14 +23,14 @@ public:
|
||||
/**
|
||||
* This should be implemented for features that apply to phrase-based models.
|
||||
**/
|
||||
virtual void Evaluate(const PhraseBasedFeatureContext& context,
|
||||
virtual void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
/**
|
||||
* Same for chart-based features.
|
||||
**/
|
||||
virtual void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
virtual void EvaluateChart(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
|
@ -137,12 +137,12 @@ void WordTranslationFeature::Load()
|
||||
}
|
||||
|
||||
void WordTranslationFeature::Evaluate
|
||||
(const PhraseBasedFeatureContext& context,
|
||||
(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const Sentence& input = static_cast<const Sentence&>(context.GetSource());
|
||||
const TranslationOption& transOpt = context.GetTranslationOption();
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const Sentence& input = static_cast<const Sentence&>(hypo.GetInput());
|
||||
const TranslationOption& transOpt = hypo.GetTranslationOption();
|
||||
const TargetPhrase& targetPhrase = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();
|
||||
|
||||
// process aligned words
|
||||
@ -243,7 +243,7 @@ void WordTranslationFeature::Evaluate
|
||||
}
|
||||
}
|
||||
if (m_sourceContext) {
|
||||
size_t globalSourceIndex = context.GetTranslationOption().GetStartPos() + sourceIndex;
|
||||
size_t globalSourceIndex = hypo.GetTranslationOption().GetStartPos() + sourceIndex;
|
||||
if (!m_domainTrigger && globalSourceIndex == 0) {
|
||||
// add <s> trigger feature for source
|
||||
stringstream feature;
|
||||
@ -349,7 +349,7 @@ void WordTranslationFeature::Evaluate
|
||||
}
|
||||
|
||||
void WordTranslationFeature::EvaluateChart(
|
||||
const ChartBasedFeatureContext& context,
|
||||
const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Need source phrase. Can't be arsed at the moment");
|
||||
|
@ -47,10 +47,10 @@ public:
|
||||
return new DummyState();
|
||||
}
|
||||
|
||||
void Evaluate(const PhraseBasedFeatureContext& context,
|
||||
void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
void EvaluateChart(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
};
|
||||
|
@ -311,7 +311,8 @@ private:
|
||||
|
||||
};
|
||||
|
||||
inline void swap(FVector &first, FVector &second) {
|
||||
inline void swap(FVector &first, FVector &second)
|
||||
{
|
||||
swap(first.m_features, second.m_features);
|
||||
swap(first.m_coreFeatures, second.m_coreFeatures);
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (! staticData.IsFeatureFunctionIgnored( slff )) {
|
||||
slff.Evaluate(PhraseBasedFeatureContext(this), &m_scoreBreakdown);
|
||||
slff.Evaluate(*this, &m_scoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -417,7 +417,8 @@ struct SCCPlus {
|
||||
}
|
||||
};
|
||||
|
||||
inline void swap(ScoreComponentCollection &first, ScoreComponentCollection &second) {
|
||||
inline void swap(ScoreComponentCollection &first, ScoreComponentCollection &second)
|
||||
{
|
||||
swap(first.m_scores, second.m_scores);
|
||||
}
|
||||
|
||||
|
@ -34,8 +34,8 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
|
||||
public:
|
||||
MockStatelessFeatureFunction(const string& desc, size_t n, const string &line) :
|
||||
StatelessFeatureFunction(desc,n, line) {}
|
||||
virtual void Evaluate(const PhraseBasedFeatureContext&, ScoreComponentCollection*) const {}
|
||||
virtual void EvaluateChart(const ChartBasedFeatureContext&, ScoreComponentCollection*) const {}
|
||||
virtual void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {}
|
||||
virtual void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {}
|
||||
virtual void Evaluate(const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
|
@ -201,7 +201,8 @@ void TargetPhrase::Merge(const TargetPhrase &copy, const std::vector<FactorType>
|
||||
m_fullScore += copy.m_fullScore;
|
||||
}
|
||||
|
||||
void swap(TargetPhrase &first, TargetPhrase &second) {
|
||||
void swap(TargetPhrase &first, TargetPhrase &second)
|
||||
{
|
||||
first.SwapWords(second);
|
||||
std::swap(first.m_fullScore, second.m_fullScore);
|
||||
std::swap(first.m_futureScore, second.m_futureScore);
|
||||
|
@ -107,7 +107,7 @@ struct CompareTargetPhrase {
|
||||
};
|
||||
|
||||
const TargetPhraseCollection*
|
||||
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCache(const Phrase &sourcePhrase) const
|
||||
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
|
||||
{
|
||||
|
||||
// There is no such source phrase if source phrase is longer than longest
|
||||
|
@ -74,7 +74,7 @@ public:
|
||||
|
||||
void Load();
|
||||
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionNonCache(const Phrase &source) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
|
||||
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
|
||||
|
||||
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
|
||||
|
@ -39,7 +39,7 @@ PhraseDictionary::PhraseDictionary(const std::string &description, const std::st
|
||||
{
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const Phrase& src) const
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret;
|
||||
if (m_maxCacheSize) {
|
||||
@ -53,7 +53,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
|
||||
|
||||
if (iter == cache.end()) {
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(src);
|
||||
ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
|
||||
if (ret) {
|
||||
ret = new TargetPhraseCollection(*ret);
|
||||
}
|
||||
@ -62,37 +62,35 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
|
||||
cache[hash] = value;
|
||||
} else {
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
|
||||
ret = value.first;
|
||||
ret = value.first;
|
||||
}
|
||||
} else {
|
||||
// don't use cache. look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(src);
|
||||
// don't use cache. look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionNonCache(const Phrase& src) const
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Legacy method not implemented");
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollectionWithSourcePhrase* PhraseDictionary::
|
||||
GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const& range) const
|
||||
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Legacy method not implemented");
|
||||
//Phrase phrase = src.GetSubString(range);
|
||||
//return GetTargetPhraseCollection(phrase);
|
||||
}
|
||||
|
||||
void PhraseDictionary::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "cache-size") {
|
||||
m_maxCacheSize = Scan<size_t>(value);
|
||||
m_maxCacheSize = Scan<size_t>(value);
|
||||
} else if (key == "path") {
|
||||
m_filePath = value;
|
||||
} else if (key == "table-limit") {
|
||||
@ -121,14 +119,14 @@ void PhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &phras
|
||||
InputPath &node = **iter;
|
||||
|
||||
const Phrase &phrase = node.GetPhrase();
|
||||
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollection(phrase);
|
||||
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
|
||||
node.SetTargetPhrases(*this, targetPhrases, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionary::ReduceCache() const
|
||||
{
|
||||
CacheColl &cache = GetCache();
|
||||
CacheColl &cache = GetCache();
|
||||
if (cache.size() <= m_maxCacheSize) return; // not full
|
||||
|
||||
// find cutoff for last used time
|
||||
@ -161,8 +159,8 @@ PhraseDictionary::CacheColl &PhraseDictionary::GetCache() const
|
||||
CacheColl *cache;
|
||||
cache = m_cache.get();
|
||||
if (cache == NULL) {
|
||||
cache = new CacheColl;
|
||||
m_cache.reset(cache);
|
||||
cache = new CacheColl;
|
||||
m_cache.reset(cache);
|
||||
}
|
||||
CHECK(cache);
|
||||
return *cache;
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
// LEGACY - The preferred method is to override GetTargetPhraseCollectionBatch().
|
||||
// See class PhraseDictionaryMemory or PhraseDictionaryOnDisk for details
|
||||
//! find list of translations that can translates src. Only for phrase input
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
virtual void GetTargetPhraseCollectionBatch(const InputPathList &phraseDictionaryQueue) const;
|
||||
|
||||
@ -102,7 +102,7 @@ public:
|
||||
|
||||
// LEGACY
|
||||
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
||||
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const& range) const;
|
||||
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
|
||||
|
||||
protected:
|
||||
size_t m_tableLimit;
|
||||
@ -126,7 +126,7 @@ protected:
|
||||
mutable boost::scoped_ptr<CacheColl> m_cache;
|
||||
#endif
|
||||
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const Phrase& src) const;
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
|
||||
void ReduceCache() const;
|
||||
|
||||
protected:
|
||||
|
@ -46,7 +46,7 @@ void PhraseDictionaryDynSuffixArray::SetParameter(const std::string& key, const
|
||||
|
||||
const TargetPhraseCollection*
|
||||
PhraseDictionaryDynSuffixArray::
|
||||
GetTargetPhraseCollection(const Phrase& src) const
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
typedef map<SAPhrase, vector<float> >::value_type pstat_entry;
|
||||
map<SAPhrase, vector<float> > pstats; // phrase (pair) statistics
|
||||
|
@ -21,7 +21,7 @@ public:
|
||||
bool InitDictionary();
|
||||
void Load();
|
||||
// functions below required by base class
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
void insertSnt(string&, string&, string&);
|
||||
void deleteSnt(unsigned, unsigned);
|
||||
ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&);
|
||||
|
@ -59,7 +59,7 @@ TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollectio
|
||||
return currNode.GetTargetPhraseCollection();
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase& sourceOrig) const
|
||||
const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
|
||||
{
|
||||
Phrase source(sourceOrig);
|
||||
source.OnlyTheseFactors(m_inputFactors);
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
const ChartCellCollectionBase &);
|
||||
|
||||
// only used by multi-model phrase table, and other meta-features
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
|
||||
const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &phraseDictionaryQueue) const;
|
||||
|
||||
TO_STRING();
|
||||
|
@ -83,7 +83,7 @@ void PhraseDictionaryMultiModel::Load()
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollection(const Phrase& src) const
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
|
||||
std::vector<std::vector<float> > multimodelweights;
|
||||
@ -117,7 +117,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
const PhraseDictionary &pd = *m_pd[i];
|
||||
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection( src);
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
|
||||
if (ret_raw != NULL) {
|
||||
|
||||
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
|
||||
#endif
|
||||
// functions below required by base class
|
||||
virtual const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
|
||||
virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
virtual void InitializeForInput(InputType const&) {
|
||||
/* Don't do anything source specific here as this object is shared between threads.*/
|
||||
}
|
||||
|
@ -140,7 +140,7 @@ void PhraseDictionaryMultiModelCounts::Load()
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollection(const Phrase& src) const
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
vector<vector<float> > multimodelweights;
|
||||
bool normalize;
|
||||
@ -168,7 +168,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
const PhraseDictionary &pd = *m_pd[i];
|
||||
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection( src);
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
|
||||
if (ret_raw != NULL) {
|
||||
|
||||
TargetPhraseCollection::iterator iterTargetPhrase;
|
||||
@ -269,7 +269,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
|
||||
{
|
||||
|
||||
const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
|
||||
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollection(target);
|
||||
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
|
||||
|
||||
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
|
||||
if (ret_raw && ret_raw->GetSize() > 0) {
|
||||
|
@ -94,7 +94,7 @@ public:
|
||||
void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
|
||||
void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
|
||||
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
#ifdef WITH_DLIB
|
||||
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
||||
#endif
|
||||
|
@ -75,7 +75,7 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const
|
||||
}
|
||||
|
||||
TargetPhraseCollection const*
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCache(Phrase const &src) const
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src);
|
||||
return ret;
|
||||
@ -108,7 +108,7 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const
|
||||
|
||||
// legacy
|
||||
const TargetPhraseCollectionWithSourcePhrase*
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const &range) const
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
|
||||
{
|
||||
if(GetImplementation().m_rangeCache.empty()) {
|
||||
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
|
||||
// get translation candidates for a given source phrase
|
||||
// returns null pointer if nothing found
|
||||
TargetPhraseCollection const* GetTargetPhraseCollectionNonCache(Phrase const &src) const;
|
||||
TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||
|
||||
void InitializeForInput(InputType const& source);
|
||||
void CleanUpAfterSentenceProcessing(InputType const& source);
|
||||
@ -73,7 +73,7 @@ public:
|
||||
}
|
||||
|
||||
// legacy
|
||||
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const & srcRange) const;
|
||||
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const;
|
||||
|
||||
};
|
||||
|
||||
|
@ -104,92 +104,91 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList
|
||||
|
||||
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath) const
|
||||
{
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
const Phrase &phrase = inputPath.GetPhrase();
|
||||
const InputPath *prevInputPath = inputPath.GetPrevNode();
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
const Phrase &phrase = inputPath.GetPhrase();
|
||||
const InputPath *prevInputPath = inputPath.GetPrevNode();
|
||||
|
||||
const OnDiskPt::PhraseNode *prevPtNode = NULL;
|
||||
const OnDiskPt::PhraseNode *prevPtNode = NULL;
|
||||
|
||||
if (prevInputPath) {
|
||||
prevPtNode = static_cast<const OnDiskPt::PhraseNode*>(prevInputPath->GetPtNode(*this));
|
||||
if (prevInputPath) {
|
||||
prevPtNode = static_cast<const OnDiskPt::PhraseNode*>(prevInputPath->GetPtNode(*this));
|
||||
} else {
|
||||
// Starting subphrase.
|
||||
assert(phrase.GetSize() == 1);
|
||||
prevPtNode = &wrapper.GetRootSourceNode();
|
||||
}
|
||||
|
||||
if (prevPtNode) {
|
||||
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
|
||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
||||
|
||||
if (lastWordOnDisk == NULL) {
|
||||
// OOV according to this phrase table. Not possible to extend
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
} else {
|
||||
// Starting subphrase.
|
||||
assert(phrase.GetSize() == 1);
|
||||
prevPtNode = &wrapper.GetRootSourceNode();
|
||||
}
|
||||
|
||||
if (prevPtNode) {
|
||||
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
|
||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
||||
|
||||
if (lastWordOnDisk == NULL) {
|
||||
// OOV according to this phrase table. Not possible to extend
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||
if (ptNode) {
|
||||
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
} else {
|
||||
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||
if (ptNode) {
|
||||
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
} else {
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
}
|
||||
|
||||
delete lastWordOnDisk;
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
}
|
||||
|
||||
delete lastWordOnDisk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
|
||||
{
|
||||
const TargetPhraseCollection *ret;
|
||||
const TargetPhraseCollection *ret;
|
||||
|
||||
if (m_maxCacheSize) {
|
||||
CacheColl &cache = GetCache();
|
||||
size_t hash = (size_t) ptNode->GetFilePos();
|
||||
if (m_maxCacheSize) {
|
||||
CacheColl &cache = GetCache();
|
||||
size_t hash = (size_t) ptNode->GetFilePos();
|
||||
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
|
||||
|
||||
iter = cache.find(hash);
|
||||
iter = cache.find(hash);
|
||||
|
||||
if (iter == cache.end()) {
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||
if (ret) {
|
||||
ret = new TargetPhraseCollection(*ret);
|
||||
}
|
||||
if (iter == cache.end()) {
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||
if (ret) {
|
||||
ret = new TargetPhraseCollection(*ret);
|
||||
}
|
||||
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
||||
cache[hash] = value;
|
||||
}
|
||||
else {
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
||||
cache[hash] = value;
|
||||
} else {
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
|
||||
ret = value.first;
|
||||
}
|
||||
} else {
|
||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||
}
|
||||
ret = value.first;
|
||||
}
|
||||
} else {
|
||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
|
||||
{
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
|
||||
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
||||
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
||||
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
||||
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
||||
|
||||
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||
TargetPhraseCollection *targetPhrases
|
||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
|
||||
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||
TargetPhraseCollection *targetPhrases
|
||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
|
||||
|
||||
delete targetPhrasesOnDisk;
|
||||
delete targetPhrasesOnDisk;
|
||||
|
||||
return targetPhrases;
|
||||
return targetPhrases;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -57,8 +57,6 @@ public:
|
||||
const ChartCellCollectionBase &);
|
||||
|
||||
private:
|
||||
// const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
|
||||
|
@ -104,7 +104,7 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
|
||||
|
||||
// check whether we should be using the old code to supportbinary phrase-table.
|
||||
// eventually, we'll stop support the binary phrase-table and delete this legacy code
|
||||
CheckLegacy();
|
||||
CheckLEGACY();
|
||||
}
|
||||
|
||||
InputPathList &TranslationOptionCollectionConfusionNet::GetInputPathList(size_t startPos, size_t endPos)
|
||||
@ -164,7 +164,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRange(
|
||||
, size_t graphInd)
|
||||
{
|
||||
if (m_useLegacy) {
|
||||
CreateTranslationOptionsForRangeLegacy(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
|
||||
CreateTranslationOptionsForRangeLEGACY(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
|
||||
} else {
|
||||
CreateTranslationOptionsForRangeNew(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
|
||||
}
|
||||
@ -191,7 +191,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeNe
|
||||
}
|
||||
}
|
||||
|
||||
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLegacy(
|
||||
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLEGACY(
|
||||
const DecodeGraph &decodeGraph
|
||||
, size_t startPos
|
||||
, size_t endPos
|
||||
@ -209,7 +209,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLe
|
||||
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
|
||||
const DecodeStep &decodeStep = **iterStep;
|
||||
|
||||
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLegacy
|
||||
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY
|
||||
(m_source, *oldPtoc
|
||||
, startPos, endPos, adhereTableLimit, inputPathList );
|
||||
|
||||
@ -231,7 +231,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLe
|
||||
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
|
||||
|
||||
if (transStep) {
|
||||
transStep->ProcessLegacy(inputPartialTranslOpt
|
||||
transStep->ProcessLEGACY(inputPartialTranslOpt
|
||||
, *decodeStep
|
||||
, *newPtoc
|
||||
, this
|
||||
@ -275,7 +275,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLe
|
||||
}
|
||||
}
|
||||
|
||||
void TranslationOptionCollectionConfusionNet::CheckLegacy()
|
||||
void TranslationOptionCollectionConfusionNet::CheckLEGACY()
|
||||
{
|
||||
const std::vector<PhraseDictionary*> &pts = StaticData::Instance().GetPhraseDictionaries();
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
|
@ -30,8 +30,8 @@ protected:
|
||||
, bool adhereTableLimit
|
||||
, size_t graphInd);
|
||||
|
||||
void CheckLegacy();
|
||||
void CreateTranslationOptionsForRangeLegacy(const DecodeGraph &decodeStepList
|
||||
void CheckLEGACY();
|
||||
void CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeStepList
|
||||
, size_t startPosition
|
||||
, size_t endPosition
|
||||
, bool adhereTableLimit
|
||||
|
Loading…
Reference in New Issue
Block a user