Merge branch 'master' of https://github.com/moses-smt/mosesdecoder into moses-server-placeholder-patch

Conflict in TranslationRequest solved.

Conflicts:
	moses/server/TranslationRequest.cpp
	moses/server/TranslationRequest.h
This commit is contained in:
Evgeny Matusov 2015-11-19 09:59:15 -07:00
commit 42c57d077b
42 changed files with 318 additions and 374 deletions

View File

@ -51,27 +51,8 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,

View File

@ -123,22 +123,6 @@ public:
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
bool Enabled() const {
return m_enabled;

View File

@ -42,24 +42,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -58,23 +58,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -30,11 +30,6 @@ bool CoveredReferenceState::operator==(const FFState& other) const
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath

View File

@ -53,10 +53,6 @@ public:
return new CoveredReferenceState();
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -64,10 +60,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -47,23 +47,6 @@ public:
throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
};
}

View File

@ -64,28 +64,6 @@ public:
UTIL_THROW2("LexicalReordering is not valid for chart decoder");
}
void
EvaluateWithSourceContext
(const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection* estimatedScores = NULL) const
{ }
void
EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &transOptList) const
{ }
void
EvaluateInIsolation(const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{ }
bool
GetHaveDefaultScores() {
return m_haveDefaultScores;

View File

@ -37,18 +37,6 @@ public:
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown

View File

@ -54,23 +54,6 @@ public:
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -296,18 +296,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const
{};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -15,12 +15,19 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
ReadParameters();
}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -29,8 +36,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedScores) const
{}
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const
{}
FFState* SkeletonStatefulFF::EvaluateWhenApplied(

View File

@ -37,19 +37,35 @@ public:
return new SkeletonState(0);
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateTranslationOptionListWithSourceContext
( const InputType &input , const TranslationOptionList &translationOptionList) const;
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,

View File

@ -66,6 +66,23 @@ public:
return false;
}
virtual void
EvaluateInIsolation
(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const {}
virtual void
EvaluateWithSourceContext
(InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase,
StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedFutureScore = NULL) const {}
virtual void
EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const {}
};

View File

@ -48,22 +48,6 @@ public:
ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void SetParameter(const std::string& key, const std::string& value);

View File

@ -215,24 +215,6 @@ public:
virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -63,22 +63,6 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -69,10 +69,11 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
// out << "ReportHistoryOrder not implemented";
}
void LanguageModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
void
LanguageModel::
EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{
// contains factors used by this LM
float fullScore, nGramScore;

View File

@ -94,18 +94,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
};
}

View File

@ -188,22 +188,6 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const
return hashCode;
}
void BilingualLM::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {}
void BilingualLM::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores) const
{
}
FFState* BilingualLM::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -119,23 +119,6 @@ public:
void Load();
void EvaluateInIsolation(
const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(
const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -393,9 +393,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
// Non-terminal is first so we can copy instead of rescoring.
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->states[featureID])->GetChartState();
float prob = UntransformLMScore(
pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.BeginNonTerminal(prevState, prob);
ruleScore.BeginNonTerminal(prevState);
phrasePos++;
}
}
@ -405,9 +403,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
if (word.IsNonTerminal()) {
const Syntax::SVertex *pred = hyperedge.tail[nonTermIndexMap[phrasePos]];
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(pred->states[featureID])->GetChartState();
float prob = UntransformLMScore(
pred->best->label.scoreBreakdown.GetScoresForProducer(this)[0]);
ruleScore.NonTerminal(prevState, prob);
ruleScore.NonTerminal(prevState);
} else {
ruleScore.Terminal(TranslateID(word));
}
@ -415,7 +411,16 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(con
float score = ruleScore.Finish();
score = TransformLMScore(score);
accumulator->Assign(this, score);
score -= target.GetScoreBreakdown().GetScoresForProducer(this)[0];
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
scores[1] = 0.0;
accumulator->PlusEquals(this, scores);
} else {
accumulator->PlusEquals(this, score);
}
return newState;
}

View File

@ -196,18 +196,7 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -101,10 +101,6 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
for (std::size_t i = 0; i < coordinates.size()-1; ++i) {
boost::shared_ptr<SVertex> pred = (*m_bundle.stacks[i])[coordinates[i]];
hyperedge->tail[i] = pred.get();
if (pred->best) {
hyperedge->label.scoreBreakdown.PlusEquals(
pred->best->label.scoreBreakdown);
}
}
hyperedge->label.inputWeight = m_bundle.inputWeight;
@ -112,8 +108,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
hyperedge->label.translation =
*(m_bundle.translations->begin()+coordinates.back());
hyperedge->label.scoreBreakdown.PlusEquals(
hyperedge->label.translation->GetScoreBreakdown());
// Calculate feature deltas.
const StaticData &staticData = StaticData::Instance();
@ -123,7 +118,7 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
if (!staticData.IsFeatureFunctionIgnored(*sfs[i])) {
sfs[i]->EvaluateWhenApplied(*hyperedge, &hyperedge->label.scoreBreakdown);
sfs[i]->EvaluateWhenApplied(*hyperedge, &hyperedge->label.deltas);
}
}
@ -132,12 +127,24 @@ SHyperedge *Cube::CreateHyperedge(const std::vector<int> &coordinates)
for (unsigned i = 0; i < ffs.size(); ++i) {
if (!staticData.IsFeatureFunctionIgnored(*ffs[i])) {
head->states[i] =
ffs[i]->EvaluateWhenApplied(*hyperedge, i,
&hyperedge->label.scoreBreakdown);
ffs[i]->EvaluateWhenApplied(*hyperedge, i, &hyperedge->label.deltas);
}
}
hyperedge->label.score = hyperedge->label.scoreBreakdown.GetWeightedScore();
// Calculate future score.
hyperedge->label.futureScore =
hyperedge->label.translation->GetScoreBreakdown().GetWeightedScore();
hyperedge->label.futureScore += hyperedge->label.deltas.GetWeightedScore();
for (std::vector<SVertex*>::const_iterator p = hyperedge->tail.begin();
p != hyperedge->tail.end(); ++p) {
const SVertex *pred = *p;
if (pred->best) {
hyperedge->label.futureScore += pred->best->label.futureScore;
}
}
return hyperedge;
}

View File

@ -42,7 +42,7 @@ private:
{
public:
bool operator()(const QueueItem &p, const QueueItem &q) const {
return p.first->label.score < q.first->label.score;
return p.first->label.futureScore < q.first->label.futureScore;
}
};

View File

@ -31,7 +31,7 @@ private:
{
public:
bool operator()(const Cube *p, const Cube *q) const {
return p->Top()->label.score < q->Top()->label.score;
return p->Top()->label.futureScore < q->Top()->label.futureScore;
}
};

View File

@ -304,7 +304,7 @@ void Manager<RuleMatcher>::RecombineAndSort(
// Compare the score of h against the score of the best incoming hyperedge
// for the stored vertex.
SVertex *storedVertex = result.first->second;
if (h->label.score > storedVertex->best->label.score) {
if (h->label.futureScore > storedVertex->best->label.futureScore) {
// h's score is better.
storedVertex->recombined.push_back(storedVertex->best);
storedVertex->best = h;

View File

@ -32,24 +32,25 @@ void KBestExtractor::Extract(
supremeVertex->best = new SHyperedge();
supremeVertex->best->head = supremeVertex.get();
supremeVertex->best->tail.push_back(&bestTopLevelVertex);
supremeVertex->best->label.score = bestTopLevelVertex.best->label.score;
supremeVertex->best->label.scoreBreakdown =
bestTopLevelVertex.best->label.scoreBreakdown;
supremeVertex->best->label.futureScore =
bestTopLevelVertex.best->label.futureScore;
supremeVertex->best->label.deltas = bestTopLevelVertex.best->label.deltas;
supremeVertex->best->label.translation = 0;
// For each alternative top-level SVertex, add a new incoming hyperedge to
// supremeVertex.
for (++p; p != topLevelVertices.end(); ++p) {
// Check that the first item in topLevelVertices really was the best.
UTIL_THROW_IF2((*p)->best->label.score > bestTopLevelVertex.best->label.score,
UTIL_THROW_IF2((*p)->best->label.futureScore >
bestTopLevelVertex.best->label.futureScore,
"top-level SVertices are not correctly sorted");
// Note: there's no need for a smart pointer here: supremeVertex will take
// ownership of altEdge.
SHyperedge *altEdge = new SHyperedge();
altEdge->head = supremeVertex.get();
altEdge->tail.push_back((*p).get());
altEdge->label.score = (*p)->best->label.score;
altEdge->label.scoreBreakdown = (*p)->best->label.scoreBreakdown;
altEdge->label.futureScore = (*p)->best->label.futureScore;
altEdge->label.deltas = (*p)->best->label.deltas;
altEdge->label.translation = 0;
supremeVertex->recombined.push_back(altEdge);
}
@ -282,7 +283,13 @@ void KBestExtractor::LazyNext(KVertex &v, const Derivation &d,
KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e)
{
edge = e;
std::size_t arity = edge->tail.size();
const TargetPhrase *translation = edge->shyperedge.label.translation;
// Every hyperedge should have an associated target phrase, except for
// incoming hyperedges of the 'supreme' vertex.
if (translation) {
scoreBreakdown = translation->GetScoreBreakdown();
}
const std::size_t arity = edge->tail.size();
backPointers.resize(arity, 0);
subderivations.reserve(arity);
for (std::size_t i = 0; i < arity; ++i) {
@ -290,9 +297,10 @@ KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e)
assert(pred.kBestList.size() >= 1);
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
subderivations.push_back(sub);
scoreBreakdown.PlusEquals(sub->scoreBreakdown);
}
score = edge->shyperedge.label.score;
scoreBreakdown = edge->shyperedge.label.scoreBreakdown;
scoreBreakdown.PlusEquals(edge->shyperedge.label.deltas);
score = scoreBreakdown.GetWeightedScore();
}
// Construct a Derivation that neighbours an existing Derivation.

View File

@ -32,7 +32,7 @@ void Manager::OutputBest(OutputCollector *collector) const
out << '\n';
} else {
if (options().output.ReportHypoScore) {
out << best->label.score << " ";
out << best->label.futureScore << " ";
}
Phrase yield = GetOneBestTargetYield(*best);
// delete 1st & last

View File

@ -368,7 +368,7 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
// Compare the score of h against the score of the best incoming hyperedge
// for the stored vertex.
SVertex *storedVertex = result.first->second;
if (h->label.score > storedVertex->best->label.score) {
if (h->label.futureScore > storedVertex->best->label.futureScore) {
// h's score is better.
storedVertex->recombined.push_back(storedVertex->best);
storedVertex->best = h;

View File

@ -16,7 +16,7 @@ public:
bundle.stacks.begin(); p != bundle.stacks.end(); ++p) {
const SVertexStack *stack = *p;
if (stack->front()->best) {
score += stack->front()->best->label.score;
score += stack->front()->best->label.futureScore;
}
}
return score;

View File

@ -8,11 +8,24 @@ namespace Moses
namespace Syntax
{
// A SHyperedge label.
//
struct SLabel {
float inputWeight;
float score;
ScoreComponentCollection scoreBreakdown;
// Deltas for individual feature scores. i.e. this object records the change
// in each feature score that results from applying the rule associated with
// this hyperedge.
ScoreComponentCollection deltas;
// Total derivation score to be used for comparison in beam search (i.e.
// including future cost estimates). This is the sum of the 1-best
// subderivations' future scores + deltas.
float futureScore;
// Target-side of the grammar rule.
const TargetPhrase *translation;
// Input weight of this hyperedge (e.g. from weighted input forest).
float inputWeight;
};
} // Syntax

View File

@ -18,7 +18,7 @@ struct SVertexStackContentOrderer {
public:
bool operator()(const boost::shared_ptr<SVertex> &x,
const boost::shared_ptr<SVertex> &y) {
return x->best->label.score > y->best->label.score;
return x->best->label.futureScore > y->best->label.futureScore;
}
};

View File

@ -264,7 +264,7 @@ void Manager<RuleMatcher>::RecombineAndSort(
// Compare the score of h against the score of the best incoming hyperedge
// for the stored vertex.
SVertex *storedVertex = result.first->second;
if (h->label.score > storedVertex->best->label.score) {
if (h->label.futureScore > storedVertex->best->label.futureScore) {
// h's score is better.
storedVertex->recombined.push_back(storedVertex->best);
storedVertex->best = h;

View File

@ -33,11 +33,15 @@ namespace Moses
PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
: PhraseDictionary(line, true),
m_numModels(0),
m_totalModelScores(0),
m_phraseCounts(false),
m_wordCounts(false),
m_modelBitmapCounts(false),
m_restrict(false),
m_haveDefaultScores(false),
m_defaultAverageOthers(false),
m_scoresToAverage(0),
m_scoresPerModel(0)
m_scoresPerModel(0),
m_haveMmsaptLrFunc(false)
{
ReadParameters();
}
@ -47,14 +51,23 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
if (key == "members") {
m_memberPDStrs = Tokenize(value, ",");
m_numModels = m_memberPDStrs.size();
m_seenByAll = dynamic_bitset<>(m_numModels);
m_seenByAll.set();
} else if (key == "restrict") {
m_restrict = Scan<bool>(value);
} else if (key == "phrase-counts") {
m_phraseCounts = Scan<bool>(value);
} else if (key == "word-counts") {
m_wordCounts = Scan<bool>(value);
} else if (key == "model-bitmap-counts") {
m_modelBitmapCounts = Scan<bool>(value);
} else if (key =="default-scores") {
m_haveDefaultScores = true;
m_defaultScores = Scan<float>(Tokenize(value, ","));
} else if (key =="default-average-others") {
m_defaultAverageOthers = true;
m_scoresToAverage = Scan<size_t>(value);
m_defaultAverageOthers = Scan<bool>(value);
} else if (key =="mmsapt-lr-func") {
m_haveMmsaptLrFunc = true;
} else {
PhraseDictionary::SetParameter(key, value);
}
@ -64,9 +77,9 @@ void PhraseDictionaryGroup::Load()
{
SetFeaturesToApply();
m_pdFeature.push_back(const_cast<PhraseDictionaryGroup*>(this));
size_t numScoreComponents = 0;
// Locate/check component phrase tables
size_t componentWeights = 0;
BOOST_FOREACH(const string& pdName, m_memberPDStrs) {
bool pdFound = false;
BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl()) {
@ -74,25 +87,48 @@ void PhraseDictionaryGroup::Load()
pdFound = true;
m_memberPDs.push_back(pd);
size_t nScores = pd->GetNumScoreComponents();
componentWeights += nScores;
numScoreComponents += nScores;
if (m_scoresPerModel == 0) {
m_scoresPerModel = nScores;
} else if (m_defaultAverageOthers) {
UTIL_THROW_IF2(nScores != m_scoresPerModel,
"Member models must have the same number of scores when using default-average-others");
m_description << ": member models must have the same number of scores when using default-average-others");
}
}
}
UTIL_THROW_IF2(!pdFound,
"Could not find member phrase table " << pdName);
m_description << ": could not find member phrase table " << pdName);
}
UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
"Total number of member model scores is unequal to specified number of scores");
m_totalModelScores = numScoreComponents;
// Check feature total
if (m_phraseCounts) {
numScoreComponents += m_numModels;
}
if (m_wordCounts) {
numScoreComponents += m_numModels;
}
if (m_modelBitmapCounts) {
numScoreComponents += (pow(2, m_numModels) - 1);
}
UTIL_THROW_IF2(numScoreComponents != m_numScoreComponents,
m_description << ": feature count mismatch: specify \"num-features=" << numScoreComponents << "\" and supply " << numScoreComponents << " weights");
#ifdef PT_UG
// Locate mmsapt lexical reordering functions if specified
if (m_haveMmsaptLrFunc) {
BOOST_FOREACH(PhraseDictionary* pd, m_memberPDs) {
// pointer to pointer, all start as NULL and some may be populated prior
// to translation
m_mmsaptLrFuncs.push_back(&(static_cast<Mmsapt*>(pd)->m_lr_func));
}
}
#endif
// Determine "zero" scores for features
if (m_haveDefaultScores) {
UTIL_THROW_IF2(m_defaultScores.size() != m_numScoreComponents,
"Number of specified default scores is unequal to number of member model scores");
m_description << ": number of specified default scores is unequal to number of member model scores");
} else {
// Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0)
// or a smoothed "not in model" score)
@ -207,32 +243,57 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
offset += pd.GetNumScoreComponents();
}
// Finalize scores and add phrases to return collection
// Compute additional scores as phrases are added to return collection
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
// Score order (example with 2 models)
// member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
// Extra scores added after member model scores
size_t offset = m_totalModelScores;
// Phrase count (per member model)
if (m_phraseCounts) {
for (size_t i = 0; i < m_numModels; ++i) {
if (pdgPhrase.m_seenBy[i]) {
pdgPhrase.m_scores[offset + i] = 1;
}
}
offset += m_numModels;
}
// Word count (per member model)
if (m_wordCounts) {
size_t wc = pdgPhrase.m_targetPhrase->GetSize();
for (size_t i = 0; i < m_numModels; ++i) {
if (pdgPhrase.m_seenBy[i]) {
pdgPhrase.m_scores[offset + i] = wc;
}
}
offset += m_numModels;
}
// Model bitmap features (one feature per possible bitmap)
// e.g. seen by models 1 and 3 but not 2 -> "101" fires
if (m_modelBitmapCounts) {
// Throws exception if someone tries to combine more than 64 models
pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
offset += m_seenByAll.to_ulong();
}
// Average other-model scores to fill in defaults when models have not seen
// this phrase
if (m_defaultAverageOthers) {
bool seenByAll = true;
for (size_t i = 0; i < m_numModels; ++i) {
if (!pdgPhrase.m_seenBy[i]) {
seenByAll = false;
break;
}
}
// Average seen scores, limited to specified number (e.g. model can have
// 10 scores but you only want to average 8, leaving the last 2 as 0s)
if (!seenByAll) {
vector<float> avgScores(m_scoresToAverage, 0);
// Average seen scores
if (pdgPhrase.m_seenBy != m_seenByAll) {
vector<float> avgScores(m_scoresPerModel, 0);
size_t seenBy = 0;
size_t offset = 0;
offset = 0;
// sum
for (size_t i = 0; i < m_numModels; ++i) {
if (pdgPhrase.m_seenBy[i]) {
for (size_t j = 0; j < m_scoresToAverage; ++j) {
for (size_t j = 0; j < m_scoresPerModel; ++j) {
avgScores[j] += pdgPhrase.m_scores[offset + j];
}
seenBy += 1;
@ -240,19 +301,57 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
offset += m_scoresPerModel;
}
// divide
for (size_t j = 0; j < m_scoresToAverage; ++j) {
for (size_t j = 0; j < m_scoresPerModel; ++j) {
avgScores[j] /= seenBy;
}
// copy
offset = 0;
for (size_t i = 0; i < m_numModels; ++i) {
if (!pdgPhrase.m_seenBy[i]) {
for (size_t j = 0; j < m_scoresToAverage; ++j) {
for (size_t j = 0; j < m_scoresPerModel; ++j) {
pdgPhrase.m_scores[offset + j] = avgScores[j];
}
}
offset += m_scoresPerModel;
}
#ifdef PT_UG
// Also average LexicalReordering scores if specified
// We don't necessarily have a lr-func for each model
if (m_haveMmsaptLrFunc) {
SPTR<Scores> avgLRScores;
size_t seenBy = 0;
// For each model
for (size_t i = 0; i < m_numModels; ++i) {
const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
// Add if phrase seen and model has lr-func
if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
if (!avgLRScores) {
avgLRScores.reset(new Scores(*scores));
} else {
for (size_t j = 0; j < scores->size(); ++j) {
(*avgLRScores)[j] += (*scores)[j];
}
}
seenBy += 1;
}
}
// Make sure we have at least one lr-func
if (avgLRScores) {
// divide
for (size_t j = 0; j < avgLRScores->size(); ++j) {
(*avgLRScores)[j] /= seenBy;
}
// set
for (size_t i = 0; i < m_numModels; ++i) {
const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
}
}
}
}
#endif
}
}

View File

@ -20,28 +20,36 @@
#ifndef moses_PhraseDictionaryGroup_h
#define moses_PhraseDictionaryGroup_h
#include "moses/TranslationModel/PhraseDictionary.h"
#include <boost/dynamic_bitset.hpp>
#include <boost/unordered_map.hpp>
#include <boost/thread/shared_mutex.hpp>
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#ifdef PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
namespace Moses
{
struct PDGroupPhrase {
TargetPhrase* m_targetPhrase;
std::vector<float> m_scores;
std::vector<bool> m_seenBy;
boost::dynamic_bitset<> m_seenBy;
PDGroupPhrase() : m_targetPhrase(NULL) { }
PDGroupPhrase(TargetPhrase* targetPhrase, const std::vector<float>& scores, const size_t nModels)
: m_targetPhrase(targetPhrase),
m_scores(scores),
m_seenBy(nModels, false) { }
m_seenBy(nModels) { }
};
/** Combines multiple phrase tables into a single interface. Each member phrase
@ -81,6 +89,14 @@ protected:
std::vector<PhraseDictionary*> m_memberPDs;
std::vector<FeatureFunction*> m_pdFeature;
size_t m_numModels;
size_t m_totalModelScores;
boost::dynamic_bitset<> m_seenByAll;
// phrase-counts option
bool m_phraseCounts;
// word-counts option
bool m_wordCounts;
// model-bitmap-counts option
bool m_modelBitmapCounts;
// restrict option
bool m_restrict;
// default-scores option
@ -88,8 +104,11 @@ protected:
std::vector<float> m_defaultScores;
// default-average-others option
bool m_defaultAverageOthers;
size_t m_scoresToAverage;
size_t m_scoresPerModel;
// mmsapt-lr-func options
bool m_haveMmsaptLrFunc;
// pointers to pointers since member mmsapts may not load these until later
std::vector<LexicalReordering**> m_mmsaptLrFuncs;
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
#ifdef WITH_THREADS

View File

@ -52,7 +52,7 @@ BitextSampler : public Moses::reference_counter
// const members
// SPTR<bitext const> const m_bitext; // keep bitext alive while I am
// should be an
iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
SPTR<bitext const> const m_bitext; // keep bitext alive as long as I am
size_t const m_plen; // length of lookup phrase
bool const m_fwd; // forward or backward direction?
SPTR<tsa const> const m_root; // root of suffix array

View File

@ -16,7 +16,7 @@ struct StatsCollector
typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t;
typedef ThreadSafeContainer<uint64_t, SPTR<pstats> > pcache_t;
typedef map<uint64_t, SPTR<pstats> > lcache_t;
iptr<Bitext<Token> const> bitext; // underlying bitext
SPTR<Bitext<Token> const> bitext; // underlying bitext
sampling_method method; // sampling method
size_t sample_size; // sample size
SPTR<SamplingBias const> bias; // sampling bias
@ -26,7 +26,7 @@ struct StatsCollector
SPTR<lcache_t> lcache; // local cache
ug::ThreadPool* tpool; // thread pool to run jobs on
StatsCollector(iptr<Bitext<Token> > xbitext,
StatsCollector(SPTR<Bitext<Token> > xbitext,
SPTR<SamplingBias> const xbias)
: method(ranked_sampling)
, sample_size(100)

View File

@ -58,6 +58,10 @@ namespace Moses
friend class Alignment;
std::map<std::string,std::string> param;
std::string m_name;
#ifndef NO_MOSES
// Allows PhraseDictionaryGroup to get &m_lr_func
friend class PhraseDictionaryGroup;
#endif
public:
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef sapt::mmBitext<Token> mmbitext;
@ -67,7 +71,7 @@ namespace Moses
typedef sapt::PhraseScorer<Token> pscorer;
private:
// vector<SPTR<bitext> > shards;
iptr<mmbitext> btfix;
SPTR<mmbitext> btfix;
SPTR<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
@ -156,7 +160,7 @@ namespace Moses
#if PROVIDES_RANKED_SAMPLING
void
set_bias_for_ranking(ttasksptr const& ttask, iptr<sapt::Bitext<Token> const> bt);
set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt);
#endif
private:

View File

@ -165,7 +165,17 @@ insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
}
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
}
// void
// TranslationRequest::
// output_phrase(ostream& out, Phrase const& phrase) const
// {
// if (!m_options.output.ReportAllFactors) {
// for (size_t i = 0 ; i < phrase.GetSize(); ++i)
// out << *phrase.GetFactor(i, 0) << " ";
// } else out << phrase;
// }
void
TranslationRequest::
outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
@ -367,12 +377,12 @@ pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const&
{
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(),
options().output.ReportSegmentation, m_options.output.ReportAllFactors);
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
manager.OutputSurface(target, *e, m_options.output.factor_order,
m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
}
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);
dest[key] = xmlrpc_c::value_string(target.str());
if (m_withAlignInfo) {
@ -418,7 +428,7 @@ run_phrase_decoder()
manager.Decode();
pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData);
if (m_session_id)
m_retData["session-id"] = xmlrpc_c::value_int(m_session_id);

View File

@ -66,6 +66,9 @@ TranslationRequest : public virtual Moses::TranslationTask
pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
// void
// output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
void
add_phrase_aln_info(Moses::Hypothesis const& h,
std::vector<xmlrpc_c::value>& aInfo) const;

View File

@ -90,9 +90,10 @@ def run_instance(cmd_base, threads, tasks, n_best=False):
cmd.append('--threads')
cmd.append(str(threads))
try:
# Queue of tasks instance is currently working on, limited to the number of
# threads. The queue should be kept full for optimal CPU usage.
work = Queue.Queue(maxsize=threads)
# Queue of tasks instance is currently working on, limited to the number
# of threads * 2 (minimal buffering). The queue should be kept full for
# optimal CPU usage.
work = Queue.Queue(maxsize=(threads * 2))
# Multi-threaded instance
moses = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)