mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
Query member phrase tables in batch mode for forward compatibility
Use GetTargetPhraseCollectionBatch instead of GetTargetPhraseCollectionLEGACY
This commit is contained in:
parent
e7627e04ed
commit
2462c81f7a
@ -20,7 +20,6 @@
|
||||
#include "moses/TranslationModel/PhraseDictionaryGroup.h"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
#include "util/exception.hh"
|
||||
|
||||
@ -30,18 +29,18 @@ using namespace boost;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
|
||||
: PhraseDictionary(line, true),
|
||||
m_numModels(0),
|
||||
m_totalModelScores(0),
|
||||
m_phraseCounts(false),
|
||||
m_wordCounts(false),
|
||||
m_modelBitmapCounts(false),
|
||||
m_restrict(false),
|
||||
m_haveDefaultScores(false),
|
||||
m_defaultAverageOthers(false),
|
||||
m_scoresPerModel(0),
|
||||
m_haveMmsaptLrFunc(false)
|
||||
PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line) :
|
||||
PhraseDictionary(line, true),
|
||||
m_numModels(0),
|
||||
m_totalModelScores(0),
|
||||
m_phraseCounts(false),
|
||||
m_wordCounts(false),
|
||||
m_modelBitmapCounts(false),
|
||||
m_restrict(false),
|
||||
m_haveDefaultScores(false),
|
||||
m_defaultAverageOthers(false),
|
||||
m_scoresPerModel(0),
|
||||
m_haveMmsaptLrFunc(false)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
@ -61,12 +60,12 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
|
||||
m_wordCounts = Scan<bool>(value);
|
||||
} else if (key == "model-bitmap-counts") {
|
||||
m_modelBitmapCounts = Scan<bool>(value);
|
||||
} else if (key =="default-scores") {
|
||||
} else if (key == "default-scores") {
|
||||
m_haveDefaultScores = true;
|
||||
m_defaultScores = Scan<float>(Tokenize(value, ","));
|
||||
} else if (key =="default-average-others") {
|
||||
} else if (key == "default-average-others") {
|
||||
m_defaultAverageOthers = Scan<bool>(value);
|
||||
} else if (key =="mmsapt-lr-func") {
|
||||
} else if (key == "mmsapt-lr-func") {
|
||||
m_haveMmsaptLrFunc = true;
|
||||
} else {
|
||||
PhraseDictionary::SetParameter(key, value);
|
||||
@ -93,12 +92,12 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
|
||||
m_scoresPerModel = nScores;
|
||||
} else if (m_defaultAverageOthers) {
|
||||
UTIL_THROW_IF2(nScores != m_scoresPerModel,
|
||||
m_description << ": member models must have the same number of scores when using default-average-others");
|
||||
m_description << ": member models must have the same number of scores when using default-average-others");
|
||||
}
|
||||
}
|
||||
}
|
||||
UTIL_THROW_IF2(!pdFound,
|
||||
m_description << ": could not find member phrase table " << pdName);
|
||||
m_description << ": could not find member phrase table " << pdName);
|
||||
}
|
||||
m_totalModelScores = numScoreComponents;
|
||||
|
||||
@ -113,7 +112,7 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
|
||||
numScoreComponents += (pow(2, m_numModels) - 1);
|
||||
}
|
||||
UTIL_THROW_IF2(numScoreComponents != m_numScoreComponents,
|
||||
m_description << ": feature count mismatch: specify \"num-features=" << numScoreComponents << "\" and supply " << numScoreComponents << " weights");
|
||||
m_description << ": feature count mismatch: specify \"num-features=" << numScoreComponents << "\" and supply " << numScoreComponents << " weights");
|
||||
|
||||
#ifdef PT_UG
|
||||
// Locate mmsapt lexical reordering functions if specified
|
||||
@ -129,7 +128,7 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
|
||||
// Determine "zero" scores for features
|
||||
if (m_haveDefaultScores) {
|
||||
UTIL_THROW_IF2(m_defaultScores.size() != m_numScoreComponents,
|
||||
m_description << ": number of specified default scores is unequal to number of member model scores");
|
||||
m_description << ": number of specified default scores is unequal to number of member model scores");
|
||||
} else {
|
||||
// Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0)
|
||||
// or a smoothed "not in model" score)
|
||||
@ -137,253 +136,233 @@ void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts)
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryGroup::InitializeForInput(const ttasksptr& ttask)
|
||||
{
|
||||
// Member models are registered as FFs and should already be initialized
|
||||
}
|
||||
|
||||
void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
|
||||
const ttasksptr& ttask, const InputPathList& inputPathQueue) const
|
||||
const ttasksptr& ttask,
|
||||
const InputPathList& inputPathQueue) const
|
||||
{
|
||||
// Some implementations (mmsapt) do work in PrefixExists
|
||||
BOOST_FOREACH(const InputPath* inputPath, inputPathQueue) {
|
||||
const Phrase& phrase = inputPath->GetPhrase();
|
||||
BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
|
||||
pd->PrefixExists(ttask, phrase);
|
||||
}
|
||||
// For each member phrase table, add translation options to input paths
|
||||
// (Run each phrase table lookup normally)
|
||||
BOOST_FOREACH(const PhraseDictionary* pd, m_memberPDs) {
|
||||
pd->GetTargetPhraseCollectionBatch(ttask, inputPathQueue);
|
||||
}
|
||||
// Look up each input in each model
|
||||
|
||||
// Below: "collapse" translation options from all member tables into a single
|
||||
// option for each <source, target> phrase pair for this table. Remove
|
||||
// original options from other tables.
|
||||
|
||||
// For each input path (source phrase)
|
||||
BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
|
||||
const Phrase &phrase = inputPath->GetPhrase();
|
||||
TargetPhraseCollection::shared_ptr targetPhrases =
|
||||
this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
|
||||
inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
|
||||
}
|
||||
}
|
||||
const Phrase& source = inputPath->GetPhrase();
|
||||
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
|
||||
const Phrase& src) const
|
||||
{
|
||||
UTIL_THROW2("Don't call me without the translation task.");
|
||||
}
|
||||
// Aggregation of target phrases and corresponding statistics (scores, tables seen by)
|
||||
vector<TargetPhrase*> phraseList;
|
||||
PhraseMap phraseMap;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryGroup::
|
||||
GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
|
||||
{
|
||||
TargetPhraseCollection::shared_ptr ret
|
||||
= CreateTargetPhraseCollection(ttask, src);
|
||||
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
|
||||
const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
|
||||
return ret;
|
||||
}
|
||||
// For each member phrase table
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryGroup::
|
||||
CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
|
||||
{
|
||||
// Aggregation of phrases and corresponding statistics (scores, models seen by)
|
||||
vector<TargetPhrase*> phraseList;
|
||||
typedef unordered_map<const TargetPhrase*, PDGroupPhrase, UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;
|
||||
PhraseMap phraseMap;
|
||||
// "Pop" target phrases for this source from current table
|
||||
const PhraseDictionary& pd = *m_memberPDs[i];
|
||||
TargetPhraseCollection::shared_ptr targets = inputPath->GetTargetPhrases(
|
||||
pd);
|
||||
inputPath->SetTargetPhrases(pd, TargetPhraseCollection::shared_ptr(),
|
||||
NULL);
|
||||
|
||||
// For each model
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
// For each target phrase for this <source, table>
|
||||
if (targets != NULL) {
|
||||
BOOST_FOREACH(const TargetPhrase* targetPhrase, *targets) {
|
||||
|
||||
// Collect phrases from this table
|
||||
const PhraseDictionary& pd = *m_memberPDs[i];
|
||||
TargetPhraseCollection::shared_ptr
|
||||
ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
|
||||
vector<float> scores =
|
||||
targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
|
||||
|
||||
if (ret_raw != NULL) {
|
||||
// Process each phrase from table
|
||||
BOOST_FOREACH(const TargetPhrase* targetPhrase, *ret_raw) {
|
||||
vector<float> raw_scores =
|
||||
targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
|
||||
// Phrase not in collection -> add if unrestricted or first model
|
||||
PhraseMap::iterator iter = phraseMap.find(targetPhrase);
|
||||
if (iter == phraseMap.end()) {
|
||||
if (m_restrict && i > 0) {
|
||||
continue;
|
||||
}
|
||||
// Copy phrase to avoid disrupting base model
|
||||
TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
|
||||
// Correct future cost estimates and total score
|
||||
phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
|
||||
vector<FeatureFunction*> pd_feature;
|
||||
pd_feature.push_back(m_memberPDs[i]);
|
||||
const vector<FeatureFunction*> pd_feature_const(pd_feature);
|
||||
phrase->EvaluateInIsolation(source, pd_feature_const);
|
||||
// Zero out scores from original phrase table
|
||||
phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
|
||||
// Add phrase entry
|
||||
phraseList.push_back(phrase);
|
||||
phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores,
|
||||
m_numModels);
|
||||
} else {
|
||||
// For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
|
||||
TargetPhrase* phrase = iter->second.m_targetPhrase;
|
||||
BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
|
||||
phrase->SetExtraScores(pair.first, pair.second);
|
||||
}
|
||||
}
|
||||
// Don't repeat lookup if phrase already found
|
||||
PDGroupPhrase& pdgPhrase =
|
||||
(iter == phraseMap.end()) ?
|
||||
phraseMap.find(targetPhrase)->second : iter->second;
|
||||
|
||||
// Phrase not in collection -> add if unrestricted or first model
|
||||
PhraseMap::iterator iter = phraseMap.find(targetPhrase);
|
||||
if (iter == phraseMap.end()) {
|
||||
if (m_restrict && i > 0) {
|
||||
continue;
|
||||
// Copy scores from this model
|
||||
for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
|
||||
pdgPhrase.m_scores[offset + j] = scores[j];
|
||||
}
|
||||
|
||||
// Copy phrase to avoid disrupting base model
|
||||
TargetPhrase* phrase = new TargetPhrase(*targetPhrase);
|
||||
// Correct future cost estimates and total score
|
||||
phrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
|
||||
vector<FeatureFunction*> pd_feature;
|
||||
pd_feature.push_back(m_memberPDs[i]);
|
||||
const vector<FeatureFunction*> pd_feature_const(pd_feature);
|
||||
phrase->EvaluateInIsolation(src, pd_feature_const);
|
||||
// Zero out scores from original phrase table
|
||||
phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
|
||||
// Add phrase entry
|
||||
phraseList.push_back(phrase);
|
||||
phraseMap[targetPhrase] = PDGroupPhrase(phrase, m_defaultScores, m_numModels);
|
||||
} else {
|
||||
// For existing phrases: merge extra scores (such as lr-func scores for mmsapt)
|
||||
TargetPhrase* phrase = iter->second.m_targetPhrase;
|
||||
BOOST_FOREACH(const TargetPhrase::ScoreCache_t::value_type pair, targetPhrase->GetExtraScores()) {
|
||||
phrase->SetExtraScores(pair.first, pair.second);
|
||||
}
|
||||
}
|
||||
// Don't repeat lookup if phrase already found
|
||||
PDGroupPhrase& pdgPhrase = (iter == phraseMap.end()) ? phraseMap.find(targetPhrase)->second : iter->second;
|
||||
|
||||
// Copy scores from this model
|
||||
for (size_t j = 0; j < pd.GetNumScoreComponents(); ++j) {
|
||||
pdgPhrase.m_scores[offset + j] = raw_scores[j];
|
||||
}
|
||||
|
||||
// Phrase seen by this model
|
||||
pdgPhrase.m_seenBy[i] = true;
|
||||
}
|
||||
}
|
||||
offset += pd.GetNumScoreComponents();
|
||||
}
|
||||
|
||||
// Compute additional scores as phrases are added to return collection
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
|
||||
BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
|
||||
PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
|
||||
|
||||
// Score order (example with 2 models)
|
||||
// member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
|
||||
|
||||
// Extra scores added after member model scores
|
||||
size_t offset = m_totalModelScores;
|
||||
// Phrase count (per member model)
|
||||
if (m_phraseCounts) {
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (pdgPhrase.m_seenBy[i]) {
|
||||
pdgPhrase.m_scores[offset + i] = 1;
|
||||
// Phrase seen by this model
|
||||
pdgPhrase.m_seenBy[i] = true;
|
||||
}
|
||||
}
|
||||
offset += m_numModels;
|
||||
}
|
||||
// Word count (per member model)
|
||||
if (m_wordCounts) {
|
||||
size_t wc = pdgPhrase.m_targetPhrase->GetSize();
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (pdgPhrase.m_seenBy[i]) {
|
||||
pdgPhrase.m_scores[offset + i] = wc;
|
||||
}
|
||||
}
|
||||
offset += m_numModels;
|
||||
offset += pd.GetNumScoreComponents();
|
||||
}
|
||||
|
||||
// Model bitmap features (one feature per possible bitmap)
|
||||
// e.g. seen by models 1 and 3 but not 2 -> "101" fires
|
||||
if (m_modelBitmapCounts) {
|
||||
// Throws exception if someone tries to combine more than 64 models
|
||||
pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
|
||||
offset += m_seenByAll.to_ulong();
|
||||
}
|
||||
// Compute additional scores as phrases are added to return collection
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
|
||||
BOOST_FOREACH(TargetPhrase* phrase, phraseList) {
|
||||
|
||||
// Average other-model scores to fill in defaults when models have not seen
|
||||
// this phrase
|
||||
if (m_defaultAverageOthers) {
|
||||
// Average seen scores
|
||||
if (pdgPhrase.m_seenBy != m_seenByAll) {
|
||||
vector<float> avgScores(m_scoresPerModel, 0);
|
||||
size_t seenBy = 0;
|
||||
offset = 0;
|
||||
// sum
|
||||
PDGroupPhrase& pdgPhrase = phraseMap.find(phrase)->second;
|
||||
|
||||
// Score order (example with 2 models)
|
||||
// member1_scores member2_scores [m1_pc m2_pc] [m1_wc m2_wc]
|
||||
|
||||
// Extra scores added after member model scores
|
||||
size_t offset = m_totalModelScores;
|
||||
// Phrase count (per member model)
|
||||
if (m_phraseCounts) {
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (pdgPhrase.m_seenBy[i]) {
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
avgScores[j] += pdgPhrase.m_scores[offset + j];
|
||||
}
|
||||
seenBy += 1;
|
||||
pdgPhrase.m_scores[offset + i] = 1;
|
||||
}
|
||||
offset += m_scoresPerModel;
|
||||
}
|
||||
// divide
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
avgScores[j] /= seenBy;
|
||||
}
|
||||
// copy
|
||||
offset = 0;
|
||||
offset += m_numModels;
|
||||
}
|
||||
// Word count (per member model)
|
||||
if (m_wordCounts) {
|
||||
size_t wc = pdgPhrase.m_targetPhrase->GetSize();
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (!pdgPhrase.m_seenBy[i]) {
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
pdgPhrase.m_scores[offset + j] = avgScores[j];
|
||||
}
|
||||
if (pdgPhrase.m_seenBy[i]) {
|
||||
pdgPhrase.m_scores[offset + i] = wc;
|
||||
}
|
||||
offset += m_scoresPerModel;
|
||||
}
|
||||
#ifdef PT_UG
|
||||
// Also average LexicalReordering scores if specified
|
||||
// We don't necessarily have a lr-func for each model
|
||||
if (m_haveMmsaptLrFunc) {
|
||||
SPTR<Scores> avgLRScores;
|
||||
offset += m_numModels;
|
||||
}
|
||||
|
||||
// Model bitmap features (one feature per possible bitmap)
|
||||
// e.g. seen by models 1 and 3 but not 2 -> "101" fires
|
||||
if (m_modelBitmapCounts) {
|
||||
// Throws exception if someone tries to combine more than 64 models
|
||||
pdgPhrase.m_scores[offset + (pdgPhrase.m_seenBy.to_ulong() - 1)] = 1;
|
||||
offset += m_seenByAll.to_ulong();
|
||||
}
|
||||
|
||||
// Average other-model scores to fill in defaults when models have not seen
|
||||
// this phrase
|
||||
if (m_defaultAverageOthers) {
|
||||
// Average seen scores
|
||||
if (pdgPhrase.m_seenBy != m_seenByAll) {
|
||||
vector<float> avgScores(m_scoresPerModel, 0);
|
||||
size_t seenBy = 0;
|
||||
// For each model
|
||||
offset = 0;
|
||||
// sum
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
|
||||
// Add if phrase seen and model has lr-func
|
||||
if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
|
||||
const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
|
||||
if (!avgLRScores) {
|
||||
avgLRScores.reset(new Scores(*scores));
|
||||
} else {
|
||||
for (size_t j = 0; j < scores->size(); ++j) {
|
||||
(*avgLRScores)[j] += (*scores)[j];
|
||||
}
|
||||
if (pdgPhrase.m_seenBy[i]) {
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
avgScores[j] += pdgPhrase.m_scores[offset + j];
|
||||
}
|
||||
seenBy += 1;
|
||||
}
|
||||
offset += m_scoresPerModel;
|
||||
}
|
||||
// Make sure we have at least one lr-func
|
||||
if (avgLRScores) {
|
||||
// divide
|
||||
for (size_t j = 0; j < avgLRScores->size(); ++j) {
|
||||
(*avgLRScores)[j] /= seenBy;
|
||||
// divide
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
avgScores[j] /= seenBy;
|
||||
}
|
||||
// copy
|
||||
offset = 0;
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (!pdgPhrase.m_seenBy[i]) {
|
||||
for (size_t j = 0; j < m_scoresPerModel; ++j) {
|
||||
pdgPhrase.m_scores[offset + j] = avgScores[j];
|
||||
}
|
||||
}
|
||||
// set
|
||||
offset += m_scoresPerModel;
|
||||
}
|
||||
#ifdef PT_UG
|
||||
// Also average LexicalReordering scores if specified
|
||||
// We don't necessarily have a lr-func for each model
|
||||
if (m_haveMmsaptLrFunc) {
|
||||
SPTR<Scores> avgLRScores;
|
||||
size_t seenBy = 0;
|
||||
// For each model
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
|
||||
if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
|
||||
pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
|
||||
// Add if phrase seen and model has lr-func
|
||||
if (pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
|
||||
const Scores* scores = pdgPhrase.m_targetPhrase->GetExtraScores(lrFunc);
|
||||
if (!avgLRScores) {
|
||||
avgLRScores.reset(new Scores(*scores));
|
||||
} else {
|
||||
for (size_t j = 0; j < scores->size(); ++j) {
|
||||
(*avgLRScores)[j] += (*scores)[j];
|
||||
}
|
||||
}
|
||||
seenBy += 1;
|
||||
}
|
||||
}
|
||||
// Make sure we have at least one lr-func
|
||||
if (avgLRScores) {
|
||||
// divide
|
||||
for (size_t j = 0; j < avgLRScores->size(); ++j) {
|
||||
(*avgLRScores)[j] /= seenBy;
|
||||
}
|
||||
// set
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
const LexicalReordering* lrFunc = *m_mmsaptLrFuncs[i];
|
||||
if (!pdgPhrase.m_seenBy[i] && lrFunc != NULL) {
|
||||
pdgPhrase.m_targetPhrase->SetExtraScores(lrFunc, avgLRScores);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Assign scores
|
||||
phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
|
||||
// Correct future cost estimates and total score
|
||||
phrase->EvaluateInIsolation(source, pd_feature_const);
|
||||
ret->Add(phrase);
|
||||
}
|
||||
|
||||
// Assign scores
|
||||
phrase->GetScoreBreakdown().Assign(this, pdgPhrase.m_scores);
|
||||
// Correct future cost estimates and total score
|
||||
phrase->EvaluateInIsolation(src, pd_feature_const);
|
||||
ret->Add(phrase);
|
||||
// Add target phrases to path for this input phrase
|
||||
const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
|
||||
inputPath->SetTargetPhrases(*this, ret, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ChartRuleLookupManager*
|
||||
PhraseDictionaryGroup::
|
||||
CreateRuleLookupManager(const ChartParser &,
|
||||
const ChartCellCollectionBase&, size_t)
|
||||
ChartRuleLookupManager* PhraseDictionaryGroup::CreateRuleLookupManager(
|
||||
const ChartParser &,
|
||||
const ChartCellCollectionBase&,
|
||||
size_t)
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
|
||||
}
|
||||
|
||||
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
||||
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||
// copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
||||
void PhraseDictionaryGroup::CacheForCleanup(
|
||||
TargetPhraseCollection::shared_ptr tpc)
|
||||
{
|
||||
PhraseCache &ref = GetPhraseCache();
|
||||
ref.push_back(tpc);
|
||||
}
|
||||
|
||||
void
|
||||
PhraseDictionaryGroup::
|
||||
CleanUpAfterSentenceProcessing(const InputType &source)
|
||||
void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
|
||||
const InputType &source)
|
||||
{
|
||||
GetPhraseCache().clear();
|
||||
CleanUpComponentModels(source);
|
||||
|
@ -39,17 +39,24 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct PDGroupPhrase {
|
||||
struct PDGroupPhrase
|
||||
{
|
||||
TargetPhrase* m_targetPhrase;
|
||||
std::vector<float> m_scores;
|
||||
boost::dynamic_bitset<> m_seenBy;
|
||||
|
||||
PDGroupPhrase() : m_targetPhrase(NULL) { }
|
||||
PDGroupPhrase() :
|
||||
m_targetPhrase(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
PDGroupPhrase(TargetPhrase* targetPhrase, const std::vector<float>& scores, const size_t nModels)
|
||||
: m_targetPhrase(targetPhrase),
|
||||
m_scores(scores),
|
||||
m_seenBy(nModels) { }
|
||||
PDGroupPhrase(
|
||||
TargetPhrase* targetPhrase,
|
||||
const std::vector<float>& scores,
|
||||
const size_t nModels) :
|
||||
m_targetPhrase(targetPhrase), m_scores(scores), m_seenBy(nModels)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/** Combines multiple phrase tables into a single interface. Each member phrase
|
||||
@ -64,24 +71,17 @@ class PhraseDictionaryGroup: public PhraseDictionary
|
||||
public:
|
||||
PhraseDictionaryGroup(const std::string& line);
|
||||
void Load(AllOptions::ptr const& opts);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
CreateTargetPhraseCollection(const ttasksptr& ttask,
|
||||
const Phrase& src) const;
|
||||
std::vector<std::vector<float> > getWeights(size_t numWeights,
|
||||
bool normalize) const;
|
||||
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
|
||||
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
|
||||
void CleanUpAfterSentenceProcessing(const InputType& source);
|
||||
void CleanUpComponentModels(const InputType& source);
|
||||
// functions below override the base class
|
||||
void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
|
||||
const InputPathList &inputPathQueue) const;
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
|
||||
const Phrase& src) const;
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
|
||||
const ttasksptr& ttask, const Phrase& src) const;
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&,
|
||||
const ChartCellCollectionBase&, std::size_t);
|
||||
void GetTargetPhraseCollectionBatch(
|
||||
const ttasksptr& ttask,
|
||||
const InputPathList &inputPathQueue) const;
|
||||
ChartRuleLookupManager* CreateRuleLookupManager(
|
||||
const ChartParser&,
|
||||
const ChartCellCollectionBase&,
|
||||
std::size_t);
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
@ -109,8 +109,10 @@ protected:
|
||||
bool m_haveMmsaptLrFunc;
|
||||
// pointers to pointers since member mmsapts may not load these until later
|
||||
std::vector<LexicalReordering**> m_mmsaptLrFuncs;
|
||||
typedef boost::unordered_map<const TargetPhrase*, PDGroupPhrase,
|
||||
UnorderedComparer<Phrase>, UnorderedComparer<Phrase> > PhraseMap;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_mutex m_lock_cache;
|
||||
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
|
||||
@ -119,13 +121,14 @@ protected:
|
||||
#endif
|
||||
SentenceCache m_sentenceCache;
|
||||
|
||||
PhraseCache& GetPhraseCache() {
|
||||
PhraseCache& GetPhraseCache()
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
{
|
||||
// first try read-only lock
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
|
||||
SentenceCache::iterator i = m_sentenceCache.find(
|
||||
boost::this_thread::get_id());
|
||||
boost::this_thread::get_id());
|
||||
if (i != m_sentenceCache.end())
|
||||
return i->second;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user