Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder

This commit is contained in:
phikoehn 2013-05-14 07:19:45 +01:00
commit 8dea116064
33 changed files with 188 additions and 119 deletions

View File

@ -244,7 +244,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
}
// scores
ret->SetScore(&phraseDict, m_scores);
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
ret->Evaluate();
// alignments

View File

@ -1436,6 +1436,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryMultiModelCounts.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryMultiModelCounts.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryTree.cpp</name>
<type>1</type>

View File

@ -154,6 +154,10 @@ public:
bool addTopts = (si != params.end());
si = params.find("report-all-factors");
bool reportAllFactors = (si != params.end());
si = params.find("nbest");
int nbest_size = (si == params.end()) ? 0 : int(xmlrpc_c::value_int(si->second));
si = params.find("nbest-distinct");
bool nbest_distinct = (si != params.end());
const StaticData &staticData = StaticData::Instance();
@ -198,6 +202,9 @@ public:
if (addTopts) {
insertTranslationOptions(manager,retData);
}
if (nbest_size>0) {
outputNBest(manager, retData, nbest_size, nbest_distinct, reportAllFactors);
}
}
pair<string, xmlrpc_c::value>
text("text", xmlrpc_c::value_string(out.str()));
@ -248,7 +255,6 @@ public:
}
// Strict weak ordering of search-graph nodes by hypothesis id, for sorting
// the graph before serialization.
// Fix: the second argument was previously taken by value
// (`const SearchGraphNode b`), copying the whole node on every comparison
// the sort performs; take it by const reference like the first argument.
// Callers are unaffected.
bool compareSearchGraphNode(const SearchGraphNode& a, const SearchGraphNode& b) {
  return a.hypo->GetId() < b.hypo->GetId();
}
@ -283,6 +289,45 @@ public:
retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_array(searchGraphXml)));
}
void outputNBest(const Manager& manager,
map<string, xmlrpc_c::value>& retData,
const int n=100,
const bool distinct=false,
const bool reportAllFactors=false)
{
TrellisPathList nBestList;
manager.CalcNBest(n, nBestList, distinct);
vector<xmlrpc_c::value> nBestXml;
TrellisPathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
const TrellisPath &path = **iter;
const std::vector<const Hypothesis *> &edges = path.GetEdges();
map<string, xmlrpc_c::value> nBestXMLItem;
// output surface
ostringstream out;
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const Phrase& phrase = edge.GetCurrTargetPhrase();
if(reportAllFactors) {
out << phrase << " ";
} else {
for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++) {
const Factor *factor = phrase.GetFactor(pos, 0);
out << *factor << " ";
}
}
}
nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());
// weighted score
nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetTotalScore());
nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem));
}
retData.insert(pair<string, xmlrpc_c::value>("nbest", xmlrpc_c::value_array(nBestXml)));
}
void insertTranslationOptions(Manager& manager, map<string, xmlrpc_c::value>& retData) {
const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions();
vector<xmlrpc_c::value> toptsXml;

View File

@ -168,7 +168,7 @@ void ChartHypothesis::CalcScore()
const std::vector<const StatelessFeatureFunction*>& sfs =
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i)
if (sfs[i]->GetStatelessFeatureType() == NotCacheable)
if (sfs[i]->GetStatelessFeatureType() == RequiresSegmentation)
sfs[i]->EvaluateChart(ChartBasedFeatureContext(this),&m_scoreBreakdown);
const std::vector<const StatefulFeatureFunction*>& ffs =

View File

@ -134,8 +134,8 @@ void ChartManager::AddXmlChartOptions() {
i != xmlChartOptionsList.end(); ++i) {
ChartTranslationOptions* opt = *i;
Moses::Scores wordPenaltyScore(1, -1);
opt->GetTargetPhraseCollection().GetCollection()[0]->SetScore((FeatureFunction*)staticData.GetWordPenaltyProducer(), wordPenaltyScore);
TargetPhrase &targetPhrase = *opt->GetTargetPhraseCollection().GetCollection()[0];
targetPhrase.GetScoreBreakdown().Assign(staticData.GetWordPenaltyProducer(), -1);
const WordsRange &range = opt->GetSourceWordsRange();
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
@ -354,7 +354,7 @@ void ChartManager::PreCalculateScores()
StatelessFeatureFunction::GetStatelessFeatureFunctions();
ScoreComponentCollection& breakdown = m_precalculatedScores[*targetPhrase];
for (size_t k = 0; k < sfs.size(); ++k) {
if (sfs[k]->GetStatelessFeatureType() == DependsOnSource) {
if (sfs[k]->GetStatelessFeatureType() == RequiresSource) {
sfs[k]->EvaluateChart(context,&breakdown);
}
}

View File

@ -44,7 +44,6 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
vector<float> wordPenaltyScore(1, -1);
size_t isDigit = 0;
if (staticData.GetDropUnknown()) {
@ -84,11 +83,11 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetWord.CreateUnknownWord(sourceWord);
// scores
vector<float> unknownScore(1, FloorScore(TransformScore(prob)));
//targetPhrase->SetScore();
targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
targetPhrase->SetScore(staticData.GetWordPenaltyProducer(), wordPenaltyScore);
float unknownScore = FloorScore(TransformScore(prob));
targetPhrase->GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate();
targetPhrase->SetSourcePhrase(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
@ -98,7 +97,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
} // for (iterLHS
} else {
// drop source word. create blank trans opt
vector<float> unknownScore(1, FloorScore(-numeric_limits<float>::infinity()));
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
TargetPhrase *targetPhrase = new TargetPhrase();
// loop
@ -112,8 +111,10 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
CHECK(targetLHS.GetFactor(0) != NULL);
targetPhrase->GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate();
targetPhrase->SetSourcePhrase(*unksrc);
targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
targetPhrase->SetTargetLHS(targetLHS);
// chart rule

View File

@ -142,7 +142,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
TargetPhrase outPhrase(inPhrase);
outPhrase.SetScore(generationScore);
outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();

View File

@ -76,7 +76,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
continue;
}
outPhrase.SetScore(transScores);
outPhrase.GetScoreBreakdown().PlusEquals(transScores);
outPhrase.Evaluate(); // need to do this as all non-transcores would be screwed up
outPhrase.MergeFactors(targetPhrase, m_newOutputFactors);

View File

@ -96,10 +96,8 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
bool IsDecodeFeature() const
{ return true; }
StatelessFeatureType GetStatelessFeatureType() const
{ return SetByOriginator; }
};
class MetaFeatureProducer : public StatelessFeatureFunction

View File

@ -127,7 +127,6 @@ FeatureFunction::~FeatureFunction() {}
void FeatureFunction::ParseLine(const std::string& description, const std::string &line)
{
cerr << "line=" << line << endl;
vector<string> toks = Tokenize(line);
CHECK(toks.size());

View File

@ -132,9 +132,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const = 0;
virtual bool IsDecodeFeature() const
{ return false; }
};
/** base class for all stateless feature functions.
@ -163,7 +160,7 @@ public:
ScoreComponentCollection* accumulator) const = 0;
virtual StatelessFeatureType GetStatelessFeatureType() const
{ return CacheableInPhraseTable; }
{ return RequiresTargetPhrase; }
bool IsStateless() const
{ return true; }

View File

@ -85,7 +85,7 @@ public:
virtual StatelessFeatureType GetStatelessFeatureType() const
{ return DependsOnSource; }
{ return RequiresSource; }
};

View File

@ -101,7 +101,7 @@ public:
StringPiece targetWord) const;
virtual StatelessFeatureType GetStatelessFeatureType() const
{ return DependsOnSource; }
{ return RequiresSource; }
};

View File

@ -286,7 +286,7 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
if (ff.GetStatelessFeatureType() == NotCacheable) {
if (ff.GetStatelessFeatureType() == RequiresSegmentation) {
EvaluateWith(ff);
}
}

View File

@ -287,7 +287,7 @@ protected:
targetPhrase.SetSourcePhrase(*srcPtr);
targetPhrase.SetScore(m_obj, scoreVector);
targetPhrase.GetScoreBreakdown().Assign(m_obj, scoreVector);
targetPhrase.Evaluate();
}

View File

@ -186,6 +186,14 @@ void ScoreComponentCollection::Assign(const FeatureFunction* sp, const string li
}
}
void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
{
size_t numScores = sp->GetNumScoreComponents();
Scores vec(numScores, 0);
Assign(sp, vec);
}
}

View File

@ -388,6 +388,7 @@ public:
float GetWeightedScore() const;
void ZeroDenseFeatures(const FeatureFunction* sp);
void ZeroAllLM(const LMList& lmList);
void PlusEqualsAllLM(const LMList& lmList, const ScoreComponentCollection& rhs);
void L1Normalise();

View File

@ -38,7 +38,7 @@ public:
const AlignmentInfo &alignmentInfo) const;
virtual StatelessFeatureType GetStatelessFeatureType() const
{ return NotCacheable; }
{ return RequiresSegmentation; }
};
}

View File

@ -55,9 +55,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ScoreComponentCollection.h"
#include "LM/Ken.h"
#ifdef LM_IRST
//#ifdef LM_IRST
#include "LM/IRST.h"
#endif
//#endif
#ifdef HAVE_SYNLM
#include "SyntacticLanguageModel.h"
@ -630,13 +630,13 @@ bool StaticData::LoadData(Parameter *parameter)
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#ifdef LM_IRST
//#ifdef LM_IRST
else if (feature == "IRSTLM") {
LanguageModelIRST *model = new LanguageModelIRST(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#endif
//#endif
else if (feature == "Generation") {
GenerationDictionary *model = new GenerationDictionary(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());

View File

@ -86,7 +86,19 @@ void TargetPhrase::Evaluate()
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (!ff.IsDecodeFeature()) {
bool evaluate = false;
if (!ff.IsStateless()) {
evaluate = true;
}
else {
const StatelessFeatureFunction &sff = static_cast<const StatelessFeatureFunction&>(ff);
if (sff.GetStatelessFeatureType() != SetByOriginator) {
evaluate = true;
}
}
if (evaluate) {
ff.Evaluate(*this, m_scoreBreakdown, futureScoreBreakdown);
}
}
@ -103,11 +115,14 @@ void TargetPhrase::SetXMLScore(float score)
const FeatureFunction* prod = staticData.GetPhraseDictionaries()[0];
size_t numScores = prod->GetNumScoreComponents();
vector <float> scoreVector(numScores,score/numScores);
SetScore(prod, scoreVector);
m_scoreBreakdown.Assign(prod, scoreVector);
}
void TargetPhrase::SetInputScore(const Scores &scoreVector)
{
cerr << scoreVector.size() << endl;
//we use an existing score producer to figure out information for score setting (number of scores and weights)
const StaticData &staticData = StaticData::Instance();
const FeatureFunction* prod = staticData.GetPhraseDictionaries()[0];
@ -117,15 +132,7 @@ void TargetPhrase::SetInputScore(const Scores &scoreVector)
Scores sizedScoreVector = scoreVector;
sizedScoreVector.resize(prod->GetNumScoreComponents(),0.0f);
SetScore(prod, sizedScoreVector);
}
// used to set translation or gen score
void TargetPhrase::SetScore(const FeatureFunction* producer, const Scores &scoreVector)
{
// used when creating translations of unknown words (chart decoding)
m_scoreBreakdown.Assign(producer, scoreVector);
m_fullScore = m_scoreBreakdown.GetWeightedScore();
m_scoreBreakdown.Assign(prod, sizedScoreVector);
}
TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const

View File

@ -65,15 +65,9 @@ public:
void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);
// used to set translation or gen score
void SetScore(const FeatureFunction* producer, const Scores &scoreVector);
void SetXMLScore(float score);
void SetInputScore(const Scores &scoreVector);
void SetScore(const ScoreComponentCollection &scores)
{
m_scoreBreakdown.PlusEquals(scores);
}
TargetPhrase *MergeNext(const TargetPhrase &targetPhrase) const;
// used for translation step
@ -92,9 +86,9 @@ public:
}
inline const ScoreComponentCollection &GetScoreBreakdown() const
{
return m_scoreBreakdown;
}
{ return m_scoreBreakdown; }
inline ScoreComponentCollection &GetScoreBreakdown()
{ return m_scoreBreakdown; }
//TODO: Probably shouldn't copy this, but otherwise ownership is unclear
void SetSourcePhrase(const Phrase& p)

View File

@ -38,7 +38,7 @@ public:
const AlignmentInfo &alignmentInfo) const;
virtual StatelessFeatureType GetStatelessFeatureType() const
{ return NotCacheable; }
{ return RequiresSegmentation; }
};

View File

@ -439,7 +439,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
if(scores.size() == m_numScoreComponent)
{
targetPhrase->SetScore(&m_phraseDictionary, scores);
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
targetPhrase->Evaluate();
if(m_containsAlignmentInfo)

View File

@ -52,7 +52,7 @@ const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCol
//std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),NegateScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->SetScore(this, scoreVector);
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->Evaluate();
//cout << *targetPhrase << "\t" << std::setprecision(8) << scoreVector[2] << endl;

View File

@ -47,6 +47,22 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
}
} // for
CHECK(m_pdStr.size() == m_multimodelweights.size());
}
/** Construct with an explicit description string forwarded to the
 *  PhraseDictionary base (used by subclasses that reuse this argument
 *  parsing under their own name — see the counts-based variant).
 *
 *  Recognised key=value arguments from the config line:
 *    components  comma-separated list of component phrase-table specs
 *    lambda      comma-separated per-model interpolation weights
 */
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &description, const std::string &line)
:PhraseDictionary(description, line)
{
  for (size_t argInd = 0; argInd < m_args.size(); ++argInd) {
    const vector<string> &arg = m_args[argInd];
    if (arg[0] == "lambda") {
      m_multimodelweights = Tokenize<float>(arg[1], ",");
    } else if (arg[0] == "components") {
      m_pdStr = Tokenize(arg[1], ",");
      m_numModels = m_pdStr.size();
    }
  } // for
  // must have exactly one weight per component model
  CHECK(m_pdStr.size() == m_multimodelweights.size());
}
@ -118,8 +134,9 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
{
for(size_t i = 0; i < m_numModels; ++i){
const PhraseDictionary &pd = *m_pd[i];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) m_pd[i]->GetTargetPhraseCollection( src);
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection( src);
if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
@ -132,7 +149,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
TargetPhrase * targetPhrase = *iterTargetPhrase;
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
std::string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
@ -140,6 +157,9 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
multiModelStatistics * statistics = new multiModelStatistics;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
// zero out scores from original phrase table
statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
Scores scoreVector(m_numScoreComponents);
statistics->p.resize(m_numScoreComponents);
for(size_t j = 0; j < m_numScoreComponents; ++j){
@ -147,7 +167,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
scoreVector[j] = -raw_scores[j];
}
statistics->targetPhrase->SetScore(this, scoreVector); // set scores to 0
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
(*allStats)[targetString] = statistics;
@ -181,7 +201,10 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
//assuming that last value is phrase penalty
scoreVector[m_numScoreComponents-1] = 1.0;
statistics->targetPhrase->SetScore(this, scoreVector);
for (size_t i = 0; i < scoreVector.size(); ++i) cerr << scoreVector[i] << " ";
cerr << endl;
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
ret->Add(new TargetPhrase(*statistics->targetPhrase));
}
return ret;

View File

@ -58,6 +58,7 @@ friend class CrossEntropy;
public:
PhraseDictionaryMultiModel(const std::string &line);
PhraseDictionaryMultiModel(const std::string &description, const std::string &line);
~PhraseDictionaryMultiModel();
bool InitDictionary();
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;

View File

@ -49,16 +49,33 @@ vector<string> tokenize( const char* input )
}
namespace Moses
{
PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::string &line)
:PhraseDictionaryMultiModel("PhraseDictionaryMultiModel", line)
{
PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(size_t numScoreComponent,
PhraseDictionaryFeature* feature): PhraseDictionaryMultiModel(numScoreComponent, feature)
{
m_feature_load = feature;
m_mode = "instance_weighting"; //TODO: set this in config; use m_mode to switch between interpolation and instance weighting
m_combineFunction = InstanceWeighting;
//m_mode = "interpolate";
//m_combineFunction = LinearInterpolationFromCounts;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
if (args[0] == "mode") {
m_mode =args[1];
if (m_mode == "instance_weighting")
m_combineFunction = InstanceWeighting;
else if (m_mode == "interpolate")
m_combineFunction = LinearInterpolationFromCounts;
else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
}
} // for
}
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
@ -69,7 +86,7 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
RemoveAllInColl(m_inverse_pd);
}
bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
bool PhraseDictionaryMultiModelCounts::InitDictionary(const vector<FactorType> &input
, const vector<FactorType> &output
, const vector<string> &config
, const vector<float> &weight
@ -78,27 +95,7 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
, const LMList &languageModels
, float weightWP)
{
m_languageModels = &languageModels;
m_weight = weight;
m_weightWP = weightWP;
m_input = input;
m_output = output;
m_tableLimit = tableLimit;
m_mode = config[4];
std::vector<std::string> files(config.begin()+5,config.end());
m_numModels = files.size();
if (m_mode == "instance_weighting")
m_combineFunction = InstanceWeighting;
else if (m_mode == "interpolate")
m_combineFunction = LinearInterpolationFromCounts;
else {
ostringstream msg;
msg << "combination mode unknown: " << m_mode;
throw runtime_error(msg.str());
}
/*
for(size_t i = 0; i < m_numModels; ++i){
@ -121,7 +118,6 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
//how many actual scores there are in the phrase tables
size_t numScoresCounts = 3;
size_t numScoresTargetCounts = 1;
if (implementation == Memory) {
if (!FileExists(main_table) && FileExists(main_table + ".gz")) main_table += ".gz";
@ -175,6 +171,8 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
}
*/
return true;
}
@ -214,7 +212,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
TargetPhrase * targetPhrase = *iterTargetPhrase;
vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);
string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
@ -228,7 +226,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
scoreVector[0] = -raw_scores[0];
scoreVector[1] = -raw_scores[1];
scoreVector[2] = -raw_scores[2];
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
(*allStats)[targetString] = statistics;
@ -281,7 +279,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseColl
scoreVector[3] = FloorScore(TransformScore(lexts));
scoreVector[4] = FloorScore(TransformScore(2.718));
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
}
catch (AlignmentException& e) {
continue;
@ -303,7 +301,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
if (ret_raw != NULL) {
TargetPhrase * targetPhrase = *(ret_raw->begin());
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature)[0]);
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(this)[0]);
}
// target phrase unknown
@ -497,14 +495,6 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
}
void PhraseDictionaryMultiModelCounts::CleanUpComponentModels(const InputType &source) {
for(size_t i = 0; i < m_numModels; ++i){
m_pd[i]->CleanUp(source);
m_inverse_pd[i]->CleanUp(source);
}
}
#ifdef WITH_DLIB
vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector) {
@ -668,4 +658,4 @@ double LinearInterpolationFromCounts(vector<float> &joint_counts, vector<float>
}
} //namespace
} //namespace

View File

@ -21,11 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define moses_PhraseDictionaryMultiModelCounts_h
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#ifndef WIN32
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
#include <boost/unordered_map.hpp>
@ -84,9 +79,9 @@ typedef std::vector< std::set<size_t> > AlignVector;
public:
PhraseDictionaryMultiModelCounts(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
PhraseDictionaryMultiModelCounts(const std::string &line);
~PhraseDictionaryMultiModelCounts();
bool Load(const std::vector<FactorType> &input
bool InitDictionary(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::vector<std::string> &files
, const std::vector<float> &weight
@ -106,7 +101,6 @@ public:
void FillLexicalCountsMarginal(std::string &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
void CleanUpComponentModels(const InputType &source);
#ifdef WITH_DLIB
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
#endif
@ -148,4 +142,4 @@ private:
} // end namespace
#endif
#endif

View File

@ -246,7 +246,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
targetPhrase->SetSparseScore(&ruleTable, sparseString);
}
targetPhrase->SetScore(&ruleTable, scoreVector);
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
targetPhrase->Evaluate();
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, targetPhrase->GetSourcePhrase(), *targetPhrase, sourceLHS);

View File

@ -242,7 +242,7 @@ namespace Moses
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->SetScore(this, scoreVector);
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->Evaluate();
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);

View File

@ -200,8 +200,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
vector<float> wordPenaltyScore(1, -1);
vector<float> unknownScore(1, FloorScore(TransformScore(0)));
float unknownScore = FloorScore(TransformScore(0));
// unknown word, add as trans opt
FactorCollection &factorCollection = FactorCollection::Instance();
@ -260,7 +259,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
}
targetPhrase.SetScore(unknownWordPenaltyProducer, unknownScore);
targetPhrase.GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
if (inputScores != NULL) {
targetPhrase.SetInputScore(*inputScores);

View File

@ -177,9 +177,11 @@ enum FormatType
enum StatelessFeatureType
{
CacheableInPhraseTable // simplest. eg. phrase table scores. word penalty, phrase penalty.
,DependsOnSource // can't be pre-computed during training, but can be computed before search.eg. source bag-of-word features
,NotCacheable // can't be pre-computed. Depends on segmentation during search. eg. span-length feature
SetByOriginator // The scores are set by the phrase table, generatio table, or unknown word handler
// They shouldn't be subsequently evaluated.
,RequiresTargetPhrase // Default. simplest, but not in phrase-table. eg. word penalty
,RequiresSource // can't be pre-computed during training, but can be computed before search.eg. source bag-of-word features
,RequiresSegmentation // can't be pre-computed. Depends on segmentation during search. eg. span-length feature
};
// typedef

View File

@ -58,7 +58,7 @@ public:
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
StatelessFeatureType GetStatelessFeatureType() const
{ return NotCacheable; }
{ return RequiresSegmentation; }
};
}