mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
8dea116064
@ -244,7 +244,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
|
||||
}
|
||||
|
||||
// scores
|
||||
ret->SetScore(&phraseDict, m_scores);
|
||||
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
|
||||
ret->Evaluate();
|
||||
|
||||
// alignments
|
||||
|
@ -1436,6 +1436,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryMultiModelCounts.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryMultiModelCounts.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryTree.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -154,6 +154,10 @@ public:
|
||||
bool addTopts = (si != params.end());
|
||||
si = params.find("report-all-factors");
|
||||
bool reportAllFactors = (si != params.end());
|
||||
si = params.find("nbest");
|
||||
int nbest_size = (si == params.end()) ? 0 : int(xmlrpc_c::value_int(si->second));
|
||||
si = params.find("nbest-distinct");
|
||||
bool nbest_distinct = (si != params.end());
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
@ -198,6 +202,9 @@ public:
|
||||
if (addTopts) {
|
||||
insertTranslationOptions(manager,retData);
|
||||
}
|
||||
if (nbest_size>0) {
|
||||
outputNBest(manager, retData, nbest_size, nbest_distinct, reportAllFactors);
|
||||
}
|
||||
}
|
||||
pair<string, xmlrpc_c::value>
|
||||
text("text", xmlrpc_c::value_string(out.str()));
|
||||
@ -248,7 +255,6 @@ public:
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool compareSearchGraphNode(const SearchGraphNode& a, const SearchGraphNode b) {
|
||||
return a.hypo->GetId() < b.hypo->GetId();
|
||||
}
|
||||
@ -283,6 +289,45 @@ public:
|
||||
retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_array(searchGraphXml)));
|
||||
}
|
||||
|
||||
void outputNBest(const Manager& manager,
|
||||
map<string, xmlrpc_c::value>& retData,
|
||||
const int n=100,
|
||||
const bool distinct=false,
|
||||
const bool reportAllFactors=false)
|
||||
{
|
||||
TrellisPathList nBestList;
|
||||
manager.CalcNBest(n, nBestList, distinct);
|
||||
|
||||
vector<xmlrpc_c::value> nBestXml;
|
||||
TrellisPathList::const_iterator iter;
|
||||
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
|
||||
const TrellisPath &path = **iter;
|
||||
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
||||
map<string, xmlrpc_c::value> nBestXMLItem;
|
||||
|
||||
// output surface
|
||||
ostringstream out;
|
||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
const Phrase& phrase = edge.GetCurrTargetPhrase();
|
||||
if(reportAllFactors) {
|
||||
out << phrase << " ";
|
||||
} else {
|
||||
for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, 0);
|
||||
out << *factor << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());
|
||||
|
||||
// weighted score
|
||||
nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetTotalScore());
|
||||
nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem));
|
||||
}
|
||||
retData.insert(pair<string, xmlrpc_c::value>("nbest", xmlrpc_c::value_array(nBestXml)));
|
||||
}
|
||||
|
||||
void insertTranslationOptions(Manager& manager, map<string, xmlrpc_c::value>& retData) {
|
||||
const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions();
|
||||
vector<xmlrpc_c::value> toptsXml;
|
||||
|
@ -168,7 +168,7 @@ void ChartHypothesis::CalcScore()
|
||||
const std::vector<const StatelessFeatureFunction*>& sfs =
|
||||
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for (unsigned i = 0; i < sfs.size(); ++i)
|
||||
if (sfs[i]->GetStatelessFeatureType() == NotCacheable)
|
||||
if (sfs[i]->GetStatelessFeatureType() == RequiresSegmentation)
|
||||
sfs[i]->EvaluateChart(ChartBasedFeatureContext(this),&m_scoreBreakdown);
|
||||
|
||||
const std::vector<const StatefulFeatureFunction*>& ffs =
|
||||
|
@ -134,8 +134,8 @@ void ChartManager::AddXmlChartOptions() {
|
||||
i != xmlChartOptionsList.end(); ++i) {
|
||||
ChartTranslationOptions* opt = *i;
|
||||
|
||||
Moses::Scores wordPenaltyScore(1, -1);
|
||||
opt->GetTargetPhraseCollection().GetCollection()[0]->SetScore((FeatureFunction*)staticData.GetWordPenaltyProducer(), wordPenaltyScore);
|
||||
TargetPhrase &targetPhrase = *opt->GetTargetPhraseCollection().GetCollection()[0];
|
||||
targetPhrase.GetScoreBreakdown().Assign(staticData.GetWordPenaltyProducer(), -1);
|
||||
|
||||
const WordsRange &range = opt->GetSourceWordsRange();
|
||||
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
|
||||
@ -354,7 +354,7 @@ void ChartManager::PreCalculateScores()
|
||||
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
ScoreComponentCollection& breakdown = m_precalculatedScores[*targetPhrase];
|
||||
for (size_t k = 0; k < sfs.size(); ++k) {
|
||||
if (sfs[k]->GetStatelessFeatureType() == DependsOnSource) {
|
||||
if (sfs[k]->GetStatelessFeatureType() == RequiresSource) {
|
||||
sfs[k]->EvaluateChart(context,&breakdown);
|
||||
}
|
||||
}
|
||||
|
@ -44,7 +44,6 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
// unknown word, add as trans opt
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
|
||||
vector<float> wordPenaltyScore(1, -1);
|
||||
|
||||
size_t isDigit = 0;
|
||||
if (staticData.GetDropUnknown()) {
|
||||
@ -84,11 +83,11 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
targetWord.CreateUnknownWord(sourceWord);
|
||||
|
||||
// scores
|
||||
vector<float> unknownScore(1, FloorScore(TransformScore(prob)));
|
||||
|
||||
//targetPhrase->SetScore();
|
||||
targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
|
||||
targetPhrase->SetScore(staticData.GetWordPenaltyProducer(), wordPenaltyScore);
|
||||
float unknownScore = FloorScore(TransformScore(prob));
|
||||
|
||||
targetPhrase->GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
targetPhrase->SetSourcePhrase(*unksrc);
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
targetPhrase->SetAlignmentInfo("0-0");
|
||||
@ -98,7 +97,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
} // for (iterLHS
|
||||
} else {
|
||||
// drop source word. create blank trans opt
|
||||
vector<float> unknownScore(1, FloorScore(-numeric_limits<float>::infinity()));
|
||||
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
|
||||
|
||||
TargetPhrase *targetPhrase = new TargetPhrase();
|
||||
// loop
|
||||
@ -112,8 +111,10 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
|
||||
CHECK(targetLHS.GetFactor(0) != NULL);
|
||||
|
||||
targetPhrase->GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
targetPhrase->SetSourcePhrase(*unksrc);
|
||||
targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
|
||||
// chart rule
|
||||
|
@ -142,7 +142,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
|
||||
|
||||
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
||||
TargetPhrase outPhrase(inPhrase);
|
||||
outPhrase.SetScore(generationScore);
|
||||
outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
|
||||
|
||||
outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
|
||||
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
|
||||
|
@ -76,7 +76,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
|
||||
continue;
|
||||
}
|
||||
|
||||
outPhrase.SetScore(transScores);
|
||||
outPhrase.GetScoreBreakdown().PlusEquals(transScores);
|
||||
outPhrase.Evaluate(); // need to do this as all non-transcores would be screwed up
|
||||
|
||||
outPhrase.MergeFactors(targetPhrase, m_newOutputFactors);
|
||||
|
@ -96,10 +96,8 @@ public:
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
bool IsDecodeFeature() const
|
||||
{ return true; }
|
||||
|
||||
|
||||
StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return SetByOriginator; }
|
||||
};
|
||||
|
||||
class MetaFeatureProducer : public StatelessFeatureFunction
|
||||
|
@ -127,7 +127,6 @@ FeatureFunction::~FeatureFunction() {}
|
||||
|
||||
void FeatureFunction::ParseLine(const std::string& description, const std::string &line)
|
||||
{
|
||||
cerr << "line=" << line << endl;
|
||||
vector<string> toks = Tokenize(line);
|
||||
|
||||
CHECK(toks.size());
|
||||
|
@ -132,9 +132,6 @@ public:
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const = 0;
|
||||
|
||||
virtual bool IsDecodeFeature() const
|
||||
{ return false; }
|
||||
|
||||
};
|
||||
|
||||
/** base class for all stateless feature functions.
|
||||
@ -163,7 +160,7 @@ public:
|
||||
ScoreComponentCollection* accumulator) const = 0;
|
||||
|
||||
virtual StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return CacheableInPhraseTable; }
|
||||
{ return RequiresTargetPhrase; }
|
||||
|
||||
bool IsStateless() const
|
||||
{ return true; }
|
||||
|
@ -85,7 +85,7 @@ public:
|
||||
|
||||
|
||||
virtual StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return DependsOnSource; }
|
||||
{ return RequiresSource; }
|
||||
|
||||
};
|
||||
|
||||
|
@ -101,7 +101,7 @@ public:
|
||||
StringPiece targetWord) const;
|
||||
|
||||
virtual StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return DependsOnSource; }
|
||||
{ return RequiresSource; }
|
||||
|
||||
};
|
||||
|
||||
|
@ -286,7 +286,7 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
|
||||
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for (unsigned i = 0; i < sfs.size(); ++i) {
|
||||
const StatelessFeatureFunction &ff = *sfs[i];
|
||||
if (ff.GetStatelessFeatureType() == NotCacheable) {
|
||||
if (ff.GetStatelessFeatureType() == RequiresSegmentation) {
|
||||
EvaluateWith(ff);
|
||||
}
|
||||
}
|
||||
|
@ -287,7 +287,7 @@ protected:
|
||||
|
||||
targetPhrase.SetSourcePhrase(*srcPtr);
|
||||
|
||||
targetPhrase.SetScore(m_obj, scoreVector);
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, scoreVector);
|
||||
targetPhrase.Evaluate();
|
||||
}
|
||||
|
||||
|
@ -186,6 +186,14 @@ void ScoreComponentCollection::Assign(const FeatureFunction* sp, const string li
|
||||
}
|
||||
}
|
||||
|
||||
void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
|
||||
{
|
||||
size_t numScores = sp->GetNumScoreComponents();
|
||||
Scores vec(numScores, 0);
|
||||
|
||||
Assign(sp, vec);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -388,6 +388,7 @@ public:
|
||||
|
||||
float GetWeightedScore() const;
|
||||
|
||||
void ZeroDenseFeatures(const FeatureFunction* sp);
|
||||
void ZeroAllLM(const LMList& lmList);
|
||||
void PlusEqualsAllLM(const LMList& lmList, const ScoreComponentCollection& rhs);
|
||||
void L1Normalise();
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
const AlignmentInfo &alignmentInfo) const;
|
||||
|
||||
virtual StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return NotCacheable; }
|
||||
{ return RequiresSegmentation; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -55,9 +55,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "LM/Ken.h"
|
||||
|
||||
#ifdef LM_IRST
|
||||
//#ifdef LM_IRST
|
||||
#include "LM/IRST.h"
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
#ifdef HAVE_SYNLM
|
||||
#include "SyntacticLanguageModel.h"
|
||||
@ -630,13 +630,13 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
|
||||
SetWeights(model, weights);
|
||||
}
|
||||
#ifdef LM_IRST
|
||||
//#ifdef LM_IRST
|
||||
else if (feature == "IRSTLM") {
|
||||
LanguageModelIRST *model = new LanguageModelIRST(line);
|
||||
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
|
||||
SetWeights(model, weights);
|
||||
}
|
||||
#endif
|
||||
//#endif
|
||||
else if (feature == "Generation") {
|
||||
GenerationDictionary *model = new GenerationDictionary(line);
|
||||
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
|
||||
|
@ -86,7 +86,19 @@ void TargetPhrase::Evaluate()
|
||||
|
||||
for (size_t i = 0; i < ffs.size(); ++i) {
|
||||
const FeatureFunction &ff = *ffs[i];
|
||||
if (!ff.IsDecodeFeature()) {
|
||||
bool evaluate = false;
|
||||
|
||||
if (!ff.IsStateless()) {
|
||||
evaluate = true;
|
||||
}
|
||||
else {
|
||||
const StatelessFeatureFunction &sff = static_cast<const StatelessFeatureFunction&>(ff);
|
||||
if (sff.GetStatelessFeatureType() != SetByOriginator) {
|
||||
evaluate = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (evaluate) {
|
||||
ff.Evaluate(*this, m_scoreBreakdown, futureScoreBreakdown);
|
||||
}
|
||||
}
|
||||
@ -103,11 +115,14 @@ void TargetPhrase::SetXMLScore(float score)
|
||||
const FeatureFunction* prod = staticData.GetPhraseDictionaries()[0];
|
||||
size_t numScores = prod->GetNumScoreComponents();
|
||||
vector <float> scoreVector(numScores,score/numScores);
|
||||
SetScore(prod, scoreVector);
|
||||
|
||||
m_scoreBreakdown.Assign(prod, scoreVector);
|
||||
}
|
||||
|
||||
void TargetPhrase::SetInputScore(const Scores &scoreVector)
|
||||
{
|
||||
cerr << scoreVector.size() << endl;
|
||||
|
||||
//we use an existing score producer to figure out information for score setting (number of scores and weights)
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const FeatureFunction* prod = staticData.GetPhraseDictionaries()[0];
|
||||
@ -117,15 +132,7 @@ void TargetPhrase::SetInputScore(const Scores &scoreVector)
|
||||
Scores sizedScoreVector = scoreVector;
|
||||
sizedScoreVector.resize(prod->GetNumScoreComponents(),0.0f);
|
||||
|
||||
SetScore(prod, sizedScoreVector);
|
||||
}
|
||||
|
||||
// used to set translation or gen score
|
||||
void TargetPhrase::SetScore(const FeatureFunction* producer, const Scores &scoreVector)
|
||||
{
|
||||
// used when creating translations of unknown words (chart decoding)
|
||||
m_scoreBreakdown.Assign(producer, scoreVector);
|
||||
m_fullScore = m_scoreBreakdown.GetWeightedScore();
|
||||
m_scoreBreakdown.Assign(prod, sizedScoreVector);
|
||||
}
|
||||
|
||||
TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
|
||||
|
@ -65,15 +65,9 @@ public:
|
||||
void SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString);
|
||||
|
||||
// used to set translation or gen score
|
||||
void SetScore(const FeatureFunction* producer, const Scores &scoreVector);
|
||||
void SetXMLScore(float score);
|
||||
void SetInputScore(const Scores &scoreVector);
|
||||
|
||||
void SetScore(const ScoreComponentCollection &scores)
|
||||
{
|
||||
m_scoreBreakdown.PlusEquals(scores);
|
||||
}
|
||||
|
||||
TargetPhrase *MergeNext(const TargetPhrase &targetPhrase) const;
|
||||
// used for translation step
|
||||
|
||||
@ -92,9 +86,9 @@ public:
|
||||
}
|
||||
|
||||
inline const ScoreComponentCollection &GetScoreBreakdown() const
|
||||
{
|
||||
return m_scoreBreakdown;
|
||||
}
|
||||
{ return m_scoreBreakdown; }
|
||||
inline ScoreComponentCollection &GetScoreBreakdown()
|
||||
{ return m_scoreBreakdown; }
|
||||
|
||||
//TODO: Probably shouldn't copy this, but otherwise ownership is unclear
|
||||
void SetSourcePhrase(const Phrase& p)
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
const AlignmentInfo &alignmentInfo) const;
|
||||
|
||||
virtual StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return NotCacheable; }
|
||||
{ return RequiresSegmentation; }
|
||||
|
||||
};
|
||||
|
||||
|
@ -439,7 +439,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
|
||||
if(scores.size() == m_numScoreComponent)
|
||||
{
|
||||
targetPhrase->SetScore(&m_phraseDictionary, scores);
|
||||
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
if(m_containsAlignmentInfo)
|
||||
|
@ -52,7 +52,7 @@ const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCol
|
||||
//std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),NegateScore);
|
||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
|
||||
|
||||
targetPhrase->SetScore(this, scoreVector);
|
||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
//cout << *targetPhrase << "\t" << std::setprecision(8) << scoreVector[2] << endl;
|
||||
|
@ -47,6 +47,22 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
}
|
||||
} // for
|
||||
|
||||
CHECK(m_pdStr.size() == m_multimodelweights.size());
|
||||
}
|
||||
|
||||
/**
 * Constructor taking an explicit description that is forwarded, together
 * with the configuration line, to the PhraseDictionary base class.
 *
 * Afterwards the key/value pairs in m_args are scanned:
 *   "components" - comma-separated list, stored in m_pdStr; its length
 *                  becomes m_numModels
 *   "lambda"     - comma-separated floats, stored in m_multimodelweights
 * Other keys are ignored here. Finally it is CHECKed that the number of
 * components equals the number of weights.
 */
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &description, const std::string &line)
  : PhraseDictionary(description, line)
{
  const size_t numArgs = m_args.size();
  for (size_t argInd = 0; argInd != numArgs; ++argInd) {
    const std::vector<std::string> &keyValue = m_args[argInd];
    const std::string &key = keyValue[0];

    if (key == "components") {
      m_pdStr = Tokenize(keyValue[1], ",");
      m_numModels = m_pdStr.size();
      continue;
    }
    if (key == "lambda") {
      m_multimodelweights = Tokenize<float>(keyValue[1], ",");
    }
  }

  CHECK(m_pdStr.size() == m_multimodelweights.size());
}
|
||||
@ -118,8 +134,9 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
|
||||
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
|
||||
{
|
||||
for(size_t i = 0; i < m_numModels; ++i){
|
||||
const PhraseDictionary &pd = *m_pd[i];
|
||||
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) m_pd[i]->GetTargetPhraseCollection( src);
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection( src);
|
||||
if (ret_raw != NULL) {
|
||||
|
||||
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
|
||||
@ -132,7 +149,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
|
||||
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
|
||||
TargetPhrase * targetPhrase = *iterTargetPhrase;
|
||||
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);
|
||||
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
|
||||
|
||||
std::string targetString = targetPhrase->GetStringRep(m_output);
|
||||
if (allStats->find(targetString) == allStats->end()) {
|
||||
@ -140,6 +157,9 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
multiModelStatistics * statistics = new multiModelStatistics;
|
||||
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
|
||||
|
||||
// zero out scores from original phrase table
|
||||
statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
|
||||
|
||||
Scores scoreVector(m_numScoreComponents);
|
||||
statistics->p.resize(m_numScoreComponents);
|
||||
for(size_t j = 0; j < m_numScoreComponents; ++j){
|
||||
@ -147,7 +167,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
scoreVector[j] = -raw_scores[j];
|
||||
}
|
||||
|
||||
statistics->targetPhrase->SetScore(this, scoreVector); // set scores to 0
|
||||
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
|
||||
|
||||
(*allStats)[targetString] = statistics;
|
||||
|
||||
@ -181,7 +201,10 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
|
||||
//assuming that last value is phrase penalty
|
||||
scoreVector[m_numScoreComponents-1] = 1.0;
|
||||
|
||||
statistics->targetPhrase->SetScore(this, scoreVector);
|
||||
for (size_t i = 0; i < scoreVector.size(); ++i) cerr << scoreVector[i] << " ";
|
||||
cerr << endl;
|
||||
|
||||
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||
ret->Add(new TargetPhrase(*statistics->targetPhrase));
|
||||
}
|
||||
return ret;
|
||||
|
@ -58,6 +58,7 @@ friend class CrossEntropy;
|
||||
|
||||
public:
|
||||
PhraseDictionaryMultiModel(const std::string &line);
|
||||
PhraseDictionaryMultiModel(const std::string &description, const std::string &line);
|
||||
~PhraseDictionaryMultiModel();
|
||||
bool InitDictionary();
|
||||
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
|
||||
|
@ -49,16 +49,33 @@ vector<string> tokenize( const char* input )
|
||||
}
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::string &line)
|
||||
:PhraseDictionaryMultiModel("PhraseDictionaryMultiModel", line)
|
||||
{
|
||||
PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(size_t numScoreComponent,
|
||||
PhraseDictionaryFeature* feature): PhraseDictionaryMultiModel(numScoreComponent, feature)
|
||||
{
|
||||
m_feature_load = feature;
|
||||
m_mode = "instance_weighting"; //TODO: set this in config; use m_mode to switch between interpolation and instance weighting
|
||||
m_combineFunction = InstanceWeighting;
|
||||
//m_mode = "interpolate";
|
||||
//m_combineFunction = LinearInterpolationFromCounts;
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
if (args[0] == "mode") {
|
||||
m_mode =args[1];
|
||||
if (m_mode == "instance_weighting")
|
||||
m_combineFunction = InstanceWeighting;
|
||||
else if (m_mode == "interpolate")
|
||||
m_combineFunction = LinearInterpolationFromCounts;
|
||||
else {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
|
||||
}
|
||||
} // for
|
||||
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
|
||||
@ -69,7 +86,7 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
|
||||
RemoveAllInColl(m_inverse_pd);
|
||||
}
|
||||
|
||||
bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
||||
bool PhraseDictionaryMultiModelCounts::InitDictionary(const vector<FactorType> &input
|
||||
, const vector<FactorType> &output
|
||||
, const vector<string> &config
|
||||
, const vector<float> &weight
|
||||
@ -78,27 +95,7 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
||||
, const LMList &languageModels
|
||||
, float weightWP)
|
||||
{
|
||||
m_languageModels = &languageModels;
|
||||
m_weight = weight;
|
||||
m_weightWP = weightWP;
|
||||
m_input = input;
|
||||
m_output = output;
|
||||
m_tableLimit = tableLimit;
|
||||
|
||||
m_mode = config[4];
|
||||
std::vector<std::string> files(config.begin()+5,config.end());
|
||||
|
||||
m_numModels = files.size();
|
||||
|
||||
if (m_mode == "instance_weighting")
|
||||
m_combineFunction = InstanceWeighting;
|
||||
else if (m_mode == "interpolate")
|
||||
m_combineFunction = LinearInterpolationFromCounts;
|
||||
else {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
/*
|
||||
|
||||
for(size_t i = 0; i < m_numModels; ++i){
|
||||
|
||||
@ -121,7 +118,6 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
||||
//how many actual scores there are in the phrase tables
|
||||
size_t numScoresCounts = 3;
|
||||
size_t numScoresTargetCounts = 1;
|
||||
|
||||
if (implementation == Memory) {
|
||||
|
||||
if (!FileExists(main_table) && FileExists(main_table + ".gz")) main_table += ".gz";
|
||||
@ -175,6 +171,8 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -214,7 +212,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
|
||||
|
||||
TargetPhrase * targetPhrase = *iterTargetPhrase;
|
||||
vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
|
||||
vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);
|
||||
|
||||
string targetString = targetPhrase->GetStringRep(m_output);
|
||||
if (allStats->find(targetString) == allStats->end()) {
|
||||
@ -228,7 +226,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
scoreVector[0] = -raw_scores[0];
|
||||
scoreVector[1] = -raw_scores[1];
|
||||
scoreVector[2] = -raw_scores[2];
|
||||
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
|
||||
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
|
||||
|
||||
(*allStats)[targetString] = statistics;
|
||||
|
||||
@ -281,7 +279,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseColl
|
||||
scoreVector[3] = FloorScore(TransformScore(lexts));
|
||||
scoreVector[4] = FloorScore(TransformScore(2.718));
|
||||
|
||||
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
|
||||
statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||
}
|
||||
catch (AlignmentException& e) {
|
||||
continue;
|
||||
@ -303,7 +301,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
|
||||
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
|
||||
if (ret_raw != NULL) {
|
||||
TargetPhrase * targetPhrase = *(ret_raw->begin());
|
||||
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature)[0]);
|
||||
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(this)[0]);
|
||||
}
|
||||
|
||||
// target phrase unknown
|
||||
@ -497,14 +495,6 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModelCounts::CleanUpComponentModels(const InputType &source) {
|
||||
for(size_t i = 0; i < m_numModels; ++i){
|
||||
m_pd[i]->CleanUp(source);
|
||||
m_inverse_pd[i]->CleanUp(source);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef WITH_DLIB
|
||||
vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector) {
|
||||
|
||||
@ -668,4 +658,4 @@ double LinearInterpolationFromCounts(vector<float> &joint_counts, vector<float>
|
||||
}
|
||||
|
||||
|
||||
} //namespace
|
||||
} //namespace
|
@ -21,11 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#define moses_PhraseDictionaryMultiModelCounts_h
|
||||
|
||||
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||
#ifndef WIN32
|
||||
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
@ -84,9 +79,9 @@ typedef std::vector< std::set<size_t> > AlignVector;
|
||||
|
||||
|
||||
public:
|
||||
PhraseDictionaryMultiModelCounts(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
|
||||
PhraseDictionaryMultiModelCounts(const std::string &line);
|
||||
~PhraseDictionaryMultiModelCounts();
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
bool InitDictionary(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, const std::vector<std::string> &files
|
||||
, const std::vector<float> &weight
|
||||
@ -106,7 +101,6 @@ public:
|
||||
void FillLexicalCountsMarginal(std::string &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
|
||||
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
|
||||
void CleanUpComponentModels(const InputType &source);
|
||||
#ifdef WITH_DLIB
|
||||
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
||||
#endif
|
||||
@ -148,4 +142,4 @@ private:
|
||||
|
||||
} // end namespace
|
||||
|
||||
#endif
|
||||
#endif
|
@ -246,7 +246,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
targetPhrase->SetSparseScore(&ruleTable, sparseString);
|
||||
}
|
||||
|
||||
targetPhrase->SetScore(&ruleTable, scoreVector);
|
||||
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, targetPhrase->GetSourcePhrase(), *targetPhrase, sourceLHS);
|
||||
|
@ -242,7 +242,7 @@ namespace Moses
|
||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
|
||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
|
||||
|
||||
targetPhrase->SetScore(this, scoreVector);
|
||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||
targetPhrase->Evaluate();
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
|
||||
|
@ -200,8 +200,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
|
||||
vector<float> wordPenaltyScore(1, -1);
|
||||
vector<float> unknownScore(1, FloorScore(TransformScore(0)));
|
||||
float unknownScore = FloorScore(TransformScore(0));
|
||||
|
||||
// unknown word, add as trans opt
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
@ -260,7 +259,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
|
||||
|
||||
}
|
||||
|
||||
targetPhrase.SetScore(unknownWordPenaltyProducer, unknownScore);
|
||||
targetPhrase.GetScoreBreakdown().Assign(unknownWordPenaltyProducer, unknownScore);
|
||||
|
||||
if (inputScores != NULL) {
|
||||
targetPhrase.SetInputScore(*inputScores);
|
||||
|
@ -177,9 +177,11 @@ enum FormatType
|
||||
|
||||
enum StatelessFeatureType
|
||||
{
|
||||
CacheableInPhraseTable // simplest. eg. phrase table scores. word penalty, phrase penalty.
|
||||
,DependsOnSource // can't be pre-computed during training, but can be computed before search.eg. source bag-of-word features
|
||||
,NotCacheable // can't be pre-computed. Depends on segmentation during search. eg. span-length feature
|
||||
SetByOriginator // The scores are set by the phrase table, generation table, or unknown word handler
|
||||
// They shouldn't be subsequently evaluated.
|
||||
,RequiresTargetPhrase // Default. simplest, but not in phrase-table. eg. word penalty
|
||||
,RequiresSource // can't be pre-computed during training, but can be computed before search.eg. source bag-of-word features
|
||||
,RequiresSegmentation // can't be pre-computed. Depends on segmentation during search. eg. span-length feature
|
||||
};
|
||||
|
||||
// typedef
|
||||
|
@ -58,7 +58,7 @@ public:
|
||||
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
|
||||
|
||||
StatelessFeatureType GetStatelessFeatureType() const
|
||||
{ return NotCacheable; }
|
||||
{ return RequiresSegmentation; }
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user