Reorganization of phrase scorers in Mmsapt.

This commit is contained in:
Ulrich Germann 2014-06-14 13:03:31 +01:00
parent c3db1a3a67
commit 1a3d7d3266
4 changed files with 225 additions and 210 deletions

View File

@ -23,6 +23,7 @@
#include "ug_typedefs.h"
#include "tpt_pickler.h"
#include "ug_bitext.h"
#include "../mmsapt_phrase_scorers.h"
#include "ug_lexical_phrase_scorer2.h"
using namespace std;
@ -44,7 +45,7 @@ float lbsmooth = .005;
PScorePfwd<Token> calc_pfwd;
PScorePbwd<Token> calc_pbwd;
PScoreLex<Token> calc_lex;
PScoreLex<Token> calc_lex(1.0);
PScoreWP<Token> apply_wp;
vector<float> fweights;
@ -129,7 +130,7 @@ int main(int argc, char* argv[])
bt.setDefaultSampleSize(max_samples);
size_t i;
i = calc_pfwd.init(0,.05);
i = calc_pfwd.init(0,.05,'g');
i = calc_pbwd.init(i,.05);
i = calc_lex.init(i,base+L1+"-"+L2+".lex");
i = apply_wp.init(i);

View File

@ -47,15 +47,22 @@ namespace Moses
}
#endif
// Read-only access to the names of the features this phrase table has
// activated. The returned reference aliases the member m_feature_names,
// to which names are appended (via each scorer's fname()) as scorers are
// initialised in Load() and add_corpus_specific_features().
vector<string> const&
Mmsapt::
GetFeatureNames() const
{
return m_feature_names;
}
Mmsapt::
Mmsapt(string const& line)
// : PhraseDictionary("Mmsapt",line), ofactor(1,0)
: PhraseDictionary(line)
, m_lex_alpha(1.0)
, withLogCountFeatures(false)
, withPfwd(true), withPbwd(true)
, withCoherence(true)
, m_pfwd_features("g"), withPbwd(true), poolCounts(true)
, ofactor(1,0)
, m_tpc_ctr(0)
// default values chosen for bwd probability
{
this->init(line);
}
@ -101,52 +108,56 @@ namespace Moses
assert(L1.size());
assert(L2.size());
m = param.find("pfwd_denom");
m = param.find("pfwd-denom");
m_pfwd_denom = m != param.end() ? m->second[0] : 's';
m = param.find("smooth");
m_lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;
m = param.find("max-samples");
m_default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;
m = param.find("logcnt-features");
if (m != param.end())
if ((m = param.find("logcnt-features")) != param.end())
withLogCountFeatures = m->second != "0";
m = param.find("pfwd");
if (m != param.end())
withPfwd = m->second != "0";
m = param.find("pbwd");
if (m != param.end())
if ((m = param.find("coh")) != param.end())
withCoherence = m->second != "0";
if ((m = param.find("pfwd")) != param.end())
m_pfwd_features = (m->second == "0" ? "" : m->second);
if (m_pfwd_features == "1")
m_pfwd_features[0] = m_pfwd_denom;
if ((m = param.find("pbwd")) != param.end())
withPbwd = m->second != "0";
if ((m = param.find("lexalpha")) != param.end())
m_lex_alpha = atof(m->second.c_str());
m = param.find("workers");
m_workers = m != param.end() ? atoi(m->second.c_str()) : 8;
m_workers = min(m_workers,24UL);
m = param.find("limit");
if (m != param.end()) m_tableLimit = atoi(m->second.c_str());
if ((m = param.find("limit")) != param.end())
m_tableLimit = atoi(m->second.c_str());
m = param.find("cache-size");
m_history.reserve(m != param.end()?max(1000,atoi(m->second.c_str())):10000);
// in plain language: cache size is at least 1000, and 10,000 by default
// this cache keeps track of the most frequently used target phrase collections
// even when not actively in use
this->m_numScoreComponents = atoi(param["num-features"].c_str());
// num_features = 0;
m = param.find("ifactor");
input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
poolCounts = true;
m = param.find("extra");
if (m != param.end())
{
extra_data = m->second;
// cerr << "have extra data" << endl;
}
// keeps track of the most frequently used target phrase collections
// (to keep them cached even when not actively in use)
if ((m = param.find("extra")) != param.end())
extra_data = m->second;
}
void
@ -175,6 +186,49 @@ namespace Moses
// cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
}
// Instantiate the scorers that are computed per corpus and append them
// to /ffvec/:
//   - one forward-probability scorer per character in m_pfwd_features
//     (each character must be 'g', 'r' or 's'),
//   - a backward-probability scorer if withPbwd is set,
//   - a log-count scorer if withLogCountFeatures is set.
// /num_feats/ is the number of feature slots already taken; each scorer's
// init() returns the updated count, and the name of every newly occupied
// slot is appended to m_feature_names.
// Returns the feature count after all scorers have been registered.
// For the time being, all phrase probability features share the same
// confidence parameter (m_lbop_parameter) for lower-bound estimation.
size_t
Mmsapt::
add_corpus_specific_features
(vector<sptr<pscorer > >& ffvec, size_t num_feats)
{
  // forward phrase probabilities, one scorer per requested variant
  for (size_t i = 0; i < m_pfwd_features.size(); ++i)
    {
      // NOTE(review): condition and message are kept verbatim; the macro
      // presumably embeds the condition text in the exception -- confirm.
      UTIL_THROW_IF2(m_pfwd_features[i] != 'g' &&
                     m_pfwd_features[i] != 'r' &&
                     m_pfwd_features[i] != 's',
                     "Can't handle pfwd feature type '"
                     << m_pfwd_features[i] << "'.");

      sptr<PScorePfwd<Token> > pfwd(new PScorePfwd<Token>());
      size_t slot = num_feats;
      num_feats = pfwd->init(num_feats, m_lbop_parameter, m_pfwd_features[i]);
      while (slot < num_feats)
        {
          m_feature_names.push_back(pfwd->fname(slot));
          ++slot;
        }
      ffvec.push_back(pfwd);
    }

  // backward phrase probability
  if (withPbwd)
    {
      sptr<PScorePbwd<Token> > pbwd(new PScorePbwd<Token>());
      size_t slot = num_feats;
      num_feats = pbwd->init(num_feats, m_lbop_parameter);
      while (slot < num_feats)
        {
          m_feature_names.push_back(pbwd->fname(slot));
          ++slot;
        }
      ffvec.push_back(pbwd);
    }

  // logs of counts
  if (withLogCountFeatures)
    {
      sptr<PScoreLogCounts<Token> > logcnt(new PScoreLogCounts<Token>());
      size_t slot = num_feats;
      num_feats = logcnt->init(num_feats);
      while (slot < num_feats)
        {
          m_feature_names.push_back(logcnt->fname(slot));
          ++slot;
        }
      ffvec.push_back(logcnt);
    }

  return num_feats;
}
void
Mmsapt::
Load()
@ -184,44 +238,52 @@ namespace Moses
btfix.setDefaultSampleSize(m_default_sample_size);
size_t num_feats = 0;
// TO DO: should we use different lbop parameters
// for the relative-frequency based features?
if (withLogCountFeatures) num_feats = add_logcounts_fix.init(num_feats);
float const lbop = m_lbop_parameter; // just for code readability below
if (withPfwd) num_feats = calc_pfwd_fix.init(num_feats,lbop,m_pfwd_denom);
if (withPbwd) num_feats = calc_pbwd_fix.init(num_feats,lbop);
// lexical scores are currently always active
sptr<PScoreLex<Token> > ff(new PScoreLex<Token>(m_lex_alpha));
size_t k = num_feats;
num_feats = ff->init(num_feats, bname + L1 + "-" + L2 + ".lex");
for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
m_active_ff_common.push_back(ff);
// currently always active by default; may (should) change later
num_feats = calc_lex.init(num_feats, bname + L1 + "-" + L2 + ".lex");
// if (this->m_numScoreComponents%2) // a bit of a hack, for backwards compatibility
// num_feats = apply_pp.init(num_feats);
if (num_feats < this->m_numScoreComponents)
if (withCoherence)
{
poolCounts = false;
if (withLogCountFeatures) num_feats = add_logcounts_dyn.init(num_feats);
if (withPfwd) num_feats = calc_pfwd_dyn.init(num_feats,lbop,m_pfwd_denom);
if (withPbwd) num_feats = calc_pbwd_dyn.init(num_feats,lbop);
sptr<PScoreCoherence<Token> > ff(new PScoreCoherence<Token>());
size_t k = num_feats;
num_feats = ff->init(num_feats);
for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
m_active_ff_common.push_back(ff);
}
if (num_feats != this->m_numScoreComponents)
{
ostringstream buf;
buf << "At " << __FILE__ << ":" << __LINE__
<< ": number of feature values provided by Phrase table"
<< " does not match number specified in Moses config file!";
throw buf.str().c_str();
}
// cerr << "MMSAPT provides " << num_feats << " features at "
// << __FILE__ << ":" << __LINE__ << endl;
num_feats = add_corpus_specific_features(m_active_ff_fix,num_feats);
// cerr << num_feats << "/" << this->m_numScoreComponents
// << " at " << __FILE__ << ":" << __LINE__ << endl;
poolCounts = poolCounts && num_feats == this->m_numScoreComponents;
if (!poolCounts)
num_feats = add_corpus_specific_features(m_active_ff_dyn, num_feats);
#if 0
cerr << "MMSAPT provides " << num_feats << " features at "
<< __FILE__ << ":" << __LINE__ << endl;
BOOST_FOREACH(string const& fname, m_feature_names)
cerr << fname << endl;
#endif
UTIL_THROW_IF2(num_feats != this->m_numScoreComponents,
"At " << __FILE__ << ":" << __LINE__
<< ": number of feature values provided by Phrase table ("
<< num_feats << ") does not match number specified in "
<< "Moses config file (" << this->m_numScoreComponents
<< ")!\n";);
btdyn.reset(new imBitext<Token>(btfix.V1, btfix.V2,m_default_sample_size));
btdyn->num_workers = this->m_workers;
if (extra_data.size()) load_extra_data(extra_data);
if (extra_data.size())
{
load_extra_data(extra_data);
}
#if 0
// currently not used
LexicalPhraseScorer2<Token>::table_t & COOC = calc_lex.scorer.COOC;
typedef LexicalPhraseScorer2<Token>::table_t::Cell cell_t;
@ -230,7 +292,8 @@ namespace Moses
for (cell_t const* c = COOC[r].start; c < COOC[r].stop; ++c)
wlex21[c->id].push_back(r);
COOCraw.open(bname + L1 + "-" + L2 + ".coc");
#endif
}
void
@ -283,20 +346,28 @@ namespace Moses
{
PhrasePair pp;
pp.init(pid1, stats, this->m_numScoreComponents);
// if (this->m_numScoreComponents%2)
// apply_pp(bt,pp);
pstats::trg_map_t::const_iterator t;
for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
{
pp.update(t->first,t->second);
calc_lex(bt,pp);
if (withPfwd) calc_pfwd_fix(bt,pp);
if (withPbwd) calc_pbwd_fix(bt,pp);
if (withLogCountFeatures) add_logcounts_fix(bt,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(bt,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(bt,pp);
tpcoll->Add(createTargetPhrase(src,bt,pp));
}
}
// Score /pp/ against the fixed bitext (btfix): first apply every scorer
// in m_active_ff_fix, then every scorer in m_active_ff_common, each in
// registration order.
void
Mmsapt::
ScorePPfix(bitext::PhrasePair& pp) const
{
  typedef vector<sptr<pscorer> >::const_iterator ff_iter;

  for (ff_iter f = m_active_ff_fix.begin(); f != m_active_ff_fix.end(); ++f)
    (**f)(btfix, pp);

  for (ff_iter f = m_active_ff_common.begin(); f != m_active_ff_common.end(); ++f)
    (**f)(btfix, pp);
}
// process phrase stats from a single parallel corpus
bool
Mmsapt::
@ -318,8 +389,6 @@ namespace Moses
pp.init(pid1b, *statsb, this->m_numScoreComponents);
else return false; // throw "no stats for pooling available!";
// if (this->m_numScoreComponents%2)
// apply_pp(bta,pp);
pstats::trg_map_t::const_iterator b;
pstats::trg_map_t::iterator a;
if (statsb)
@ -344,10 +413,10 @@ namespace Moses
b->second);
}
else pp.update(b->first,b->second);
calc_lex(btb,pp);
if (withPfwd) calc_pfwd_fix(btb,pp);
if (withPbwd) calc_pbwd_fix(btb,pp);
if (withLogCountFeatures) add_logcounts_fix(btb,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(btb,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(btb,pp);
tpcoll->Add(createTargetPhrase(src,btb,pp));
}
}
@ -377,20 +446,19 @@ namespace Moses
#endif
UTIL_THROW_IF2(pp.raw2 == 0,
"OOPS"
<< bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
"OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: "
<< bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
<< pp.raw1 << " " << pp.sample1 << " "
<< pp.good1 << " " << pp.joint << " "
<< pp.raw2);
calc_lex(bta,pp);
if (withPfwd) calc_pfwd_fix(bta,pp);
if (withPbwd) calc_pbwd_fix(bta,pp);
if (withLogCountFeatures) add_logcounts_fix(bta,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(bta,pp);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(bta,pp);
tpcoll->Add(createTargetPhrase(src,bta,pp));
}
return true;
}
}
// process phrase stats from a single parallel corpus
@ -398,75 +466,81 @@ namespace Moses
Mmsapt::
combine_pstats
(Phrase const& src,
uint64_t const pid1a,
pstats * statsa,
Bitext<Token> const & bta,
uint64_t const pid1b,
pstats const* statsb,
Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll
) const
uint64_t const pid1a, pstats * statsa, Bitext<Token> const & bta,
uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const
{
PhrasePair ppfix,ppdyn,pool;
// ppfix: counts from btfix
// ppdyn: counts from btdyn
// pool: pooled counts from both
Word w;
if (statsa) ppfix.init(pid1a,*statsa,this->m_numScoreComponents);
if (statsb) ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
pstats::trg_map_t::const_iterator b;
pstats::trg_map_t::iterator a;
if (statsb)
{
pool.init(pid1b,*statsb,0);
// if (this->m_numScoreComponents%2)
// apply_pp(btb,ppdyn);
for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
{
ppdyn.update(b->first,b->second);
if (withPfwd) calc_pfwd_dyn(btb,ppdyn);
if (withPbwd) calc_pbwd_dyn(btb,ppdyn);
if (withLogCountFeatures) add_logcounts_dyn(btb,ppdyn);
calc_lex(btb,ppdyn);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
(*ff)(btb,ppdyn);
uint32_t sid,off,len;
parse_pid(b->first, sid, off, len);
Token const* x = bta.T2->sntStart(sid) + off;
TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
if (m.size() && statsa &&
((a = statsa->trg.find(m.getPid()))
!= statsa->trg.end()))
((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
{
// phrase pair found also in btfix
ppfix.update(a->first,a->second);
if (withPfwd) calc_pfwd_fix(bta,ppfix,&ppdyn.fvals);
if (withPbwd) calc_pbwd_fix(bta,ppfix,&ppdyn.fvals);
if (withLogCountFeatures) add_logcounts_fix(bta,ppfix,&ppdyn.fvals);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(bta,ppfix,&ppdyn.fvals);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(bta,ppfix,&ppdyn.fvals);
a->second.invalidate();
}
else
{
if (m.size())
pool.update(b->first,m.approxOccurrenceCount(),
b->second);
else
// phrase pair was not found in btfix
// ... but the source phrase was
if (m.size())
pool.update(b->first,m.approxOccurrenceCount(), b->second);
// ... and not even the source phrase
else
pool.update(b->first,b->second);
if (withPfwd) calc_pfwd_fix(btb,pool,&ppdyn.fvals);
if (withPbwd) calc_pbwd_fix(btb,pool,&ppdyn.fvals);
if (withLogCountFeatures) add_logcounts_fix(btb,pool,&ppdyn.fvals);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(btb,pool,&ppdyn.fvals);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(btb,pool,&ppdyn.fvals);
}
tpcoll->Add(createTargetPhrase(src,btb,ppdyn));
}
}
// now deal with all phrase pairs that are ONLY in btfix
// (the ones that are in both were dealt with above)
if (statsa)
{
pool.init(pid1a,*statsa,0);
// if (this->m_numScoreComponents%2)
// apply_pp(bta,ppfix);
for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
{
if (!a->second.valid()) continue; // done above
ppfix.update(a->first,a->second);
if (withPfwd) calc_pfwd_fix(bta,ppfix);
if (withPbwd) calc_pbwd_fix(bta,ppfix);
if (withLogCountFeatures) add_logcounts_fix(bta,ppfix);
calc_lex(bta,ppfix);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(bta,ppfix);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(bta,ppfix);
if (btb.I2)
{
@ -480,9 +554,8 @@ namespace Moses
pool.update(a->first,a->second);
}
else pool.update(a->first,a->second);
if (withPfwd) calc_pfwd_dyn(bta,pool,&ppfix.fvals);
if (withPbwd) calc_pbwd_dyn(bta,pool,&ppfix.fvals);
if (withLogCountFeatures) add_logcounts_dyn(bta,pool,&ppfix.fvals);
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
(*ff)(btb,pool,&ppfix.fvals);
}
if (ppfix.p2)
tpcoll->Add(createTargetPhrase(src,bta,ppfix));
@ -490,92 +563,6 @@ namespace Moses
return (statsa || statsb);
}
// // phrase statistics combination treating the two knowledge
// // sources separately with backoff to pooling when only one
// // of the two knowledge sources contains the phrase pair in
// // question
// void
// Mmsapt::
// process_pstats(uint64_t const mypid1,
// uint64_t const otpid1,
// pstats const& mystats, // my phrase stats
// pstats const* otstats, // other phrase stats
// Bitext<Token> const & mybt, // my bitext
// Bitext<Token> const * otbt, // other bitext
// PhraseScorer<Token> const& mypfwd,
// PhraseScorer<Token> const& mypbwd,
// PhraseScorer<Token> const* otpfwd,
// PhraseScorer<Token> const* otpbwd,
// TargetPhraseCollection* tpcoll)
// {
// boost::unordered_map<uint64_t,jstats>::const_iterator t;
// vector<FactorType> ofact(1,0);
// PhrasePair mypp,otpp,combo;
// mypp.init(mypid1, mystats, this->m_numScoreComponents);
// if (otstats)
// {
// otpp.init(otpid1, *otstats, 0);
// combo.init(otpid1, mystats, *otstats, 0);
// }
// else combo = mypp;
// for (t = mystats.trg.begin(); t != mystats.trg.end(); ++t)
// {
// if (!t->second.valid()) continue;
// // we dealt with this phrase pair already;
// // see j->second.invalidate() below;
// uint32_t sid,off,len; parse_pid(t->first,sid,off,len);
// mypp.update(t->first,t->second);
// apply_pp(mybt,mypp);
// calc_lex (mybt,mypp);
// mypfwd(mybt,mypp);
// mypbwd(mybt,mypp);
// if (otbt) // it's a dynamic phrase table
// {
// assert(otpfwd);
// assert(otpbwd);
// boost::unordered_map<uint64_t,jstats>::iterator j;
// // look up the current target phrase in the other bitext
// Token const* x = mybt.T2->sntStart(sid) + off;
// TSA<TOKEN>::tree_iterator m(otbt->I2.get(),x,x+len);
// if (otstats // source phrase exists in other bitext
// && m.size() // target phrase exists in other bitext
// && ((j = otstats->trg.find(m.getPid()))
// != otstats->trg.end())) // phrase pair found in other bitext
// {
// otpp.update(j->first,j->second);
// j->second.invalidate(); // mark the phrase pair as seen
// otpfwd(*otbt,otpp,&mypp.fvals);
// otpbwd(*otbt,otpp,&mypp.fvals);
// }
// else
// {
// if (m.size()) // target phrase seen in other bitext, but not the phrase pair
// combo.update(t->first,m.approxOccurrenceCount(),t->second);
// else
// combo.update(t->first,t->second);
// (*otpfwd)(mybt,combo,&mypp.fvals);
// (*otpbwd)(mybt,combo,&mypp.fvals);
// }
// }
// // now add the phrase pair to the TargetPhraseCollection:
// TargetPhrase* tp = new TargetPhrase();
// for (size_t k = off; k < stop; ++k)
// {
// StringPiece wrd = (*mybt.V2)[x[k].id()];
// Word w; w.CreateFromString(Output,ofact,wrd,false);
// tp->AddWord(w);
// }
// tp->GetScoreBreakdown().Assign(this,mypp.fvals);
// tp->Evaluate(src);
// tpcoll->Add(tp);
// }
// }
Mmsapt::
TargetPhraseCollectionWrapper::
TargetPhraseCollectionWrapper(size_t r, uint64_t k)

View File

@ -29,6 +29,7 @@
#include <map>
#include "moses/TranslationModel/PhraseDictionary.h"
#include "mmsapt_phrase_scorers.h"
// TO DO:
// - make the lexical phrase scorer take additions to the "dynamic overlay" into account
@ -51,6 +52,7 @@ namespace Moses
typedef mmBitext<Token> mmbitext;
typedef imBitext<Token> imbitext;
typedef TSA<Token> tsa;
typedef PhraseScorer<Token> pscorer;
private:
mmbitext btfix;
sptr<imbitext> btdyn;
@ -58,30 +60,48 @@ namespace Moses
string L1;
string L2;
float m_lbop_parameter;
float m_lex_alpha;
// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
// must be > 0 if dynamic
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
// deprecated!
char m_pfwd_denom; // denominator for computation of fwd phrase score:
// 'r' - divide by raw count
// 's' - divide by sample count
// 'g' - divide by number of "good" (i.e. coherent) samples
// size_t num_features;
size_t input_factor;
size_t output_factor; // we can actually return entire Tokens!
bool withLogCountFeatures; // add logs of counts as features?
bool withCoherence;
// which pfwd functions to use: one character per forward-probability
// feature; add_corpus_specific_features() accepts only 'g', 'r' and 's'
string m_pfwd_features;
// names of features activated, in the order the scorers were registered
vector<string> m_feature_names;
// activated corpus-specific feature functions for the fixed bitext
vector<sptr<pscorer > > m_active_ff_fix;
// activated corpus-specific feature functions for the dynamic bitext
// (filled in Load() only when counts are not pooled)
vector<sptr<pscorer > > m_active_ff_dyn;
// activated feature functions that are not corpus-specific
// (Load() registers the lexical and, if enabled, the coherence scorer here)
vector<sptr<pscorer > > m_active_ff_common;
// Appends the corpus-specific scorers (pfwd variants, pbwd if withPbwd,
// log counts if withLogCountFeatures) to /ffvec/, records their feature
// names in m_feature_names, and returns the updated feature count.
size_t
add_corpus_specific_features
(vector<sptr<pscorer > >& ffvec, size_t num_feats);
// built-in feature functions
PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually
// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
// PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
// PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually
// PScorePP<Token> apply_pp; // apply phrase penalty
PScoreLogCounts<Token> add_logcounts_fix;
PScoreLogCounts<Token> add_logcounts_dyn;
// PScoreLogCounts<Token> add_logcounts_fix;
// PScoreLogCounts<Token> add_logcounts_dyn;
void init(string const& line);
mutable boost::mutex lock;
bool withPbwd;
bool poolCounts;
bool withLogCountFeatures; // add logs of counts as features?
bool withPfwd,withPbwd;
vector<FactorType> ofactor;
public:
// typedef boost::unordered_map<uint64_t, sptr<TargetPhraseCollection> > tpcoll_cache_t;
class TargetPhraseCollectionWrapper
@ -207,6 +227,12 @@ namespace Moses
bool
PrefixExists(Phrase const& phrase) const;
// Names of the activated features (reference to the member
// m_feature_names).
vector<string> const&
GetFeatureNames() const;
// Applies the activated fixed-bitext scorers (m_active_ff_fix) and then
// the common scorers (m_active_ff_common) to /pp/, using btfix.
void
ScorePPfix(bitext::PhrasePair& pp) const;
private:
};
} // end namespace

View File

@ -127,6 +127,7 @@ namespace Moses
Alignment::
show(ostream& out, PhraseAlnHyp const& ah)
{
#if 0
LexicalPhraseScorer2<Token>::table_t const&
COOCjnt = PT.calc_lex.scorer.COOC;
@ -164,6 +165,7 @@ namespace Moses
// << " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
// << " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
// << "]" << endl;
#endif
}
void
@ -283,9 +285,7 @@ namespace Moses
psiter R = tpid2span.find(y->first);
if (R == tpid2span.end()) continue;
pp.update(y->first, y->second);
PT.calc_lex(PT.btfix,pp);
PT.calc_pfwd_fix(PT.btfix,pp);
PT.calc_pbwd_fix(PT.btfix,pp);
PT.ScorePPfix(pp);
pp.eval(PT.feature_weights);
PP.push_back(pp);
BOOST_FOREACH(span const& sspan, L->second)
@ -329,6 +329,7 @@ namespace Moses
BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
sptr<vector<int> > aln;
return aln;
}
}
}