2014-07-09 05:41:28 +04:00
|
|
|
// -*- c++ -*-
|
|
|
|
// Phrase scorer that counts the number of unaligned words in the phrase
|
2015-04-30 08:05:11 +03:00
|
|
|
// written by Ulrich Germann
|
2014-07-09 05:41:28 +04:00
|
|
|
|
|
|
|
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
|
|
|
|
#include "sapt_pscore_base.h"
|
|
|
|
#include <boost/dynamic_bitset.hpp>
|
|
|
|
|
|
|
|
namespace Moses {
|
|
|
|
namespace bitext
|
|
|
|
{
|
|
|
|
template<typename Token>
|
|
|
|
class
|
|
|
|
PScoreLex1 : public PhraseScorer<Token>
|
|
|
|
{
|
2015-04-29 22:16:52 +03:00
|
|
|
float m_alpha;
|
|
|
|
string m_lexfile;
|
2014-07-09 05:41:28 +04:00
|
|
|
public:
|
|
|
|
LexicalPhraseScorer2<Token> scorer;
|
2015-04-30 08:05:11 +03:00
|
|
|
|
|
|
|
PScoreLex1(string const& alphaspec, string const& lexfile)
|
|
|
|
{
|
2014-07-09 05:41:28 +04:00
|
|
|
this->m_index = -1;
|
2015-04-30 08:05:11 +03:00
|
|
|
this->m_num_feats = 2;
|
2014-07-09 05:41:28 +04:00
|
|
|
this->m_feature_names.reserve(2);
|
|
|
|
this->m_feature_names.push_back("lexfwd");
|
|
|
|
this->m_feature_names.push_back("lexbwd");
|
2015-04-29 22:16:52 +03:00
|
|
|
m_alpha = atof(alphaspec.c_str());
|
|
|
|
m_lexfile = lexfile;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
load()
|
2015-04-30 08:05:11 +03:00
|
|
|
{
|
|
|
|
scorer.open(m_lexfile);
|
2014-07-09 05:41:28 +04:00
|
|
|
}
|
2015-04-30 08:05:11 +03:00
|
|
|
|
|
|
|
void
|
|
|
|
operator()(Bitext<Token> const& bt,
|
|
|
|
PhrasePair<Token>& pp,
|
2014-07-09 05:41:28 +04:00
|
|
|
vector<float> * dest = NULL) const
|
|
|
|
{
|
|
|
|
if (!dest) dest = &pp.fvals;
|
|
|
|
// uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
|
|
|
|
// parse_pid(pp.p1, sid1, off1, len1);
|
|
|
|
// parse_pid(pp.p2, sid2, off2, len2);
|
|
|
|
#if 0
|
|
|
|
cout << len1 << " " << len2 << endl;
|
|
|
|
Token const* t1 = bt.T1->sntStart(sid1);
|
|
|
|
for (size_t i = off1; i < off1 + len1; ++i)
|
2015-04-30 08:05:11 +03:00
|
|
|
cout << (*bt.V1)[t1[i].id()] << " ";
|
2014-07-09 05:41:28 +04:00
|
|
|
cout << __FILE__ << ":" << __LINE__ << endl;
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2014-07-09 05:41:28 +04:00
|
|
|
Token const* t2 = bt.T2->sntStart(sid2);
|
|
|
|
for (size_t i = off2; i < off2 + len2; ++i)
|
2015-04-30 08:05:11 +03:00
|
|
|
cout << (*bt.V2)[t2[i].id()] << " ";
|
2014-07-09 05:41:28 +04:00
|
|
|
cout << __FILE__ << ":" << __LINE__ << endl;
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2014-07-09 05:41:28 +04:00
|
|
|
BOOST_FOREACH (int a, pp.aln)
|
|
|
|
cout << a << " " ;
|
|
|
|
cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2014-07-09 05:41:28 +04:00
|
|
|
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
|
|
|
|
bt.T2->sntStart(sid2)+off2,0,len2,
|
|
|
|
pp.aln, m_alpha,
|
|
|
|
(*dest)[this->m_index],
|
|
|
|
(*dest)[this->m_index+1]);
|
|
|
|
#endif
|
2015-04-30 08:05:11 +03:00
|
|
|
scorer.score(pp.start1,0, pp.len1,
|
|
|
|
pp.start2,0, pp.len2, pp.aln, m_alpha,
|
|
|
|
(*dest)[this->m_index],
|
2014-07-09 05:41:28 +04:00
|
|
|
(*dest)[this->m_index+1]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
} //namespace bitext
|
|
|
|
} // namespace Moses
|
|
|
|
|