mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
951bebb037
@ -129,7 +129,7 @@ public:
|
||||
// Fallback: scoreA < scoreB == false, non-deterministic sort
|
||||
return false;
|
||||
}
|
||||
return (phrA->Compare(*phrB) < 0);
|
||||
return (phrA->Compare(*phrB) > 0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -150,8 +150,8 @@ EvaluateWhenApplied(StatefulFeatureFunction const& sfff, int state_idx)
|
||||
// Manager& manager = this->GetManager(); //Get the manager and the ttask
|
||||
// ttasksptr const& ttask = manager.GetTtask();
|
||||
FFState const* prev = m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL;
|
||||
m_ffStates[state_idx]
|
||||
= sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
|
||||
m_ffStates[state_idx]
|
||||
= sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,12 +93,12 @@ LanguageModelIRST::
|
||||
TRACE_ERR( "reset mmap\n");
|
||||
if (m_lmtb) m_lmtb->reset_mmap();
|
||||
#endif
|
||||
|
||||
|
||||
delete m_lmtb;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
bool
|
||||
LanguageModelIRST::
|
||||
IsUseable(const FactorMask &mask) const
|
||||
{
|
||||
@ -106,7 +106,7 @@ IsUseable(const FactorMask &mask) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
LanguageModelIRST::
|
||||
Load()
|
||||
{
|
||||
@ -135,7 +135,7 @@ Load()
|
||||
if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CreateFactors(FactorCollection &factorCollection)
|
||||
{
|
||||
@ -179,31 +179,31 @@ CreateFactors(FactorCollection &factorCollection)
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const std::string &str ) const
|
||||
{
|
||||
return d->encode( str.c_str() ); // at the level of micro tags
|
||||
}
|
||||
|
||||
int
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const Word &word ) const
|
||||
{
|
||||
return GetLmID( word.GetFactor(m_factorType) );
|
||||
}
|
||||
|
||||
int
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const Factor *factor ) const
|
||||
{
|
||||
size_t factorId = factor->GetId();
|
||||
|
||||
|
||||
if ((factorId >= m_lmIdLookup.size()) || (m_lmIdLookup[factorId] == m_empty)) {
|
||||
if (d->incflag()==1) {
|
||||
std::string s = factor->GetString().as_string();
|
||||
int code = d->encode(s.c_str());
|
||||
|
||||
|
||||
//////////
|
||||
///poiche' non c'e' distinzione tra i factorIDs delle parole sorgenti
|
||||
///e delle parole target in Moses, puo' accadere che una parola target
|
||||
@ -231,7 +231,7 @@ GetLmID( const Factor *factor ) const
|
||||
/// ma si perde in efficienza nell'accesso perche' non e' piu' possibile quello random dei vettori
|
||||
/// a te la scelta!!!!
|
||||
////////////////
|
||||
|
||||
|
||||
|
||||
if (factorId >= m_lmIdLookup.size()) {
|
||||
//resize and fill with m_empty
|
||||
@ -242,7 +242,7 @@ GetLmID( const Factor *factor ) const
|
||||
//insert new code
|
||||
m_lmIdLookup[factorId] = code;
|
||||
return code;
|
||||
|
||||
|
||||
} else {
|
||||
return m_unknownId;
|
||||
}
|
||||
@ -251,7 +251,7 @@ GetLmID( const Factor *factor ) const
|
||||
}
|
||||
}
|
||||
|
||||
FFState const*
|
||||
FFState const*
|
||||
LanguageModelIRST::
|
||||
EmptyHypothesisState(const InputType &/*input*/) const
|
||||
{
|
||||
@ -260,12 +260,12 @@ EmptyHypothesisState(const InputType &/*input*/) const
|
||||
return ret.release();
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
|
||||
{
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
|
||||
fullScore = 0;
|
||||
ngramScore = 0;
|
||||
@ -308,7 +308,7 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
|
||||
++idx;
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
}
|
||||
}
|
||||
#endif
|
||||
ngramScore = 0.0;
|
||||
int end_loop = (int) phrase.GetSize();
|
||||
@ -334,20 +334,20 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
|
||||
ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
before_boundary = TransformLMScore(before_boundary);
|
||||
ngramScore = TransformLMScore(ngramScore);
|
||||
fullScore = ngramScore + before_boundary;
|
||||
}
|
||||
|
||||
FFState*
|
||||
FFState*
|
||||
LanguageModelIRST::
|
||||
EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
ScoreComponentCollection *out) const
|
||||
{
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
|
||||
if (!hypo.GetCurrTargetLength()) {
|
||||
std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
|
||||
@ -387,18 +387,17 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
position = (const int) begin+1;
|
||||
float score;
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW)
|
||||
{
|
||||
score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
while (position < adjust_end) {
|
||||
for (idx=1; idx<m_lmtb_size; idx++) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
codes[idx-1] = GetLmID(hypo.GetWord(position));
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
++position;
|
||||
if (CW) {
|
||||
score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
while (position < adjust_end) {
|
||||
for (idx=1; idx<m_lmtb_size; idx++) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
} else {
|
||||
codes[idx-1] = GetLmID(hypo.GetWord(position));
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
++position;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
position = (const int) begin+1;
|
||||
@ -430,12 +429,12 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
--idx;
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
else
|
||||
#else
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
#endif
|
||||
} else {
|
||||
} else {
|
||||
// need to set the LM state
|
||||
|
||||
if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
|
||||
@ -447,16 +446,16 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
msp = (char *) m_lmtb->cmaxsuffptr(codes,m_lmtb_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
score = TransformLMScore(score);
|
||||
out->PlusEquals(this, score);
|
||||
|
||||
|
||||
std::auto_ptr<IRSTLMState> ret(new IRSTLMState(msp));
|
||||
|
||||
|
||||
return ret.release();
|
||||
}
|
||||
|
||||
LMResult
|
||||
LMResult
|
||||
LanguageModelIRST::
|
||||
GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
{
|
||||
@ -494,7 +493,7 @@ GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
bool
|
||||
LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
|
||||
{
|
||||
if (sentences_done==-1) return true;
|
||||
@ -512,7 +511,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask)
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{
|
||||
@ -528,7 +527,7 @@ CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
LanguageModelIRST::
|
||||
SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
@ -539,6 +538,6 @@ SetParameter(const std::string& key, const std::string& value)
|
||||
}
|
||||
m_lmtb_size = m_nGramOrder;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
/// return shared pointer to ttask
|
||||
// only TargetPhrases have non-NULL ttaskptrs!
|
||||
virtual ttasksptr GetTtask() const {
|
||||
return ttasksptr();
|
||||
return ttasksptr();
|
||||
}
|
||||
|
||||
/// check if this phrase belongs to a valid ttask
|
||||
|
@ -44,7 +44,7 @@ public:
|
||||
// Fallback: compare pointers, non-deterministic sort
|
||||
return A < B;
|
||||
}
|
||||
return (phrA->Compare(*phrB) < 0);
|
||||
return (phrA->Compare(*phrB) > 0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -61,7 +61,7 @@ protected:
|
||||
void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
|
||||
|
||||
// "Document Level Translation" instructions, see aux_interpret_dlt
|
||||
std::vector<std::map<std::string,std::string> > m_dlt_meta;
|
||||
std::vector<std::map<std::string,std::string> > m_dlt_meta;
|
||||
|
||||
public:
|
||||
Sentence();
|
||||
|
@ -287,8 +287,8 @@ void Manager<RuleMatcher>::RecombineAndSort(
|
||||
// any 'duplicate' vertices are deleted.
|
||||
// TODO Set?
|
||||
typedef boost::unordered_map<SVertex *, SVertex *,
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
Map map;
|
||||
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
|
||||
p != buffer.end(); ++p) {
|
||||
|
@ -351,8 +351,8 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
|
||||
// any 'duplicate' vertices are deleted.
|
||||
// TODO Set?
|
||||
typedef boost::unordered_map<SVertex *, SVertex *,
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
Map map;
|
||||
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
|
||||
p != buffer.end(); ++p) {
|
||||
|
@ -11,7 +11,7 @@ namespace Syntax
|
||||
|
||||
class SVertexRecombinationEqualityPred
|
||||
{
|
||||
public:
|
||||
public:
|
||||
bool operator()(const SVertex *v1, const SVertex *v2) const {
|
||||
assert(v1->states.size() == v2->states.size());
|
||||
for (std::size_t i = 0; i < v1->states.size(); ++i) {
|
||||
|
@ -11,7 +11,7 @@ namespace Syntax
|
||||
|
||||
class SVertexRecombinationHasher
|
||||
{
|
||||
public:
|
||||
public:
|
||||
std::size_t operator()(const SVertex *v) const {
|
||||
std::size_t seed = 0;
|
||||
for (std::vector<FFState*>::const_iterator p = v->states.begin();
|
||||
|
@ -247,8 +247,8 @@ void Manager<RuleMatcher>::RecombineAndSort(
|
||||
// any 'duplicate' vertices are deleted.
|
||||
// TODO Set?
|
||||
typedef boost::unordered_map<SVertex *, SVertex *,
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
SVertexRecombinationHasher,
|
||||
SVertexRecombinationEqualityPred> Map;
|
||||
Map map;
|
||||
for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
|
||||
p != buffer.end(); ++p) {
|
||||
|
@ -33,7 +33,8 @@ namespace Moses
|
||||
PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
|
||||
: PhraseDictionary(line, true),
|
||||
m_numModels(0),
|
||||
m_restrict(false)
|
||||
m_restrict(false),
|
||||
m_specifiedZeros(false)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
@ -45,6 +46,9 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
|
||||
m_numModels = m_memberPDStrs.size();
|
||||
} else if (key == "restrict") {
|
||||
m_restrict = Scan<bool>(value);
|
||||
} else if (key == "zeros") {
|
||||
m_specifiedZeros = true;
|
||||
m_zeros = Scan<float>(Tokenize(value, ","));
|
||||
} else {
|
||||
PhraseDictionary::SetParameter(key, value);
|
||||
}
|
||||
@ -67,10 +71,20 @@ void PhraseDictionaryGroup::Load()
|
||||
}
|
||||
}
|
||||
UTIL_THROW_IF2(!pdFound,
|
||||
"Could not find component phrase table " << pdName);
|
||||
"Could not find member phrase table " << pdName);
|
||||
}
|
||||
UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
|
||||
"Total number of component model scores is unequal to specified number of scores");
|
||||
"Total number of member model scores is unequal to specified number of scores");
|
||||
|
||||
// Determine "zero" scores for features
|
||||
if (m_specifiedZeros) {
|
||||
UTIL_THROW_IF2(m_zeros.size() != m_numScoreComponents,
|
||||
"Number of specified zeros is unequal to number of member model scores");
|
||||
} else {
|
||||
// Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0)
|
||||
// or a smoothed "not in model" score)
|
||||
m_zeros = vector<float>(m_numScoreComponents, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
|
||||
@ -150,7 +164,7 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
|
||||
phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
|
||||
// Add phrase entry
|
||||
allPhrases.push_back(phrase);
|
||||
allScores[targetPhrase] = vector<float>(m_numScoreComponents, 0);
|
||||
allScores[targetPhrase] = vector<float>(m_zeros);
|
||||
}
|
||||
vector<float>& scores = allScores.find(targetPhrase)->second;
|
||||
|
||||
|
@ -70,6 +70,8 @@ protected:
|
||||
std::vector<PhraseDictionary*> m_memberPDs;
|
||||
size_t m_numModels;
|
||||
bool m_restrict;
|
||||
bool m_specifiedZeros;
|
||||
std::vector<float> m_zeros;
|
||||
std::vector<FeatureFunction*> m_pdFeature;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
|
||||
|
@ -166,21 +166,20 @@ options() const
|
||||
}
|
||||
|
||||
/// parse document-level translation info stored on the input
|
||||
void
|
||||
void
|
||||
TranslationTask::
|
||||
interpret_dlt()
|
||||
{
|
||||
if (m_source->GetType() != SentenceInput) return;
|
||||
Sentence const& snt = static_cast<Sentence const&>(*m_source);
|
||||
typedef std::map<std::string,std::string> dltmap_t;
|
||||
BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta())
|
||||
{
|
||||
dltmap_t::const_iterator i = M.find("type");
|
||||
if (i == M.end() || i->second != "adaptive-lm") continue;
|
||||
dltmap_t::const_iterator j = M.find("context-weights");
|
||||
if (j == M.end()) continue;
|
||||
SetContextWeights(j->second);
|
||||
}
|
||||
BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
|
||||
dltmap_t::const_iterator i = M.find("type");
|
||||
if (i == M.end() || i->second != "adaptive-lm") continue;
|
||||
dltmap_t::const_iterator j = M.find("context-weights");
|
||||
if (j == M.end()) continue;
|
||||
SetContextWeights(j->second);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -191,14 +190,14 @@ void TranslationTask::Run()
|
||||
<< " input and iowrapper.");
|
||||
|
||||
const size_t translationId = m_source->GetTranslationId();
|
||||
|
||||
|
||||
|
||||
// report wall time spent on translation
|
||||
Timer translationTime;
|
||||
translationTime.start();
|
||||
|
||||
interpret_dlt(); // parse document-level translation info stored on the input
|
||||
|
||||
|
||||
// report thread number
|
||||
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
|
||||
VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl);
|
||||
|
Loading…
Reference in New Issue
Block a user