redo state API for simple distortion and KenLM. Regression failure with KenLM

This commit is contained in:
Hieu Hoang 2015-10-12 18:35:59 +01:00
parent 1d0f7fcf14
commit 20959b8660
4 changed files with 27 additions and 66 deletions

View File

@ -336,6 +336,11 @@ size_t PhraseBasedReorderingState::hash() const
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality against another FFState is not implemented yet: the body only
// invokes UTIL_THROW2 with a TODO message and contains no return statement
// (presumably UTIL_THROW2 throws - confirm against its definition).
// The `other` argument is never inspected.
bool PhraseBasedReorderingState::operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
LRState*
PhraseBasedReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
@ -385,6 +390,11 @@ size_t BidirectionalReorderingState::hash() const
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality against another FFState is not implemented yet: the body only
// invokes UTIL_THROW2 with a TODO message and contains no return statement
// (presumably UTIL_THROW2 throws - confirm against its definition).
// The `other` argument is never inspected.
bool BidirectionalReorderingState::operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
///////////////////////////
//HierarchicalReorderingBackwardState
@ -415,6 +425,11 @@ size_t HReorderingBackwardState::hash() const
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality against another FFState is not implemented yet: the body only
// invokes UTIL_THROW2 with a TODO message and contains no return statement
// (presumably UTIL_THROW2 throws - confirm against its definition).
// The `other` argument is never inspected.
bool HReorderingBackwardState::operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
LRState*
HReorderingBackwardState::
Expand(const TranslationOption& topt, const InputType& input,
@ -471,6 +486,11 @@ size_t HReorderingForwardState::hash() const
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// Equality against another FFState is not implemented yet: the body only
// invokes UTIL_THROW2 with a TODO message and contains no return statement
// (presumably UTIL_THROW2 throws - confirm against its definition).
// The `other` argument is never inspected.
bool HReorderingForwardState::operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
// For compatibility with the phrase-based reordering model, scoring is one
// step delayed.
// The forward model determines orientations heuristically as follows:

View File

@ -223,10 +223,7 @@ public:
Compare(const FFState& o) const;
virtual size_t hash() const;
// Placeholder equality operator defined in-class: the body only invokes
// UTIL_THROW2 with a TODO message and never looks at `other`
// (presumably UTIL_THROW2 throws - confirm against its definition).
virtual bool operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
virtual bool operator==(const FFState& other) const;
LRState*
Expand(const TranslationOption& topt, const InputType& input,
@ -255,10 +252,7 @@ public:
Compare(const FFState& o) const;
virtual size_t hash() const;
// Placeholder equality operator defined in-class: the body only invokes
// UTIL_THROW2 with a TODO message and never looks at `other`
// (presumably UTIL_THROW2 throws - confirm against its definition).
virtual bool operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
virtual bool operator==(const FFState& other) const;
virtual
LRState*
@ -287,10 +281,7 @@ public:
virtual int Compare(const FFState& o) const;
virtual size_t hash() const;
// Placeholder equality operator defined in-class: the body only invokes
// UTIL_THROW2 with a TODO message and never looks at `other`
// (presumably UTIL_THROW2 throws - confirm against its definition).
virtual bool operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
ScoreComponentCollection* scores) const;
@ -320,10 +311,7 @@ public:
virtual int Compare(const FFState& o) const;
virtual size_t hash() const;
// Placeholder equality operator defined in-class: the body only invokes
// UTIL_THROW2 with a TODO message and never looks at `other`
// (presumably UTIL_THROW2 throws - confirm against its definition).
virtual bool operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
virtual bool operator==(const FFState& other) const;
virtual LRState* Expand(const TranslationOption& hypo,
const InputType& input,

View File

@ -195,35 +195,6 @@ Create(Manager& manager, InputType const& m_source,
#endif
}
/** Three-way comparison used to decide whether two hypotheses can be
 *  recombined.
 *
 *  It works as a sorting function so that hypotheses can be kept in an
 *  ordered list, which makes recombination much quicker.
 *
 *  The source coverage (m_sourceCompleted) is compared first; if that ties,
 *  the feature-function states in m_ffStates are compared position by
 *  position and the first non-zero result is returned.
 *
 *  \param compare the hypothesis to compare this one against
 *  \return a negative value if this < compare, a positive value if
 *          this > compare, and 0 if the two compare equal
 */
int
Hypothesis::
RecombineCompare(const Hypothesis &compare) const
{
  // Coverage decides the order whenever it differs.
  const int coverageOrder = m_sourceCompleted.Compare(compare.m_sourceCompleted);
  if (coverageOrder != 0) {
    return coverageOrder;
  }

  for (unsigned idx = 0; idx < m_ffStates.size(); ++idx) {
    const bool haveMine = (m_ffStates[idx] != NULL);
    const bool haveTheirs = (compare.m_ffStates[idx] != NULL);

    int stateOrder;
    if (haveMine && haveTheirs) {
      stateOrder = m_ffStates[idx]->Compare(*compare.m_ffStates[idx]);
    } else {
      // TODO: Can this situation actually occur?
      // A missing state sorts before a present one.
      stateOrder = int(haveMine) - int(haveTheirs);
    }

    if (stateOrder != 0) {
      return stateOrder;
    }
  }

  // Same coverage and every state compared equal.
  return 0;
}
void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
@ -650,9 +621,10 @@ GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
// Builds a hash value for this hypothesis: starting from a seed of 0, the
// hash() of each entry in m_ffStates is folded into the seed with
// boost::hash_combine, and the resulting seed is returned.
// NOTE(review): each state pointer is dereferenced without a null check -
// confirm m_ffStates never holds NULL entries when this is called.
size_t Hypothesis::hash() const
{
size_t seed = 0;
BOOST_FOREACH(const FFState *state, m_ffStates) {
for (size_t i = 0; i < m_ffStates.size(); ++i) {
const FFState *state = m_ffStates[i];
size_t hash = state->hash();
boost::hash_combine(seed ,hash);
boost::hash_combine(seed, hash);
}
return seed;
}

View File

@ -197,8 +197,6 @@ public:
return m_sourceCompleted.IsComplete();
}
int RecombineCompare(const Hypothesis &compare) const;
void GetOutputPhrase(Phrase &out) const;
void ToStream(std::ostream& out) const {
@ -321,23 +319,6 @@ struct CompareHypothesisTotalScore {
#define FREEHYPO(hypo) delete hypo
#endif
/** Defines a less-than relation on hypotheses.
 * The particular order is not important; it is only needed to work out
 * which hypotheses are equal, based on:
 *   - the last n-1 target words being the same, and
 *   - the covers (source words translated) being the same.
 * Directly using RecombineCompare is unreliable because the Compare methods
 * of some states are based on architecture-dependent pointer comparisons.
 * NOTE(review): an earlier version of this comment said hypothesis IDs are
 * used instead, but the operator below calls RecombineCompare directly -
 * confirm which behaviour is intended.
 */
class HypothesisRecombinationOrderer
{
public:
  /** \return true when hypoA orders strictly before hypoB according to
   *  Hypothesis::RecombineCompare.
   */
  bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
    const int order = hypoA->RecombineCompare(*hypoB);
    return order < 0;
  }
};
class HypothesisRecombinationUnordered
{
public: