Hash t-options directly.

Also, don't store the source phrase in the t-option, as it is already
available from the target phrase.
This commit is contained in:
Barry Haddow 2012-09-10 14:57:45 +01:00
parent c6c2f516f6
commit bc340ab7a2
4 changed files with 31 additions and 57 deletions

View File

@ -40,20 +40,8 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
, const InputType &inputType)
: m_targetPhrase(targetPhrase)
, m_sourceWordsRange(wordsRange)
{
// set score
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
if (inputType.GetType() == SentenceInput)
{
Phrase phrase = inputType.GetSubString(wordsRange);
m_sourcePhrase = new Phrase(phrase);
}
else
{ // TODO lex reordering with confusion network
m_sourcePhrase = new Phrase(targetPhrase.GetSourcePhrase());
}
}
, m_scoreBreakdown(targetPhrase.GetScoreBreakdown())
{}
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange
@ -70,35 +58,11 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
score[0] = FloorScore(-numeric_limits<float>::infinity());
m_scoreBreakdown.Assign(scoreProducer, score);
}
if (inputType.GetType() == SentenceInput)
{
Phrase phrase = inputType.GetSubString(wordsRange);
m_sourcePhrase = new Phrase(phrase);
}
else
{ // TODO lex reordering with confusion network
m_sourcePhrase = new Phrase(targetPhrase.GetSourcePhrase());
//the target phrase from a confusion network/lattice has input scores that we want to keep
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
}
}
/**
 * Copy constructor.
 *
 * Copies the target phrase, source words range, future score, score
 * breakdown and cached-score map from \p copy. The source phrase is
 * duplicated into a freshly allocated Phrase; when \p copy has no source
 * phrase, a new Phrase constructed with ARRAY_SIZE_INCR is allocated instead.
 *
 * NOTE(review): m_cachedScores is copied member-wise. If _ScoreCacheMap
 * stores raw pointers, both objects end up referring to the same Scores
 * instances -- confirm ownership against the destructor.
 */
TranslationOption::TranslationOption(const TranslationOption &copy)
  : m_targetPhrase(copy.m_targetPhrase)
  // TODO: replace the conditional below with an unconditional
  // new Phrase(*copy.m_sourcePhrase) once translation options for
  // confusion networks are properly implemented.
  , m_sourcePhrase( (copy.m_sourcePhrase != NULL) ? new Phrase(*copy.m_sourcePhrase) : new Phrase(ARRAY_SIZE_INCR))
  , m_sourceWordsRange(copy.m_sourceWordsRange)
  , m_futureScore(copy.m_futureScore)
  , m_scoreBreakdown(copy.m_scoreBreakdown)
  , m_cachedScores(copy.m_cachedScores)
{
}
TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)
@ -169,7 +133,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
void TranslationOption::CacheScores(const ScoreProducer &producer, const Scores &score)
{
m_cachedScores[&producer] = new Scores(score);
m_cachedScores[&producer] = score;
}
}

View File

@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <map>
#include <vector>
#include <boost/functional/hash.hpp>
#include "WordsBitmap.h"
#include "WordsRange.h"
#include "Phrase.h"
@ -63,7 +64,6 @@ class TranslationOption
protected:
TargetPhrase m_targetPhrase; /*< output phrase when using this translation option */
Phrase *m_sourcePhrase; /*< input phrase translated by this */
const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */
float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
@ -73,7 +73,7 @@ protected:
//! possible to estimate, it is included here.
ScoreComponentCollection m_scoreBreakdown;
typedef std::map<const ScoreProducer *, const Scores *> _ScoreCacheMap;
typedef std::map<const ScoreProducer *, Scores> _ScoreCacheMap;
_ScoreCacheMap m_cachedScores;
public:
@ -86,17 +86,10 @@ public:
, const TargetPhrase &targetPhrase
, const InputType &inputType
, const UnknownWordPenaltyProducer* uwpProducer);
/** copy constructor */
TranslationOption(const TranslationOption &copy);
/** copy constructor, but change words range. used by caching */
TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange);
/** Releases the source phrase and every value held in the score cache. */
~TranslationOption() {
  delete m_sourcePhrase;

  // Walk the cache once, freeing the mapped value of each entry.
  _ScoreCacheMap::const_iterator entry = m_cachedScores.begin();
  const _ScoreCacheMap::const_iterator last = m_cachedScores.end();
  while (entry != last) {
    delete entry->second;
    ++entry;
  }
}
/** returns true if all feature types in featuresToCheck are compatible between the two phrases */
bool IsCompatible(const Phrase& phrase, const std::vector<FactorType>& featuresToCheck) const;
@ -116,7 +109,7 @@ public:
/** returns source phrase */
const Phrase *GetSourcePhrase() const {
return m_sourcePhrase;
return &(m_targetPhrase.GetSourcePhrase());
}
/** whether source span overlaps with those of a hypothesis */
@ -158,7 +151,7 @@ public:
if(it == m_cachedScores.end())
return NULL;
else
return it->second;
return &(it->second);
}
/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
@ -167,8 +160,25 @@ public:
void CacheScores(const ScoreProducer &scoreProducer, const Scores &score);
TO_STRING();
/**
 * Equality: two translation options are equal when they have the same
 * source words range and the same target phrase.
 */
bool operator== (const TranslationOption &rhs) const
{
  // Compare the words range first; the target phrase is only compared
  // when the ranges match.
  if (!(m_sourceWordsRange == rhs.m_sourceWordsRange)) {
    return false;
  }
  return m_targetPhrase == rhs.m_targetPhrase;
}
};
/**
 * Hash function for TranslationOption, found by boost::hash through
 * argument-dependent lookup.
 *
 * Folds the target phrase, the start position and the end position into a
 * single value, in that order.
 * NOTE(review): presumably the start/end positions come from the source
 * words range compared by operator== -- confirm, since equal objects must
 * hash equally.
 */
inline size_t hash_value(const TranslationOption& translationOption) {
  size_t combined = 0;

  boost::hash_combine(combined, translationOption.GetTargetPhrase());
  boost::hash_combine(combined, translationOption.GetStartPos());
  boost::hash_combine(combined, translationOption.GetEndPos());

  return combined;
}
}
#endif

View File

@ -734,8 +734,7 @@ void TranslationOptionCollection::PreCalculateScores()
for (size_t j = 0; j < m_collection[i].size(); ++j) {
for (size_t k = 0; k < m_collection[i][j].size(); ++k) {
const TranslationOption* toption = m_collection[i][j].Get(k);
const TranslationOptionKey key(toption->GetTargetPhrase(),*(toption->GetSourcePhrase()));
ScoreComponentCollection& breakdown = m_precalculatedScores[key];
ScoreComponentCollection& breakdown = m_precalculatedScores[*toption];
for (size_t si = 0; si < precomputedFeatures.size(); ++si) {
precomputedFeatures[si]->Evaluate(
*toption,
@ -753,11 +752,13 @@ void TranslationOptionCollection::InsertPreCalculatedScores
(const TranslationOption& translationOption, ScoreComponentCollection* scoreBreakdown)
const
{
const TranslationOptionKey key(translationOption.GetTargetPhrase(),*(translationOption.GetSourcePhrase()));
boost::unordered_map<TranslationOptionKey,ScoreComponentCollection>::const_iterator scoreIter =
m_precalculatedScores.find(key);
boost::unordered_map<TranslationOption,ScoreComponentCollection>::const_iterator scoreIter =
m_precalculatedScores.find(translationOption);
if (scoreIter != m_precalculatedScores.end()) {
scoreBreakdown->PlusEquals(scoreIter->second);
} else {
TRACE_ERR("ERROR: " << translationOption << " missing from precalculation cache" << endl);
assert(0);
}
}

View File

@ -70,8 +70,7 @@ protected:
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
std::vector<Phrase*> m_unksrcs;
typedef std::pair<Phrase,Phrase> TranslationOptionKey;
boost::unordered_map<TranslationOptionKey,ScoreComponentCollection> m_precalculatedScores;
boost::unordered_map<TranslationOption,ScoreComponentCollection> m_precalculatedScores;
TranslationOptionCollection(const TranslationSystem* system, InputType const& src, size_t maxNoTransOptPerCoverage,