mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-06 08:18:02 +03:00
bug fix to lexicalized reordering model
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@834 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
ef42ad791e
commit
3e7e4570b5
@ -55,7 +55,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
, _lmstats(0)
|
||||
{ // used for initial seeding of trans process
|
||||
// initialize scores
|
||||
_hash_computed = false;
|
||||
//_hash_computed = false;
|
||||
s_HypothesesCreated = 1;
|
||||
ResetScore();
|
||||
}
|
||||
@ -85,7 +85,7 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
|
||||
// that this hypothesis has already translated!
|
||||
assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
|
||||
|
||||
_hash_computed = false;
|
||||
//_hash_computed = false;
|
||||
m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
|
||||
m_wordDeleted = transOpt.IsDeletionOption();
|
||||
m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
|
||||
@ -160,24 +160,36 @@ Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &em
|
||||
return new(ptr) Hypothesis(m_source, emptyTarget);
|
||||
}
|
||||
|
||||
void Hypothesis::GenerateNGramCompareHash() const
|
||||
{
|
||||
_hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
|
||||
_hash_computed = true;
|
||||
vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedReprentation();
|
||||
_hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
|
||||
}
|
||||
//void Hypothesis::GenerateNGramCompareHash() const
|
||||
//{
|
||||
// _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
|
||||
// _hash_computed = true;
|
||||
// vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedRepresentation();
|
||||
// _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
|
||||
//}
|
||||
|
||||
/** Check whether two hypotheses can be recombined.
|
||||
this is actually a sorting function that allows us to
|
||||
keep an ordered list of hypotheses. This makes recombination
|
||||
much quicker.
|
||||
*/
|
||||
int Hypothesis::NGramCompare(const Hypothesis &compare) const
|
||||
{ // -1 = this < compare
|
||||
// +1 = this > compare
|
||||
// 0 = this ==compare
|
||||
if (m_languageModelStates < compare.m_languageModelStates) return -1;
|
||||
if (m_languageModelStates > compare.m_languageModelStates) return 1;
|
||||
if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1;
|
||||
if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;
|
||||
if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;
|
||||
if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;
|
||||
if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0;
|
||||
if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;
|
||||
if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
* Calculates the overall language model score by combining the scores
|
||||
|
||||
/** Calculates the overall language model score by combining the scores
|
||||
* of language models generated for each of the factors. Because the factors
|
||||
* represent a variety of tag sets, and because factors with smaller tag sets
|
||||
* (such as POS instead of words) allow us to calculate richer statistics, we
|
||||
|
@ -83,9 +83,9 @@ protected:
|
||||
void CalcDistortionScore();
|
||||
//TODO: add appropriate arguments to score calculator
|
||||
|
||||
void GenerateNGramCompareHash() const;
|
||||
mutable size_t _hash;
|
||||
mutable bool _hash_computed;
|
||||
// void GenerateNGramCompareHash() const;
|
||||
// mutable size_t _hash;
|
||||
// mutable bool _hash_computed;
|
||||
|
||||
public:
|
||||
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
|
||||
@ -205,12 +205,12 @@ public:
|
||||
|
||||
int NGramCompare(const Hypothesis &compare) const;
|
||||
|
||||
inline size_t hash() const
|
||||
{
|
||||
if (_hash_computed) return _hash;
|
||||
GenerateNGramCompareHash();
|
||||
return _hash;
|
||||
}
|
||||
// inline size_t hash() const
|
||||
// {
|
||||
// if (_hash_computed) return _hash;
|
||||
// GenerateNGramCompareHash();
|
||||
// return _hash;
|
||||
// }
|
||||
|
||||
void ToStream(std::ostream& out) const
|
||||
{
|
||||
|
@ -102,7 +102,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
|
||||
Hypothesis *hypoExisting = *iter;
|
||||
if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
|
||||
{ // incoming hypo is better than the one we have
|
||||
VERBOSE(3,"better than matching hyp, recombining, ");
|
||||
VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
|
||||
if (m_nBestIsEnabled) {
|
||||
hypo->AddArc(hypoExisting);
|
||||
Detach(iter);
|
||||
@ -114,7 +114,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
|
||||
}
|
||||
else
|
||||
{ // already storing the best hypo. discard current hypo
|
||||
VERBOSE(3,"worse than matching hyp, recombining" << std::endl)
|
||||
VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
|
||||
if (m_nBestIsEnabled) {
|
||||
(*iter)->AddArc(hypo);
|
||||
} else {
|
||||
@ -183,6 +183,16 @@ void HypothesisCollection::PruneToSize(size_t newSize)
|
||||
}
|
||||
VERBOSE(3,", pruned to size " << size() << endl);
|
||||
|
||||
IFVERBOSE(3) {
|
||||
cerr << "stack now contains: ";
|
||||
for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
|
||||
{
|
||||
Hypothesis *hypo = *iter;
|
||||
cerr << hypo->GetId() << " (" << hypo->GetTotalScore() << ") ";
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
|
||||
m_worstScore = scoreThreshold;
|
||||
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
|
||||
|
@ -66,12 +66,12 @@ struct HypothesisRecombinationComparer
|
||||
}
|
||||
};
|
||||
|
||||
struct HypothesisRecombinationHasher
|
||||
{
|
||||
size_t operator()(const Hypothesis* hypo) const {
|
||||
return hypo->hash();
|
||||
}
|
||||
};
|
||||
//struct HypothesisRecombinationHasher
|
||||
//{
|
||||
// size_t operator()(const Hypothesis* hypo) const {
|
||||
// return hypo->hash();
|
||||
// }
|
||||
//};
|
||||
|
||||
/** Stack for instances of Hypothesis, includes functions for pruning. */
|
||||
class HypothesisCollection
|
||||
|
@ -7,45 +7,56 @@
|
||||
#include <algorithm>
|
||||
#include "LexicalReordering.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "DistortionOrientation.h"
|
||||
#include "StaticData.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*
|
||||
* Load the file pointed to by filename; set up the table according to
|
||||
* the orientation and condition parameters. Direction will be used
|
||||
* later for computing the score.
|
||||
*/
|
||||
/** Load the file pointed to by filename; set up the table according to
|
||||
* the orientation and condition parameters. Direction will be used
|
||||
* later for computing the score.
|
||||
* \param filename file that contains the table
|
||||
* \param orientation orientation as defined in DistortionOrientationType (monotone/msd)
|
||||
* \param direction direction as defined in LexReorderType (forward/backward/bidirectional)
|
||||
* \param condition either conditioned on foreign or foreign+english
|
||||
* \param weights weight setting for this model
|
||||
* \param input input factors
|
||||
* \param output output factors
|
||||
*/
|
||||
LexicalReordering::LexicalReordering(const std::string &filename,
|
||||
int orientation, int direction,
|
||||
int condition, const std::vector<float>& weights,
|
||||
vector<FactorType> input, vector<FactorType> output) :
|
||||
m_orientation(orientation), m_condition(condition), m_numberscores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
|
||||
m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
|
||||
{
|
||||
//add score producer
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
//manage the weights by SetWeightsForScoreProducer method of static data.
|
||||
if(direction == LexReorderType::Bidirectional)
|
||||
{
|
||||
m_direction.push_back(LexReorderType::Backward); // this order is important
|
||||
m_direction.push_back(LexReorderType::Forward);
|
||||
m_direction.push_back(LexReorderType::Backward);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_direction.push_back(direction);
|
||||
}
|
||||
// set number of orientations
|
||||
if( orientation == DistortionOrientationType::Monotone) {
|
||||
m_numOrientationTypes = 2;
|
||||
}
|
||||
else if ( orientation == DistortionOrientationType::Msd) {
|
||||
m_numOrientationTypes = 3;
|
||||
}
|
||||
const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights);
|
||||
// Load the file
|
||||
LoadFile();
|
||||
// PrintTable();
|
||||
// PrintTable();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Loads the file into a map.
|
||||
*/
|
||||
/** Loads the orientation file into a map
|
||||
*/
|
||||
void LexicalReordering::LoadFile()
|
||||
{
|
||||
InputFileStream inFile(m_filename);
|
||||
@ -73,14 +84,11 @@ void LexicalReordering::LoadFile()
|
||||
probs = Scan<float>(Tokenize(tokens[F_PROBS]));
|
||||
|
||||
}
|
||||
if (m_orientation == DistortionOrientationType::Monotone)
|
||||
{
|
||||
assert(probs.size() == MONO_NUM_PROBS); // 2 backward, 2 forward
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(probs.size() == MSD_NUM_PROBS); // 3 backward, 3 forward
|
||||
}
|
||||
if (probs.size() != m_direction.size() * m_numOrientationTypes) {
|
||||
TRACE_ERR("found " << probs.size() << " probabilities, expected "
|
||||
<< m_direction.size() * m_numOrientationTypes << endl);
|
||||
exit(0);
|
||||
}
|
||||
std::vector<float> scv(probs.size());
|
||||
std::transform(probs.begin(),probs.end(),probs.begin(),TransformScore);
|
||||
m_orientation_table[key] = probs;
|
||||
@ -88,9 +96,8 @@ void LexicalReordering::LoadFile()
|
||||
inFile.Close();
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the table in a readable format.
|
||||
*/
|
||||
/** print the table in a readable format (not used at this point)
|
||||
*/
|
||||
void LexicalReordering::PrintTable()
|
||||
{
|
||||
// iterate over map
|
||||
@ -113,140 +120,141 @@ void LexicalReordering::PrintTable()
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
|
||||
/** compute the orientation given a hypothesis
|
||||
*/
|
||||
int LexicalReordering::GetOrientation(const Hypothesis *curr_hypothesis)
|
||||
{
|
||||
std::vector<float> score(m_numberscores, 0);
|
||||
vector<float> val;
|
||||
for(unsigned int i=0; i < m_direction.size(); i++)
|
||||
{
|
||||
int direction = m_direction[i];
|
||||
int orientation = DistortionOrientation::GetOrientation(hypothesis, direction);
|
||||
if(m_condition==LexReorderType::Fe)
|
||||
const Hypothesis *prevHypo = curr_hypothesis->GetPrevHypo();
|
||||
|
||||
const WordsRange &currSourceRange = curr_hypothesis->GetCurrSourceWordsRange();
|
||||
size_t curr_source_start = currSourceRange.GetStartPos();
|
||||
size_t curr_source_end = currSourceRange.GetEndPos();
|
||||
|
||||
//if there's no previous source...
|
||||
if(prevHypo->GetId() == 0){
|
||||
if (curr_source_start == 0)
|
||||
{
|
||||
//this key string is F+'|||'+E from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
|
||||
+"||| "
|
||||
+hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
|
||||
return ORIENTATION_MONOTONE;
|
||||
}
|
||||
else {
|
||||
return ORIENTATION_DISCONTINUOUS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const WordsRange &prevSourceRange = prevHypo->GetCurrSourceWordsRange();
|
||||
size_t prev_source_start = prevSourceRange.GetStartPos();
|
||||
size_t prev_source_end = prevSourceRange.GetEndPos();
|
||||
if(prev_source_end==curr_source_start-1)
|
||||
{
|
||||
return ORIENTATION_MONOTONE;
|
||||
}
|
||||
// distinguish between monotone, swap, discontinuous
|
||||
else if(m_orientation==DistortionOrientationType::Msd)
|
||||
{
|
||||
if(prev_source_start==curr_source_end+1)
|
||||
{
|
||||
return ORIENTATION_SWAP;
|
||||
}
|
||||
else
|
||||
{
|
||||
//this key string is F from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
|
||||
return ORIENTATION_DISCONTINUOUS;
|
||||
}
|
||||
if(val.size()> 0)
|
||||
}
|
||||
// only distinguish between monotone, non monotone
|
||||
else
|
||||
{
|
||||
return ORIENTATION_NON_MONOTONE;
|
||||
}
|
||||
}
|
||||
|
||||
/** calculate the score(s) for a hypothesis
|
||||
*/
|
||||
std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
|
||||
{
|
||||
std::vector<float> score(m_numScores, 0);
|
||||
for(unsigned int i=0; i < m_direction.size(); i++) // backward, forward, or both
|
||||
{
|
||||
vector<float> val; // we will store the matching probabilities here
|
||||
|
||||
// FIRST, get probability distribution
|
||||
|
||||
int direction = m_direction[i]; // either backward or forward
|
||||
|
||||
// no score, if we would have to compute the forward score from the initial hypothesis
|
||||
if (direction == LexReorderType::Backward || hypothesis->GetPrevHypo()->GetId() != 0) {
|
||||
|
||||
if (direction == LexReorderType::Backward) {
|
||||
// conditioned on both foreign and English
|
||||
if(m_condition==LexReorderType::Fe)
|
||||
{
|
||||
//this key string is F+'|||'+E from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
|
||||
+"||| "
|
||||
+hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
|
||||
}
|
||||
// only conditioned on foreign
|
||||
else
|
||||
{
|
||||
//this key string is F from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
|
||||
}
|
||||
}
|
||||
|
||||
// if forward looking, condition on previous phrase
|
||||
else {
|
||||
// conditioned on both foreign and English
|
||||
if(m_condition==LexReorderType::Fe)
|
||||
{
|
||||
//this key string is F+'|||'+E from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetPrevHypo()->GetSourcePhraseStringRep(m_sourceFactors)
|
||||
+"||| "
|
||||
+hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_targetFactors)];
|
||||
}
|
||||
// only conditioned on foreign
|
||||
else
|
||||
{
|
||||
//this key string is F from the hypothesis
|
||||
val=m_orientation_table[hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_sourceFactors)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SECOND, look up score
|
||||
|
||||
if(val.size()> 0) // valid entry
|
||||
{
|
||||
if(m_orientation==DistortionOrientationType::Msd)
|
||||
{
|
||||
if(direction==LexReorderType::Backward)
|
||||
{
|
||||
if(orientation==DistortionOrientationType::MONO)
|
||||
{
|
||||
score[BACK_M] = val[BACK_M];
|
||||
}
|
||||
else if(orientation==DistortionOrientationType::SWAP)
|
||||
{
|
||||
score[BACK_S] = val[BACK_S];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_D] = val[BACK_D];
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
//if we only have forward scores (no backward scores) in the table,
|
||||
//then forward scores have no offset so we can use the indices of the backwards scores
|
||||
if(orientation==DistortionOrientationType::MONO)
|
||||
{
|
||||
if(m_numberscores>3)
|
||||
{
|
||||
score[FOR_M] = val[FOR_M];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_M] = val[BACK_M];
|
||||
}
|
||||
}
|
||||
else if(orientation==DistortionOrientationType::SWAP)
|
||||
{
|
||||
if(m_numberscores>3)
|
||||
{
|
||||
score[FOR_S] = val[FOR_S];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_S] = val[BACK_S];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_numberscores>3)
|
||||
{
|
||||
score[FOR_D] = val[FOR_D];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_D] = val[BACK_D];
|
||||
}
|
||||
}
|
||||
}
|
||||
int orientation = GetOrientation(hypothesis);
|
||||
float value = val[ orientation + i * m_numOrientationTypes ];
|
||||
// one weight per direction
|
||||
if ( m_numScores < m_numOrientationTypes ) {
|
||||
score[i] = value;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(direction==LexReorderType::Backward)
|
||||
{
|
||||
if(orientation==DistortionOrientationType::MONO)
|
||||
{
|
||||
score[BACK_MONO] = val[BACK_MONO];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_NONMONO] = val[BACK_NONMONO];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//if we only have forward scores (no backward scores) in the table,
|
||||
//then forward scores have no offset so we can use the indices of the backwards scores
|
||||
if(orientation==DistortionOrientationType::MONO)
|
||||
{
|
||||
if(m_numberscores>3)
|
||||
{
|
||||
score[FOR_MONO] = val[FOR_MONO];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_MONO] = val[BACK_MONO];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_numberscores>3)
|
||||
{
|
||||
score[FOR_NONMONO] = val[FOR_NONMONO];
|
||||
}
|
||||
else
|
||||
{
|
||||
score[BACK_NONMONO] = val[BACK_NONMONO];
|
||||
}
|
||||
}
|
||||
}
|
||||
// one weight per direction and type
|
||||
else {
|
||||
score[ orientation + i * m_numOrientationTypes ] = value;
|
||||
}
|
||||
|
||||
|
||||
// IFVERBOSE(3) {
|
||||
// cerr << "\tdistortion type " << orientation << " =>";
|
||||
// for(unsigned int j=0;j<score.size();j++) {
|
||||
// cerr << " " << score[j];
|
||||
// }
|
||||
// cerr << endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
|
||||
/** return the number of scores produced by this model */
|
||||
unsigned int LexicalReordering::GetNumScoreComponents() const
|
||||
{
|
||||
return m_numberscores;
|
||||
return m_numScores;
|
||||
}
|
||||
|
||||
/** returns description of the model */
|
||||
const std::string LexicalReordering::GetScoreProducerDescription() const
|
||||
{
|
||||
return "Lexicalized reordering score, file=" + m_filename;
|
||||
}
|
||||
|
||||
|
@ -37,11 +37,11 @@ class Phrase;
|
||||
class Hypothesis;
|
||||
using namespace std;
|
||||
|
||||
/***
|
||||
* The LexicalReordering class handles everything involved with
|
||||
/** The LexicalReordering class handles everything involved with
|
||||
* lexical reordering. It loads a probability table P(orientation|f,e)
|
||||
* and computes scores in either forward, backward, or bidirectional
|
||||
* direction.
|
||||
* This model is described in Koehn et al. [IWSLT 2005]
|
||||
*/
|
||||
|
||||
class LexicalReordering : public ScoreProducer
|
||||
@ -49,14 +49,9 @@ class LexicalReordering : public ScoreProducer
|
||||
|
||||
private:
|
||||
|
||||
// Members
|
||||
// This stores the model table
|
||||
typedef std::map<std::string, std::vector<float> > ORIENTATION_TABLE;
|
||||
|
||||
// This is the order in which the different forward/backward
|
||||
// probabilities are stored in the table.
|
||||
enum TableLookupMsd { BACK_M, BACK_S, BACK_D, FOR_M,FOR_S, FOR_D };
|
||||
enum TableLookupMonotone { BACK_MONO, BACK_NONMONO, FOR_MONO, FOR_NONMONO};
|
||||
|
||||
// This is the order in which pieces appear in the orientation table
|
||||
// when conditioning on f and e.
|
||||
enum FEFileFormat { FE_FOREIGN, FE_ENGLISH, FE_PROBS };
|
||||
@ -67,19 +62,25 @@ private:
|
||||
|
||||
// different numbers of probabilities for different ranges of
|
||||
// orientation variable
|
||||
static const unsigned int MSD_NUM_PROBS = 6;
|
||||
static const unsigned int MONO_NUM_PROBS = 4;
|
||||
static const unsigned int MSD_NUM_PROBS = 3;
|
||||
static const unsigned int MONO_NUM_PROBS = 2;
|
||||
|
||||
int m_orientation; // msd or monotone
|
||||
std::vector<int> m_direction; // contains forward, backward, or both (bidirectional)
|
||||
int m_condition; // fe or f
|
||||
int m_numberscores; //2, 3, 4 or 6
|
||||
std::string m_filename; // probability table location
|
||||
vector<FactorType> m_sourceFactors;
|
||||
vector<FactorType> m_targetFactors;
|
||||
static const unsigned int ORIENTATION_MONOTONE = 0;
|
||||
static const unsigned int ORIENTATION_NON_MONOTONE = 1;
|
||||
static const unsigned int ORIENTATION_SWAP = 1;
|
||||
static const unsigned int ORIENTATION_DISCONTINUOUS = 2;
|
||||
|
||||
int m_orientation; /**< msd or monotone */
|
||||
std::vector<int> m_direction; /**< contains forward, backward, or both (bidirectional) */
|
||||
int m_condition; /**< fe or f */
|
||||
int m_numScores; /**< 1, 2, 3, or 6 */
|
||||
int m_numOrientationTypes; /**< 2(mono) or 3(msd) */
|
||||
std::string m_filename; /**< probability table location */
|
||||
vector<FactorType> m_sourceFactors; /**< source factors to condition on */
|
||||
vector<FactorType> m_targetFactors; /**< target factors to condition on */
|
||||
|
||||
|
||||
ORIENTATION_TABLE m_orientation_table; // probability table
|
||||
ORIENTATION_TABLE m_orientation_table; /**< probability table */
|
||||
|
||||
// Functions
|
||||
void LoadFile(void);
|
||||
@ -95,6 +96,9 @@ public:
|
||||
// Destructor
|
||||
~LexicalReordering(void) {}
|
||||
|
||||
// Compute Orientation
|
||||
int GetOrientation(const Hypothesis *curr_hypothesis);
|
||||
|
||||
// Compute and return a score for a hypothesis
|
||||
std::vector<float> CalcScore(Hypothesis *curr_hypothesis);
|
||||
|
||||
|
@ -186,36 +186,35 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
// load Lexical Reordering model
|
||||
const vector<string> &lrFileVector =
|
||||
m_parameter.GetParam("distortion-file");
|
||||
|
||||
for(unsigned int i=0; i< lrFileVector.size(); i++ ) //loops for each distortion model
|
||||
{
|
||||
|
||||
//if this went wrong, something went wrong in the parsing.
|
||||
const vector<string> &lrTypeVector = m_parameter.GetParam("distortion");
|
||||
vector<string> specification = Tokenize<string>(lrFileVector[i]," ");
|
||||
if (specification.size() != 4 )
|
||||
{
|
||||
TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl);
|
||||
return false;
|
||||
}
|
||||
|
||||
//defaults, but at least one of these per model should be explicitly specified in the .ini file
|
||||
int orientation = DistortionOrientationType::Msd,
|
||||
direction = LexReorderType::Bidirectional,
|
||||
condition = LexReorderType::Fe;
|
||||
|
||||
if(lrTypeVector.size() < i)
|
||||
{
|
||||
std::cerr<<"ERROR: please specify one line of configuration under [distortion] per distortion model in the moses configuration file\n";
|
||||
abort();
|
||||
}
|
||||
|
||||
//Loop through, overriding defaults with specifications
|
||||
vector<string> parameters = Tokenize<string>(lrTypeVector[i],"-");
|
||||
vector<string> parameters = Tokenize<string>(specification[1],"-");
|
||||
for (size_t param=0; param<parameters.size(); param++)
|
||||
{
|
||||
string val = ToLower(parameters[param]);
|
||||
//orientation
|
||||
if(val == "monotone")
|
||||
if(val == "monotone" || val == "monotonicity")
|
||||
orientation = DistortionOrientationType::Monotone;
|
||||
else if(val == "msd")
|
||||
else if(val == "msd" || val == "orientation")
|
||||
orientation = DistortionOrientationType::Msd;
|
||||
//direction
|
||||
else if(val == "forward")
|
||||
direction = LexReorderType::Forward;
|
||||
else if(val == "backward")
|
||||
else if(val == "backward" || val == "unidirectional")
|
||||
direction = LexReorderType::Backward;
|
||||
else if(val == "bidirectional")
|
||||
direction = LexReorderType::Bidirectional;
|
||||
@ -224,7 +223,10 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
condition = LexReorderType::F;
|
||||
else if(val == "fe")
|
||||
condition = LexReorderType::Fe;
|
||||
if (orientation == DistortionOrientationType::Msd)
|
||||
m_sourceStartPosMattersForRecombination = true;
|
||||
}
|
||||
|
||||
//compute the number of weights that ought to be in the table from this
|
||||
size_t numWeightsInTable = 0;
|
||||
if(orientation == DistortionOrientationType::Monotone)
|
||||
@ -239,10 +241,17 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
{
|
||||
numWeightsInTable *= 2;
|
||||
}
|
||||
|
||||
vector<string> token = Tokenize(lrFileVector[i]);
|
||||
//characteristics of the phrase table
|
||||
vector<string> inputfactors = Tokenize(token[0],"-");
|
||||
size_t specifiedNumWeights = Scan<size_t>(specification[2]);
|
||||
if (specifiedNumWeights != numWeightsInTable) {
|
||||
std::cerr << "specified number of weights ("
|
||||
<< specifiedNumWeights
|
||||
<< ") does not match correct number of weights for this type ("
|
||||
<< numWeightsInTable << std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
//factors involved in this table
|
||||
vector<string> inputfactors = Tokenize(specification[0],"-");
|
||||
vector<FactorType> input,output;
|
||||
if(inputfactors.size() > 1)
|
||||
{
|
||||
@ -254,11 +263,9 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
input.push_back(0); // default, just in case the user is actually using a bidirectional model
|
||||
output = Tokenize<FactorType>(inputfactors[0],",");
|
||||
}
|
||||
size_t numWeights = Scan<size_t>(token[1]);
|
||||
std::string filePath= token[2];
|
||||
std::vector<float> m_lexWeights; //will store the weights for this particular distortion reorderer
|
||||
std::vector<float> newLexWeights; //we'll remove the weights used by this distortion reorder, leaving the weights yet to be used
|
||||
if(numWeights == 1) // this is useful if the user just wants to train one weight for the model
|
||||
if(specifiedNumWeights == 1) // this is useful if the user just wants to train one weight for the model
|
||||
{
|
||||
//add appropriate weight to weight vector
|
||||
assert(distortionModelWeights.size()> 0); //if this fails the user has not specified enough weights
|
||||
@ -298,10 +305,11 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
// TRACE_ERR(m_lexWeights[weight] << "\t");
|
||||
//}
|
||||
//TRACE_ERR(endl);
|
||||
timer.check("Starting to load lexical reorder table...");
|
||||
TRACE_ERR(filePath << "...");
|
||||
|
||||
// loading the file
|
||||
std::string filePath= specification[3];
|
||||
timer.check(("Start loading distortion table " + filePath).c_str());
|
||||
m_reorderModels.push_back(new LexicalReordering(filePath, orientation, direction, condition, m_lexWeights, input, output));
|
||||
// timer.check("Finished loading lexical reorder table.");
|
||||
}
|
||||
|
||||
if (m_parameter.GetParam("lmodel-file").size() > 0)
|
||||
|
@ -87,6 +87,7 @@ protected:
|
||||
bool m_dropUnknown;
|
||||
bool m_wordDeletionEnabled;
|
||||
|
||||
bool m_sourceStartPosMattersForRecombination;
|
||||
|
||||
int m_inputType;
|
||||
unsigned m_numInputScores;
|
||||
@ -156,6 +157,10 @@ public:
|
||||
return m_decodeStepList;
|
||||
}
|
||||
|
||||
inline bool GetSourceStartPosMattersForRecombination() const
|
||||
{
|
||||
return m_sourceStartPosMattersForRecombination;
|
||||
}
|
||||
inline bool GetDropUnknown() const
|
||||
{
|
||||
return m_dropUnknown;
|
||||
|
@ -111,8 +111,6 @@ namespace DistortionOrientationType
|
||||
Monotone, //distinguish only between monotone and non-monotone as possible orientations
|
||||
Msd //further separate non-monotone into swapped and discontinuous
|
||||
};
|
||||
// Possible values for orientation.
|
||||
enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC }; //TODO explain values
|
||||
}
|
||||
|
||||
enum IOMethod
|
||||
|
@ -58,7 +58,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
|
||||
}
|
||||
|
||||
|
||||
std::vector<size_t> WordsBitmap::GetCompressedReprentation() const
|
||||
std::vector<size_t> WordsBitmap::GetCompressedRepresentation() const
|
||||
{
|
||||
std::vector<size_t> res(1 + (m_size >> (sizeof(int) + 3)), 0);
|
||||
size_t c=0; size_t x=0; size_t ci=0;
|
||||
|
@ -140,7 +140,7 @@ public:
|
||||
return m_size;
|
||||
}
|
||||
|
||||
std::vector<size_t> GetCompressedReprentation() const;
|
||||
std::vector<size_t> GetCompressedRepresentation() const;
|
||||
|
||||
inline int Compare (const WordsBitmap &compare) const
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user