Merge github.com:moses-smt/mosesdecoder into hieu_opt_input2

commit a602e2052f
Hieu Hoang, 2013-07-08 10:37:53 +01:00
15 changed files with 657 additions and 692 deletions

View File

@@ -31,11 +31,11 @@ const char REFLEN_CLOSEST[] = "closest";
 namespace MosesTuning
 {
 BleuDocScorer::BleuDocScorer(const string& config)
-: BleuScorer("BLEUDOC", config),
-m_ref_length_type(CLOSEST)
+  : BleuScorer("BLEUDOC", config),
+    m_ref_length_type(CLOSEST)
 {
   const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
   if (reflen == REFLEN_AVERAGE) {
@@ -63,41 +63,40 @@ bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id)
     if (line.find("<doc docid") != std::string::npos) { // new document
       doc_id++;
-m_references.push_back(new ScopedVector<Reference>());
+      m_references.push_back(new ScopedVector<Reference>());
       sid = 0;
-}
-else if (line.find("<seg") != std::string::npos) { //new sentence
+    } else if (line.find("<seg") != std::string::npos) { //new sentence
       int start = line.find_first_of('>') + 1;
       std::string trans = line.substr(start, line.find_last_of('<')-start);
       trans = preprocessSentence(trans);
       if (file_id == 0) {
-Reference* ref = new Reference;
-m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
+        Reference* ref = new Reference;
+        m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
       }
       if (m_references[doc_id]->size() <= sid) {
-return false;
+        return false;
       }
       NgramCounts counts;
       size_t length = CountNgrams(trans, counts, kBleuNgramOrder);
       //for any counts larger than those already there, merge them in
       for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
-const NgramCounts::Key& ngram = ci->first;
-const NgramCounts::Value newcount = ci->second;
-NgramCounts::Value oldcount = 0;
-m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
-if (newcount > oldcount) {
-m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
-}
+        const NgramCounts::Key& ngram = ci->first;
+        const NgramCounts::Value newcount = ci->second;
+        NgramCounts::Value oldcount = 0;
+        m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
+        if (newcount > oldcount) {
+          m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
+        }
       }
       //add in the length
-m_references[doc_id]->get().at(sid)->push_back(length);
+      m_references[doc_id]->get().at(sid)->push_back(length);
       if (sid > 0 && sid % 100 == 0) {
-TRACE_ERR(".");
+        TRACE_ERR(".");
       }
       ++sid;
     }
@@ -127,14 +126,14 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
     //precision on each ngram type
     for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
-testcounts_it != testcounts.end(); ++testcounts_it) {
+         testcounts_it != testcounts.end(); ++testcounts_it) {
       const NgramCounts::Value guess = testcounts_it->second;
       const size_t len = testcounts_it->first.size();
       NgramCounts::Value correct = 0;
       NgramCounts::Value v = 0;
       if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) {
-correct = min(v, guess);
+        correct = min(v, guess);
       }
       stats[len * 2 - 2] += correct;
       stats[len * 2 - 1] += guess;
@@ -143,13 +142,13 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
     const int reference_len = CalcReferenceLength(sid, i, length);
     stats.push_back(reference_len);
-//ADD stats to totStats
-std::transform(stats.begin(), stats.end(), totStats.begin(),
-totStats.begin(), std::plus<int>());
+    //ADD stats to totStats
+    std::transform(stats.begin(), stats.end(), totStats.begin(),
+                   totStats.begin(), std::plus<int>());
   }
-entry.set(totStats);
+  entry.set(totStats);
 }

 std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text)
 {
   std::vector<std::string> res;
@@ -188,18 +187,18 @@ statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
 int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length)
 {
   switch (m_ref_length_type) {
-case AVERAGE:
-return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
-break;
-case CLOSEST:
-return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
-break;
-case SHORTEST:
-return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
-break;
-default:
-cerr << "unknown reference types." << endl;
-exit(1);
+  case AVERAGE:
+    return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
+    break;
+  case CLOSEST:
+    return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
+    break;
+  case SHORTEST:
+    return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
+    break;
+  default:
+    cerr << "unknown reference types." << endl;
+    exit(1);
   }
 }
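
Note: CalcReferenceLength above picks the reference length that feeds the BLEU brevity penalty. As a standalone illustration of the "closest" strategy (a hypothetical helper, not the mert API; ties assumed to break toward the shorter reference, as in mteval):

#include <cstdlib>
#include <vector>

// Pick the reference length closest to the candidate length;
// on a tie, prefer the shorter reference.
int ClosestReferenceLength(const std::vector<int>& refLengths, int candidateLength)
{
  int best = refLengths.front();
  for (std::size_t k = 1; k < refLengths.size(); ++k) {
    const int cur = refLengths[k];
    const int dCur = std::abs(cur - candidateLength);
    const int dBest = std::abs(best - candidateLength);
    if (dCur < dBest || (dCur == dBest && cur < best))
      best = cur;
  }
  return best;
}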

View File

@@ -29,7 +29,7 @@ public:
   virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
   virtual statscore_t calculateScore(const std::vector<int>& comps) const;
-int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);
+  int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);

   // NOTE: this function is used for unit testing.
   virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);

View File

@@ -67,7 +67,7 @@ public:
   // NOTE: this function is used for unit testing.
   virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
-//private:
+  //private:
 protected:
   ReferenceLengthType m_ref_length_type;

@@ -76,7 +76,7 @@ protected:
   // constructor used by subclasses
   BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {}
   // no copying allowed
   BleuScorer(const BleuScorer&);
   BleuScorer& operator=(const BleuScorer&);

View File

@@ -51,12 +51,12 @@ int main(int argc, char **argv)
   const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
   const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
   const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
-const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
-const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
+  //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
+  //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");

   StaticData::InstanceNonConst().LoadData(parameter);

-PhraseDictionaryCompact pdc("input-factor=0 output-factor=0 num-features=5 path=" + ttable);
+  PhraseDictionaryCompact pdc("PhraseDictionaryCompact input-factor=0 output-factor=0 num-features=5 path=" + ttable);
   pdc.Load();

   std::string line;

View File

@@ -11,7 +11,7 @@ namespace Moses
 {

 OpSequenceModel::OpSequenceModel(const std::string &line)
-:StatefulFeatureFunction("OpSequenceModel", 5, line )
+  :StatefulFeatureFunction("OpSequenceModel", 5, line )
 {
   ReadParameters();
 }
@@ -19,29 +19,29 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
 void OpSequenceModel :: readLanguageModel(const char *lmFile)
 {
-string unkOp = "_TRANS_SLF_";
-/*
-// Code for SRILM
-vector <int> numbers;
+  string unkOp = "_TRANS_SLF_";
+  /*
+  // Code for SRILM
+  vector <int> numbers;
   int nonWordFlag = 0;
-ptrOp = new Api;
-ptrOp -> read_lm(lmFile,lmOrder);
-numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
-unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
-*/
-// Code to load KenLM
-OSM = new Model(m_lmPath.c_str());
-State startState = OSM->NullContextState();
-State endState;
-unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
+  ptrOp = new Api;
+  ptrOp -> read_lm(lmFile,lmOrder);
+  numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
+  unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
+  */
+  // Code to load KenLM
+  OSM = new Model(m_lmPath.c_str());
+  State startState = OSM->NullContextState();
+  State endState;
+  unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
 }
@@ -85,58 +85,55 @@ void OpSequenceModel::Load()
 void OpSequenceModel:: Evaluate(const Phrase &source
-, const TargetPhrase &targetPhrase
-, ScoreComponentCollection &scoreBreakdown
-, ScoreComponentCollection &estimatedFutureScore) const
+                                , const TargetPhrase &targetPhrase
+                                , ScoreComponentCollection &scoreBreakdown
+                                , ScoreComponentCollection &estimatedFutureScore) const
 {
-osmHypothesis obj;
-obj.setState(OSM->NullContextState());
-WordsBitmap myBitmap(source.GetSize());
-vector <string> mySourcePhrase;
-vector <string> myTargetPhrase;
-vector<float> scores(5);
-vector <int> alignments;
-int startIndex = 0;
-int endIndex = source.GetSize();
-const AlignmentInfo &align = targetPhrase.GetAlignTerm();
-AlignmentInfo::const_iterator iter;
+  osmHypothesis obj;
+  obj.setState(OSM->NullContextState());
+  WordsBitmap myBitmap(source.GetSize());
+  vector <string> mySourcePhrase;
+  vector <string> myTargetPhrase;
+  vector<float> scores(5);
+  vector <int> alignments;
+  int startIndex = 0;
+  int endIndex = source.GetSize();
+  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+  AlignmentInfo::const_iterator iter;
-for (iter = align.begin(); iter != align.end(); ++iter)
-{
-alignments.push_back(iter->first);
-alignments.push_back(iter->second);
-}
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }
-for (int i = 0; i < targetPhrase.GetSize(); i++)
-{
-if (targetPhrase.GetWord(i).IsOOV())
-myTargetPhrase.push_back("_TRANS_SLF_");
-else
-myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
-}
+  for (int i = 0; i < targetPhrase.GetSize(); i++) {
+    if (targetPhrase.GetWord(i).IsOOV())
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
+  }
-for (int i = 0; i < source.GetSize(); i++)
-{
-mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
-}
-obj.setPhrases(mySourcePhrase , myTargetPhrase);
-obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
-obj.computeOSMFeature(startIndex,myBitmap);
-obj.calculateOSMProb(*OSM);
-obj.populateScores(scores);
-estimatedFutureScore.PlusEquals(this, scores);
+  for (int i = 0; i < source.GetSize(); i++) {
+    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+  }
+  obj.setPhrases(mySourcePhrase , myTargetPhrase);
+  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
+  obj.computeOSMFeature(startIndex,myBitmap);
+  obj.calculateOSMProb(*OSM);
+  obj.populateScores(scores);
+  estimatedFutureScore.PlusEquals(this, scores);
 }

 FFState* OpSequenceModel::Evaluate(
-const Hypothesis& cur_hypo,
-const FFState* prev_state,
-ScoreComponentCollection* accumulator) const
+  const Hypothesis& cur_hypo,
+  const FFState* prev_state,
+  ScoreComponentCollection* accumulator) const
 {
   const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
   const WordsBitmap &bitmap = cur_hypo.GetWordsBitmap();
@@ -159,83 +156,81 @@ FFState* OpSequenceModel::Evaluate(
   //cerr << source <<endl;
-// int a = sourceRange.GetStartPos();
-// cerr << source.GetWord(a);
+  // int a = sourceRange.GetStartPos();
+  // cerr << source.GetWord(a);
   //cerr <<a<<endl;

   //const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
-const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
-int startIndex = sourceRange.GetStartPos();
-int endIndex = sourceRange.GetEndPos();
-const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
-osmState * statePtr;
-vector <int> alignments;
-AlignmentInfo::const_iterator iter;
+  const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
+  int startIndex = sourceRange.GetStartPos();
+  int endIndex = sourceRange.GetEndPos();
+  const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
+  osmState * statePtr;
+  vector <int> alignments;
+  AlignmentInfo::const_iterator iter;
-for (iter = align.begin(); iter != align.end(); ++iter) {
-//cerr << iter->first << "----" << iter->second << " ";
-alignments.push_back(iter->first);
-alignments.push_back(iter->second);
-}
-//cerr<<bitmap<<endl;
-//cerr<<startIndex<<" "<<endIndex<<endl;
-for (int i = startIndex; i <= endIndex; i++)
-{
-myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
-mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
-// cerr<<mySourcePhrase[i]<<endl;
-}
-for (int i = 0; i < target.GetSize(); i++)
-{
-if (target.GetWord(i).IsOOV())
-myTargetPhrase.push_back("_TRANS_SLF_");
-else
-myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
-}
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    //cerr << iter->first << "----" << iter->second << " ";
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }
+  //cerr<<bitmap<<endl;
+  //cerr<<startIndex<<" "<<endIndex<<endl;
+  for (int i = startIndex; i <= endIndex; i++) {
+    myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
+    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+    // cerr<<mySourcePhrase[i]<<endl;
+  }
+  for (int i = 0; i < target.GetSize(); i++) {
+    if (target.GetWord(i).IsOOV())
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
+  }

   //cerr<<myBitmap<<endl;

   obj.setState(prev_state);
   obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
   obj.setPhrases(mySourcePhrase , myTargetPhrase);
-obj.computeOSMFeature(startIndex,myBitmap);
+  obj.computeOSMFeature(startIndex,myBitmap);
   obj.calculateOSMProb(*OSM);
   obj.populateScores(scores);

-/*
-if (bitmap.GetFirstGapPos() == NOT_FOUND)
-{
-int xx;
-cerr<<bitmap<<endl;
-int a = bitmap.GetFirstGapPos();
-obj.print();
-cin>>xx;
-}
-*/
+  /*
+  if (bitmap.GetFirstGapPos() == NOT_FOUND)
+  {
+    int xx;
+    cerr<<bitmap<<endl;
+    int a = bitmap.GetFirstGapPos();
+    obj.print();
+    cin>>xx;
+  }
+  */

-/*
-vector<float> scores(5);
-scores[0] = 0.343423f;
-scores[1] = 1.343423f;
-scores[2] = 2.343423f;
-scores[3] = 3.343423f;
-scores[4] = 4.343423f;
-*/
+  /*
+  vector<float> scores(5);
+  scores[0] = 0.343423f;
+  scores[1] = 1.343423f;
+  scores[2] = 2.343423f;
+  scores[3] = 3.343423f;
+  scores[4] = 4.343423f;
+  */

   accumulator->PlusEquals(this, scores);
@@ -245,7 +240,7 @@ FFState* OpSequenceModel::Evaluate(
   //return statePtr;
-// return NULL;
+  // return NULL;
 }

 FFState* OpSequenceModel::EvaluateChart(
@@ -276,29 +271,28 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const Phrase &target) const
   ParallelPhrase pp(source, target);
   std::map<ParallelPhrase, Scores>::const_iterator iter;
   iter = m_futureCost.find(pp);
-//iter = m_coll.find(pp);
+  //iter = m_coll.find(pp);
   if (iter == m_futureCost.end()) {
     vector<float> scores(5, 0);
     scores[0] = unkOpProb;
     return scores;
-}
-else {
+  } else {
     const vector<float> &scores = iter->second;
-return scores;
+    return scores;
   }
 }

 void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
 {
-if (key == "feature-path") {
-m_featurePath = value;
-} else if (key == "path") {
-m_lmPath = value;
-} else if (key == "order") {
-lmOrder = Scan<int>(value);
-} else {
-StatefulFeatureFunction::SetParameter(key, value);
-}
+  if (key == "feature-path") {
+    m_featurePath = value;
+  } else if (key == "path") {
+    m_lmPath = value;
+  } else if (key == "order") {
+    lmOrder = Scan<int>(value);
+  } else {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
 }

 } // namespace
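
Note: readLanguageModel in this file boils down to loading a KenLM model over operation sequences and scoring one token from the null context, which becomes the fallback probability for unknown operations. A minimal standalone sketch against KenLM's public API (lm/model.hh); the path argument is a placeholder:

#include "lm/model.hh"

// Load an operation-sequence LM and return the log10 probability of the
// unknown-translation operation token, queried with no context.
float UnknownOpLogProb(const char* lmPath)
{
  lm::ngram::Model model(lmPath);  // ARPA text or KenLM binary file
  lm::ngram::State in = model.NullContextState(), out;
  return model.Score(in, model.GetVocabulary().Index("_TRANS_SLF_"), out);
}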

View File

@@ -16,26 +16,26 @@ class OpSequenceModel : public StatefulFeatureFunction
 {
 public:
-lm::ngram::Model * OSM;
-int lmOrder;
-float unkOpProb;
-OpSequenceModel(const std::string &line);
-void readLanguageModel(const char *);
-void Load();
-FFState* Evaluate(
-const Hypothesis& cur_hypo,
-const FFState* prev_state,
-ScoreComponentCollection* accumulator) const;
-void Evaluate(const Phrase &source
-, const TargetPhrase &targetPhrase
-, ScoreComponentCollection &scoreBreakdown
-, ScoreComponentCollection &estimatedFutureScore) const;
+  lm::ngram::Model * OSM;
+  int lmOrder;
+  float unkOpProb;
+  OpSequenceModel(const std::string &line);
+  void readLanguageModel(const char *);
+  void Load();
+  FFState* Evaluate(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const;
+  void Evaluate(const Phrase &source
+                , const TargetPhrase &targetPhrase
+                , ScoreComponentCollection &scoreBreakdown
+                , ScoreComponentCollection &estimatedFutureScore) const;

   virtual FFState* EvaluateChart(
     const ChartHypothesis& /* cur_hypo */,
@@ -49,17 +49,18 @@ public:
   std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const;
   void SetParameter(const std::string& key, const std::string& value);
-bool IsUseable(const FactorMask &mask) const
-{ return true; }
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }

 protected:
-typedef std::pair<Phrase, Phrase> ParallelPhrase;
-typedef std::vector<float> Scores;
-std::map<ParallelPhrase, Scores> m_futureCost;
-std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
-std::set <int> targetNullWords;
-std::string m_featurePath, m_lmPath;
+  typedef std::pair<Phrase, Phrase> ParallelPhrase;
+  typedef std::vector<float> Scores;
+  std::map<ParallelPhrase, Scores> m_futureCost;
+  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+  std::set <int> targetNullWords;
+  std::string m_featurePath, m_lmPath;

File diff suppressed because it is too large

View File

@@ -17,15 +17,23 @@ public:
   osmState(const lm::ngram::State & val);
   int Compare(const FFState& other) const;
   void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
-int getJ()const {return j;}
-int getE()const {return E;}
-std::map <int , std::string> getGap() const { return gap;}
-lm::ngram::State getLMState() const {return lmState;}
+  int getJ()const {
+    return j;
+  }
+  int getE()const {
+    return E;
+  }
+  std::map <int , std::string> getGap() const {
+    return gap;
+  }
+  lm::ngram::State getLMState() const {
+    return lmState;
+  }

   void print() const;
   std::string getName() const;

 protected:
   int j, E;
   std::map <int,std::string> gap;
@@ -35,51 +43,56 @@ protected:
 class osmHypothesis
 {

 private:
-std::vector <std::string> operations; // List of operations required to generated this hyp ...
-std::map <int,std::string> gap; // Maintains gap history ...
-int j; // Position after the last source word generated ...
-int E; // Position after the right most source word so far generated ...
-lm::ngram::State lmState; // KenLM's Model State ...
-int gapCount; // Number of gaps inserted ...
-int deletionCount;
-int openGapCount;
-int gapWidth;
-double opProb;
-std::vector <std::string> currE;
-std::vector <std::string> currF;
-std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
-std::set <int> targetNullWords;
-std::set <int> sourceNullWords;
-int closestGap(std::map <int,std::string> gap,int j1, int & gp);
-int firstOpenGap(std::vector <int> & coverageVector);
-std::string intToString(int);
-int getOpenGaps();
-int isTranslationOperation(int j);
-void removeReorderingOperations();
-void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
-public:
-osmHypothesis();
-~osmHypothesis(){};
-void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
-void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
-void calculateOSMProb(lm::ngram::Model & ptrOp);
-void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
-void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
-void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2){currF = val1; currE = val2;}
-void setState(const FFState* prev_state);
-osmState * saveState();
-void print();
-void populateScores(std::vector <float> & scores);
-void setState(const lm::ngram::State & val){lmState = val;}
+  std::vector <std::string> operations; // List of operations required to generated this hyp ...
+  std::map <int,std::string> gap; // Maintains gap history ...
+  int j; // Position after the last source word generated ...
+  int E; // Position after the right most source word so far generated ...
+  lm::ngram::State lmState; // KenLM's Model State ...
+  int gapCount; // Number of gaps inserted ...
+  int deletionCount;
+  int openGapCount;
+  int gapWidth;
+  double opProb;
+  std::vector <std::string> currE;
+  std::vector <std::string> currF;
+  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+  std::set <int> targetNullWords;
+  std::set <int> sourceNullWords;
+  int closestGap(std::map <int,std::string> gap,int j1, int & gp);
+  int firstOpenGap(std::vector <int> & coverageVector);
+  std::string intToString(int);
+  int getOpenGaps();
+  int isTranslationOperation(int j);
+  void removeReorderingOperations();
+  void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);

 public:
+  osmHypothesis();
+  ~osmHypothesis() {};
+  void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
+  void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
+  void calculateOSMProb(lm::ngram::Model & ptrOp);
+  void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
+  void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
+  void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
+    currF = val1;
+    currE = val2;
+  }
+  void setState(const FFState* prev_state);
+  osmState * saveState();
+  void print();
+  void populateScores(std::vector <float> & scores);
+  void setState(const lm::ngram::State & val) {
+    lmState = val;
+  }
 };
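
Note: osmState above is a decoder feature-function state: it captures exactly what the OSM feature needs to score future extensions (positions j and E, the open-gap map, the KenLM state), and Compare() lets the search recombine hypotheses whose states match. A self-contained sketch of the pattern (illustrative names, not the moses API):

#include <map>
#include <string>

struct SketchState {
  int j = 0;                       // position after the last generated source word
  int E = 0;                       // position after the right-most generated source word
  std::map<int, std::string> gap;  // open gaps, keyed by source position

  // Total order over states; hypotheses comparing equal (0) can be recombined.
  int Compare(const SketchState& o) const {
    if (j != o.j) return j < o.j ? -1 : 1;
    if (E != o.E) return E < o.E ? -1 : 1;
    if (gap != o.gap) return gap < o.gap ? -1 : 1;
    return 0;
  }
};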

View File

@@ -383,7 +383,7 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
   try {
     lm::ngram::ModelType model_type;
     if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
-switch(model_type) {
+      switch(model_type) {
       case lm::ngram::PROBING:
         return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
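
Note: this hunk sits inside ConstructKenLM's dispatch: RecognizeBinary probes a binary LM file for its data structure, and the switch constructs the matching template instantiation. A hedged sketch against KenLM's public headers (only the PROBING branch shown; LoadAny is a made-up name):

#include "lm/binary_format.hh"
#include "lm/model.hh"

lm::base::Model* LoadAny(const char* file)
{
  lm::ngram::ModelType model_type;
  if (lm::ngram::RecognizeBinary(file, model_type)) {
    switch (model_type) {
    case lm::ngram::PROBING:
      return new lm::ngram::ProbingModel(file);
    default:
      return 0;  // remaining model types elided in this sketch
    }
  }
  // Not a KenLM binary: treat as ARPA text with the default representation.
  return new lm::ngram::ProbingModel(file);
}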

View File

@@ -694,9 +694,9 @@ bool StaticData::LoadData(Parameter *parameter)
       vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
       SetWeights(model, weights);
     } else if (feature == "OpSequenceModel") {
-OpSequenceModel* model = new OpSequenceModel(line);
-vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
-SetWeights(model, weights);
+      OpSequenceModel* model = new OpSequenceModel(line);
+      vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
+      SetWeights(model, weights);
     } else if (feature == "PhrasePenalty") {
       PhrasePenalty* model = new PhrasePenalty(line);
       vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());

View File

@@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source)
   return source + m_separator;
 }

-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
+TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval)
 {
   // Not using TargetPhraseCollection avoiding "new" operator
@@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
     // Decompress and decode target phrase collection
     TargetPhraseVectorPtr decodedPhraseColl =
-DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
+      DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval);

     return decodedPhraseColl;
   } else
@@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
 TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
   TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
-const Phrase &sourcePhrase, bool topLevel)
+  const Phrase &sourcePhrase, bool topLevel, bool eval)
 {
   bool extending = tpv->size();
@@ -397,7 +397,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
       if(scores.size() == m_numScoreComponent) {
         targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
-targetPhrase->Evaluate(sourcePhrase);
+        if(eval)
+          targetPhrase->Evaluate(sourcePhrase);

         if(m_containsAlignmentInfo)
           state = Alignment;
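
Note: the new eval flag defers per-phrase scoring: the decoding path still evaluates each TargetPhrase as it is decoded, while callers that only want the raw entries can now skip that work. A standalone sketch of the pattern (toy types, not the CompactPT API):

#include <string>
#include <vector>

struct Entry {
  std::string text;
  float score = 0.0f;
};

// Decode entries; score them immediately only when `eval` is set,
// mirroring: if(eval) targetPhrase->Evaluate(sourcePhrase);
std::vector<Entry> Decode(const std::vector<std::string>& raw, bool eval)
{
  std::vector<Entry> out;
  for (const std::string& r : raw) {
    Entry e;
    e.text = r;
    if (eval)
      e.score = static_cast<float>(r.size());  // stand-in for real scoring
    out.push_back(e);
  }
  return out;
}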

View File

@@ -131,12 +131,13 @@ public:
   size_t Load(std::FILE* in);

   TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
-bool topLevel = false);
+      bool topLevel = false, bool eval = true);

   TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
       BitWrapper<> &encodedBitStream,
       const Phrase &sourcePhrase,
-bool topLevel);
+      bool topLevel,
+      bool eval);

   void PruneCache();
 };

View File

@@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) const
   // Retrieve target phrase collection from phrase table
   TargetPhraseVectorPtr decodedPhraseColl
-= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+    = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);

   if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
     TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
@@ -130,7 +130,6 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) const
     std::nth_element(tpv->begin(), nth, tpv->end(), CompareTargetPhrase());

     for(TargetPhraseVector::iterator it = tpv->begin(); it != nth; it++) {
       TargetPhrase *tp = new TargetPhrase(*it);
-cerr << *tp << endl;
       phraseColl->Add(tp);
     }
@@ -152,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
     return TargetPhraseVectorPtr();

   // Retrieve target phrase collection from phrase table
-return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+  return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
 }

 PhraseDictionaryCompact::~PhraseDictionaryCompact()
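
Note: the pruning loop above (unchanged here apart from the dropped debug cerr) relies on std::nth_element, which partitions the vector so the best table-limit entries precede nth without a full sort. Standalone illustration:

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main()
{
  std::vector<float> scores = {0.1f, 0.9f, 0.4f, 0.7f, 0.2f};
  const std::size_t limit = 3;  // analogous to the phrase-table limit
  std::vector<float>::iterator nth =
      scores.begin() + std::min(limit, scores.size());
  // After this call the 3 largest scores sit before nth, in no particular order.
  std::nth_element(scores.begin(), nth, scores.end(), std::greater<float>());
  for (std::vector<float>::iterator it = scores.begin(); it != nth; ++it)
    std::cout << *it << "\n";
}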

View File

@@ -38,7 +38,7 @@ bool operator<(const PackedItem &pi1, const PackedItem &pi2)
 }

 std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
-std::string PhraseTableCreator::m_separator = " ||| ";
+std::string PhraseTableCreator::m_separator = "|||";

 PhraseTableCreator::PhraseTableCreator(std::string inPath,
                                        std::string outPath,
@@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash()
 inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
 {
-return source + m_separator;
+  return source + " " + m_separator + " ";
 }

 inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
 {
-return source + m_separator + target + m_separator;
+  return source + " " + m_separator + " " + target + " " + m_separator + " ";
 }

 void PhraseTableCreator::EncodeTargetPhrases()
@@ -1034,17 +1034,24 @@ void RankingTask::operator()()
   for(size_t i = 0; i < lines.size(); i++) {
     std::vector<std::string> tokens;
     Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
-if(tokens.size() < 3) {
+    for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+      *it = Moses::Trim(*it);
+
+    if(tokens.size() < 4) {
       std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
       std::cerr << "Line " << i << ": " << lines[i] << std::endl;
       abort();
     }
-if(tokens.size() == 3 && m_creator.m_warnMe) {
-std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
-std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
-std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl;
+
+    if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
+      std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
+      std::cerr << "but you are using ";
+      std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+      std::cerr << " encoding which makes use of alignment data. " << std::endl;
+      std::cerr << "Use -encoding None" << std::endl;
+      std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+      abort();
     }

     std::vector<float> scores = Tokenize<float>(tokens[2]);
@@ -1125,18 +1132,23 @@ void EncodingTask::operator()()
     std::vector<std::string> tokens;
     Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
+    for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+      *it = Moses::Trim(*it);
+
     if(tokens.size() < 3) {
       std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
       std::cerr << "Line " << i << ": " << lines[i] << std::endl;
       abort();
     }
-if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
-std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
-std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
+    if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
+      std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
+      std::cerr << "but you are using ";
+      std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+      std::cerr << " encoding which makes use of alignment data. " << std::endl;
+      std::cerr << "Use -encoding None" << std::endl;
+      std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+      abort();
     }

     size_t ownRank = 0;
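
Note: with m_separator now the bare "|||", phrase-table lines are split on the separator alone and whitespace is handled by trimming each field, so irregular spacing around "|||" still parses; the stricter check then hard-errors when the alignment field (tokens[3]) is missing under PREnc/REnc. A standalone sketch with stand-ins for Moses::TokenizeMultiCharSeparator and Moses::Trim:

#include <iostream>
#include <string>
#include <vector>

// Split on a multi-character separator (stand-in for TokenizeMultiCharSeparator).
std::vector<std::string> SplitOn(const std::string& line, const std::string& sep)
{
  std::vector<std::string> out;
  std::string::size_type start = 0, pos;
  while ((pos = line.find(sep, start)) != std::string::npos) {
    out.push_back(line.substr(start, pos - start));
    start = pos + sep.size();
  }
  out.push_back(line.substr(start));
  return out;
}

// Strip leading/trailing blanks (stand-in for Moses::Trim).
std::string Trim(const std::string& s)
{
  const std::string ws = " \t";
  std::string::size_type b = s.find_first_not_of(ws);
  if (b == std::string::npos) return "";
  return s.substr(b, s.find_last_not_of(ws) - b + 1);
}

int main()
{
  // source ||| target ||| scores ||| alignment
  std::string line = "das haus ||| the house ||| 0.5 0.2 0.3 ||| 0-0 1-1";
  std::vector<std::string> tokens = SplitOn(line, "|||");
  for (std::size_t i = 0; i < tokens.size(); ++i) tokens[i] = Trim(tokens[i]);
  if (tokens.size() < 4 || tokens[3].size() <= 1)
    std::cerr << "no alignment field\n";  // the new hard-error path
  else
    std::cout << "alignment: " << tokens[3] << "\n";
}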

View File

@@ -59,8 +59,7 @@ public:
   /** deep copy */
   Word(const Word &copy)
     :m_isNonTerminal(copy.m_isNonTerminal)
-,m_isOOV(copy.m_isOOV)
-{
+    ,m_isOOV(copy.m_isOOV) {
     std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
   }