mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
KenLM
This commit is contained in:
parent
493d713075
commit
537210b010
@ -14,7 +14,7 @@ namespace Moses
|
||||
OpSequenceModel::OpSequenceModel(const std::string &line)
|
||||
:StatefulFeatureFunction("OpSequenceModel", 5, line )
|
||||
{
|
||||
myLine = line;
|
||||
//myLine = line;
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
@ -31,6 +31,9 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile)
|
||||
|
||||
// Code to load KenLM
|
||||
|
||||
OSM = new Model(m_lmPath.c_str());
|
||||
|
||||
/*
|
||||
vector<string> toks = Tokenize(myLine);
|
||||
myLine = "factor=0";
|
||||
|
||||
@ -41,7 +44,7 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile)
|
||||
}
|
||||
|
||||
cout<<myLine<<endl;
|
||||
OSM = ConstructKenLM("KENLM", myLine);
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
@ -162,6 +165,7 @@ FFState* OpSequenceModel::Evaluate(
|
||||
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
|
||||
obj.setPhrases(mySourcePhrase , myTargetPhrase);
|
||||
obj.computeOSMFeature(startIndex,myBitmap,*ptrOp,lmOrder);
|
||||
obj.calculateOSMProb(*OSM);
|
||||
obj.populateScores(scores);
|
||||
|
||||
/*
|
||||
@ -208,7 +212,10 @@ FFState* OpSequenceModel::EvaluateChart(
|
||||
const FFState* OpSequenceModel::EmptyHypothesisState(const InputType &input) const
|
||||
{
|
||||
cerr << "OpSequenceModel::EmptyHypothesisState()" << endl;
|
||||
return new osmState();
|
||||
|
||||
State startState = OSM->BeginSentenceState();
|
||||
|
||||
return new osmState(startState);
|
||||
}
|
||||
|
||||
std::string OpSequenceModel::GetScoreProducerWeightShortName(unsigned idx) const
|
||||
|
@ -7,18 +7,22 @@
|
||||
#include "moses/Manager.h"
|
||||
#include "moses/FF/OSM-Feature/osmHyp.h"
|
||||
#include "moses/FF/OSM-Feature/SRILM-API.h"
|
||||
#include "moses/LM/Ken.h"
|
||||
#include "lm/model.hh"
|
||||
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
using namespace lm::ngram;
|
||||
|
||||
class OpSequenceModel : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
|
||||
//LM *LanguageModel;
|
||||
|
||||
LanguageModel * OSM;
|
||||
Model * OSM;
|
||||
|
||||
Api * ptrOp;
|
||||
int lmOrder;
|
||||
float unkOpProb;
|
||||
|
@ -3,11 +3,13 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
osmState::osmState()
|
||||
osmState::osmState(const State & val)
|
||||
:j(0)
|
||||
,E(0)
|
||||
{
|
||||
history.push_back("<s>");
|
||||
lmState = val;
|
||||
|
||||
}
|
||||
|
||||
void osmState::saveState(int jVal, int eVal, vector <string> & histVal , map <int , string> & gapVal)
|
||||
@ -32,6 +34,10 @@ int osmState::Compare(const FFState& otherBase) const
|
||||
if (history != other.history)
|
||||
return (history < other.history) ? -1 : +1;
|
||||
|
||||
if (lmState.length < other.lmState.length) return -1;
|
||||
|
||||
if (lmState.length > other.lmState.length) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -74,19 +80,18 @@ void osmHypothesis :: setState(const FFState* prev_state)
|
||||
if(prev_state != NULL)
|
||||
{
|
||||
|
||||
|
||||
j = static_cast <const osmState *> (prev_state)->getJ();
|
||||
E = static_cast <const osmState *> (prev_state)->getE();
|
||||
history = static_cast <const osmState *> (prev_state)->getHistory();
|
||||
gap = static_cast <const osmState *> (prev_state)->getGap();
|
||||
|
||||
lmState = static_cast <const osmState *> (prev_state)->getLMState();
|
||||
}
|
||||
}
|
||||
|
||||
osmState * osmHypothesis :: saveState()
|
||||
{
|
||||
|
||||
osmState * statePtr = new osmState;
|
||||
osmState * statePtr = new osmState(lmState);
|
||||
statePtr->saveState(j,E,history,gap);
|
||||
statePtr->saveDelHistory(operations);
|
||||
return statePtr;
|
||||
@ -135,6 +140,28 @@ void osmHypothesis :: removeReorderingOperations()
|
||||
operations = tupleSequence;
|
||||
}
|
||||
|
||||
/**
 * Scores the pending operation sequence with a KenLM model.
 *
 * Starting from the KenLM state stored in this hypothesis (lmState), every
 * entry of `operations` is mapped to a vocabulary index and scored in order.
 * opProb is reset to 0 and then accumulates the per-operation scores; the
 * state reached after the last operation is written back to lmState so that
 * the saved osmState carries it forward.  When `operations` is empty, opProb
 * becomes 0 and lmState is left unchanged.
 *
 * @param ptrOp  KenLM model used for vocabulary lookup and scoring (passed by
 *               reference despite the pointer-style name).
 */
void osmHypothesis :: calculateOSMProb(Model & ptrOp)
{
  // Trace the value opProb held on entry -- presumably the score produced by
  // the SRILM-based overload, kept for side-by-side comparison (TODO confirm).
  // Diagnostics go to cerr, like the other trace in this feature, so they do
  // not mix with anything written to standard output.
  cerr<<"SRILM "<<opProb<<endl;

  opProb = 0;
  State currState = lmState;

  // size_t index: operations.size() is unsigned, avoid a signed/unsigned compare.
  for (size_t i = 0; i < operations.size(); ++i) {
    // Score() reads the input state and writes a separate output state, so
    // the successor is built in its own object before replacing currState.
    State nextState;
    opProb += ptrOp.Score(currState, ptrOp.GetVocabulary().Index(operations[i]), nextState);
    currState = nextState;
  }

  // Persist the context reached after the final operation.
  lmState = currState;

  cerr<<"Ken LM "<<opProb<<endl;
}
|
||||
|
||||
void osmHypothesis :: calculateOSMProb(Api & ptrOp , int order)
|
||||
{
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
# include "SRILM-API.h"
|
||||
#include "lm/model.hh"
|
||||
# include "moses/FF/FFState.h"
|
||||
# include "moses/Manager.h"
|
||||
# include <set>
|
||||
@ -13,14 +14,17 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
using namespace lm::ngram;
|
||||
|
||||
class osmState : public FFState
|
||||
{
|
||||
public:
|
||||
osmState();
|
||||
osmState(const State & val);
|
||||
int Compare(const FFState& other) const;
|
||||
void saveState(int jVal, int eVal, vector <string> & hist , map <int , string> & gapVal);
|
||||
int getJ()const {return j;}
|
||||
int getE()const {return E;}
|
||||
State getLMState() const {return lmState;}
|
||||
map <int , string> getGap() const { return gap;}
|
||||
vector <string> getHistory()const {return history;}
|
||||
void print() const;
|
||||
@ -32,6 +36,7 @@ protected:
|
||||
std::map <int,std::string> gap;
|
||||
std::vector <std::string> history;
|
||||
std::vector <std::string> delHistory;
|
||||
State lmState;
|
||||
};
|
||||
|
||||
class osmHypothesis
|
||||
@ -44,6 +49,7 @@ class osmHypothesis
|
||||
std::map <int,std::string> gap; // Maintains gap history ...
|
||||
int j; // Position after the last source word generated ...
|
||||
int E; // Position after the right most source word so far generated ...
|
||||
State lmState; // KenLM's Model State ...
|
||||
|
||||
int gapCount; // Number of gaps inserted ...
|
||||
int deletionCount;
|
||||
@ -73,7 +79,9 @@ class osmHypothesis
|
||||
void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
|
||||
void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
|
||||
void calculateOSMProb(Api & opPtr , int order);
|
||||
void calculateOSMProb(Model & ptrOp);
|
||||
void computeOSMFeature(int startIndex , WordsBitmap & coverageVector , Api & ptrOp, int order);
|
||||
|
||||
void constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
|
||||
void setPhrases(vector <string> & val1 , vector <string> & val2){currF = val1; currE = val2;}
|
||||
void setState(const FFState* prev_state);
|
||||
|
Loading…
Reference in New Issue
Block a user