From cf55ab6678343b3245cb41f2875678bdfed961c5 Mon Sep 17 00:00:00 2001 From: nadir Date: Mon, 24 Jun 2013 12:29:33 +0100 Subject: [PATCH] OSM-Feature --- moses/FF/OSM-Feature/OpSequenceModel.cpp | 244 ++++++++ moses/FF/OSM-Feature/OpSequenceModel.h | 58 ++ moses/FF/OSM-Feature/SRILM-API.cpp | 175 ++++++ moses/FF/OSM-Feature/SRILM-API.h | 31 + moses/FF/OSM-Feature/osmHyp.cpp | 690 +++++++++++++++++++++++ moses/FF/OSM-Feature/osmHyp.h | 89 +++ 6 files changed, 1287 insertions(+) create mode 100644 moses/FF/OSM-Feature/OpSequenceModel.cpp create mode 100644 moses/FF/OSM-Feature/OpSequenceModel.h create mode 100644 moses/FF/OSM-Feature/SRILM-API.cpp create mode 100644 moses/FF/OSM-Feature/SRILM-API.h create mode 100644 moses/FF/OSM-Feature/osmHyp.cpp create mode 100644 moses/FF/OSM-Feature/osmHyp.h diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp new file mode 100644 index 000000000..269c196b4 --- /dev/null +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -0,0 +1,244 @@ +#include +#include "OpSequenceModel.h" +#include "osmHyp.h" +#include "util/check.hh" +#include "moses/Util.h" +#include "moses/OSM-Feature/osmHyp.h" + + + +using namespace std; + +namespace Moses +{ + +OpSequenceModel::OpSequenceModel() +:StatefulFeatureFunction("OpSequenceModel", 5 ) +{ + + + + //LanguageModel = NULL; +} + +void OpSequenceModel :: readLanguageModel(const char *lmFile) +{ + + vector numbers; + int nonWordFlag = 0; + string unkOp = "_TRANS_SLF_"; + ptrOp = new Api; + ptrOp -> read_lm(lmFile,lmOrder); + numbers.push_back(ptrOp->getLMID(const_cast (unkOp.c_str()))); + unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag); + +/* + setlocale(LC_CTYPE, ""); + setlocale(LC_COLLATE, ""); + + Vocab *vocab = new Vocab; + vocab->unkIsWord() = true; // vocabulary contains unknown word tag + + LanguageModel = new Ngram( *vocab,order ); + assert(LanguageModel != 0); + // LanguageModel->debugme(0); + + File file( lmFile, "r" ); + if (!LanguageModel->read( file )) { + cerr << "format error in lm file\n"; + exit(1); + } + + file.close(); + */ +} + + +void OpSequenceModel::Load(const std::string &osmFeatureFile, const std::string &operationLM , int orderVal) +{ + // load future cost + lmOrder= orderVal; + //vector input; + ifstream sr (osmFeatureFile.c_str()); + char* tmp; + + CHECK(sr.is_open()); + + vector factorOrder; + factorOrder.push_back(0); + + string line; + while (std::getline(sr, line)) + { + std::vector tokens; + tokens = TokenizeMultiCharSeparator(line, "|||"); + CHECK(tokens.size() == 3); + + Phrase source, target; + source.CreateFromString(factorOrder, tokens[0], "|"); + target.CreateFromString(factorOrder, tokens[1], "|"); + + ParallelPhrase pp(source, target); + Scores scores = Tokenize(tokens[2], " "); + m_futureCost[pp] = scores; + // m_coll[pp] = scores; + } + + readLanguageModel(operationLM.c_str()); + +} + + + +FFState* OpSequenceModel::Evaluate( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const +{ + const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase(); + const WordsBitmap &bitmap = cur_hypo.GetWordsBitmap(); + WordsBitmap myBitmap = bitmap; + const Manager &manager = cur_hypo.GetManager(); + const InputType &source = manager.GetSource(); + const Sentence &sourceSentence = static_cast(source); + osmHypothesis obj; + vector mySourcePhrase; + vector myTargetPhrase; + vector scores(5); + + + //target.GetWord(0) + + //cerr << target <<" --- "<(curr_hypo.GetManager().GetSource()); + + + const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange(); + int startIndex = sourceRange.GetStartPos(); + int endIndex = sourceRange.GetEndPos(); + const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm(); + osmState * statePtr; + + vector alignments; + + + + AlignmentInfo::const_iterator iter; + + for (iter = align.begin(); iter != align.end(); ++iter) { + //cerr << iter->first << "----" << iter->second << " "; + alignments.push_back(iter->first); + alignments.push_back(iter->second); + } + + + //cerr<GetString()); + // cerr<GetString()); + + } + + + //cerr<>xx; + } + */ + +/* + vector scores(5); + scores[0] = 0.343423f; + scores[1] = 1.343423f; + scores[2] = 2.343423f; + scores[3] = 3.343423f; + scores[4] = 4.343423f; + */ + + accumulator->PlusEquals(this, scores); + + return obj.saveState(); + + + + + //return statePtr; + // return NULL; +} + +FFState* OpSequenceModel::EvaluateChart( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses */, + ScoreComponentCollection* accumulator) const +{ + abort(); + +} + +const FFState* OpSequenceModel::EmptyHypothesisState(const InputType &input) const +{ + cerr << "OpSequenceModel::EmptyHypothesisState()" << endl; + return new osmState(); +} + +std::string OpSequenceModel::GetScoreProducerWeightShortName(unsigned idx) const +{ + return "osm"; +} + +std::vector OpSequenceModel::GetFutureScores(const Phrase &source, const Phrase &target) const +{ + ParallelPhrase pp(source, target); + std::map::const_iterator iter; + iter = m_futureCost.find(pp); + //iter = m_coll.find(pp); + if (iter == m_futureCost.end()) { + vector scores(5, 0); + scores[0] = unkOpProb; + return scores; + } + else { + const vector &scores = iter->second; + return scores; + } +} + +} // namespace diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h new file mode 100644 index 000000000..8c87988f3 --- /dev/null +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include "moses/FeatureFunction.h" +#include "Ngram.h" +#include "moses/Manager.h" +#include "moses/OSM-Feature/osmHyp.h" +#include "moses/OSM-Feature/SRILM-API.h" + +namespace Moses +{ + +class OpSequenceModel : public StatefulFeatureFunction +{ +public: + + //LM *LanguageModel; + Api * ptrOp; + int lmOrder; + float unkOpProb; + + OpSequenceModel(); + + void readLanguageModel(const char *); + void Load(const std::string &osmFeatureFile, const std::string &operationLM , int orderVal); + + FFState* Evaluate( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const; + + virtual FFState* EvaluateChart( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses */, + ScoreComponentCollection* accumulator) const; + + virtual const FFState* EmptyHypothesisState(const InputType &input) const; + + virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const; + + std::vector GetFutureScores(const Phrase &source, const Phrase &target) const; + +protected: + typedef std::pair ParallelPhrase; + typedef std::vector Scores; + std::map m_futureCost; + + std::vector < std::pair < std::set , std::set > > ceptsInPhrase; + std::set targetNullWords; + + + +}; + + +} // namespace diff --git a/moses/FF/OSM-Feature/SRILM-API.cpp b/moses/FF/OSM-Feature/SRILM-API.cpp new file mode 100644 index 000000000..009bd6fc5 --- /dev/null +++ b/moses/FF/OSM-Feature/SRILM-API.cpp @@ -0,0 +1,175 @@ +#include "SRILM-API.h" +#include "Ngram.h" + + +Api :: Api() +{ + LanguageModel = NULL; +} + +Api :: ~Api() +{ + delete LanguageModel; +} + +int Api :: getLMID(char* toBeChecked) +{ + + VocabString words[11]; + unsigned len = LanguageModel->vocab.parseWords(toBeChecked, words, 10); + + if (len < 1) { + cerr << "Error: in input file!\n"; + exit(1); + } + + VocabString last = words[len-1]; + VocabIndex index = LanguageModel->vocab.getIndex(last,LanguageModel->vocab.unkIndex()); + + return index; +} + +double Api :: contextProbN (vector numbers, int & nonWordFlag) +{ + + VocabIndex wordIndex[11]; + VocabIndex last = numbers[numbers.size()-1]; + + int c = 0; + //cout<=0; i--) + { + //cout<wordProb(last,wordIndex)); + + return LanguageModel->wordProb(last,wordIndex); + +} + +unsigned Api :: backOffLength (vector numbers) +{ + + VocabIndex wordIndex[11]; + VocabIndex last = numbers[numbers.size()-1]; + unsigned length = 0; + + int c = 0; + //cout<=0; i--) + { + //cout<wordProb(last,wordIndex)); + LanguageModel->contextID(last,wordIndex,length); + return length; + +} + +double Api :: contextProb (char * toBeChecked, int & nonWordFlag) +{ + + + //read_lm(languageModel,order); + VocabString words[11]; + + unsigned len = LanguageModel->vocab.parseWords(toBeChecked, words, 10); + + if (len < 1) { + cerr << "Error: in input file!\n"; + exit(1); + } + + + VocabString last = words[len-1]; + + words[len-1] = 0; + // reverse N-gram prefix to obtain context + + VocabIndex index = LanguageModel->vocab.getIndex(last); + + + if(index == Vocab_None) + { + nonWordFlag=1; + + } + + LanguageModel->vocab.reverse( words ); + + // double cost= pow(10,lm_logprobContext(last, words )); + double cost= lm_logprobContext(last, words); + + return cost; + +} + +double Api :: sentProb (char * toBeChecked) +{ + + + //read_lm(languageModel,order); + VocabString sentence[15]; + unsigned len = LanguageModel->vocab.parseWords(toBeChecked, sentence, 15); + + + if (len < 1) + { + cerr << "Error: in input file!\n"; + exit(1); + } + + //printf("%lf\n", exp(lm_logprobSent(sentence))); + //cout<unkIsWord() = true; /* vocabulary contains unknown word tag */ + + LanguageModel = new Ngram( *vocab,order ); + assert(LanguageModel != 0); + // LanguageModel->debugme(0); + + File file( lmFile, "r" ); + if (!LanguageModel->read( file )) { + cerr << "format error in lm file\n"; + exit(1); + } + + file.close(); + + +} + +float Api :: lm_logprobSent( const VocabString *sentence ) + +{ + TextStats obj; + return LanguageModel->sentenceProb(sentence, obj); +} + + +float Api :: lm_logprobContext( const VocabString word, const VocabString *context ) +{ + return LanguageModel->wordProb( word, context ); +} + + diff --git a/moses/FF/OSM-Feature/SRILM-API.h b/moses/FF/OSM-Feature/SRILM-API.h new file mode 100644 index 000000000..b35285919 --- /dev/null +++ b/moses/FF/OSM-Feature/SRILM-API.h @@ -0,0 +1,31 @@ +#pragma once + +#include "Ngram.h" +#include + +using namespace std; + +class Api +{ + + public: + + Api(); + ~Api(); + void read_lm(const char *,int); + float lm_logprobContext( const VocabString word, const VocabString *context ); + float lm_logprobSent( const VocabString *sentence ); + double contextProb(char *, int & ); + double contextProbN (std::vector , int &); + unsigned backOffLength (std::vector ); + + double sentProb(char *) ; + int getLMID(char *); + + private : + + LM *LanguageModel; + +}; + + diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp new file mode 100644 index 000000000..b801f544c --- /dev/null +++ b/moses/FF/OSM-Feature/osmHyp.cpp @@ -0,0 +1,690 @@ +#include "osmHyp.h" +#include + +namespace Moses +{ +osmState::osmState() +:j(0) +,E(0) +{ + history.push_back(""); +} + +void osmState::saveState(int jVal, int eVal, vector & histVal , map & gapVal) +{ + history.clear(); + gap.clear(); + gap = gapVal; + history = histVal; + j = jVal; + E = eVal; +} + +int osmState::Compare(const FFState& otherBase) const +{ + const osmState &other = static_cast(otherBase); + if (j != other.j) + return (j < other.j) ? -1 : +1; + if (E != other.E) + return (E < other.E) ? -1 : +1; + if (gap != other.gap) + return (gap < other.gap) ? -1 : +1; + if (history != other.history) + return (history < other.history) ? -1 : +1; + + return 0; +} + +void osmState :: print() const +{ + + for (int i = 0; i< delHistory.size(); i++) + { + cerr< (prev_state)->getJ(); + E = static_cast (prev_state)->getE(); + history = static_cast (prev_state)->getHistory(); + gap = static_cast (prev_state)->getGap(); + + } +} + +osmState * osmHypothesis :: saveState() +{ + + osmState * statePtr = new osmState; + statePtr->saveState(j,E,history,gap); + statePtr->saveDelHistory(operations); + return statePtr; +} + +int osmHypothesis :: isTranslationOperation(int x) +{ + if (operations[x].find("_JMP_BCK_") != -1) + return 0; + + if (operations[x].find("_JMP_FWD_") != -1) + return 0; + + if (operations[x].find("_CONT_CEPT_") != -1) + return 0; + + if (operations[x].find("_INS_GAP_") != -1) + return 0; + + return 1; + +} + +void osmHypothesis :: removeReorderingOperations() +{ + gapCount = 0; + deletionCount = 0; + openGapCount = 0; + gapWidth = 0; + //cout<<"I came here"< tupleSequence; + + for (int x = 0; x < operations.size(); x++) + { + // cout< numbers; + vector context; + int nonWordFlag = 0; + double temp; + + for (int i=0; i< operations.size(); i++) + numbers.push_back(ptrOp.getLMID(const_cast (operations[i].c_str()))); + + // cerr<<"History Of Operations "< (history[i].c_str()))); + //cerr< order) + { + context.erase(context.begin()); + history.erase(history.begin()); + } + + temp = ptrOp.contextProbN(context,nonWordFlag); + opProb = opProb + temp; + + //cout< order-1) + { + history.erase(history.begin()); + } + +} + + +int osmHypothesis :: firstOpenGap(vector & coverageVector) +{ + + int firstOG =-1; + + for(int nd = 0; nd < coverageVector.size(); nd++) + { + if(coverageVector[nd]==0) + { + firstOG = nd; + return firstOG; + } + } + + return firstOG; + +} + +string osmHypothesis :: intToString(int num) +{ + + std::ostringstream stm; + stm< & targetNullWords , vector & currF) +{ + + int gFlag = 0; + int gp = 0; + int ans; + + + if ( j < j1) // j1 is the index of the source word we are about to generate ... + { + //if(coverageVector[j]==0) // if source word at j is not generated yet ... + if(coverageVector.GetValue(j)==0) // if source word at j is not generated yet ... + { + operations.push_back("_INS_GAP_"); + gFlag++; + gap[j]="Unfilled"; + } + if (j == E) + { + j = j1; + } + else + { + operations.push_back("_JMP_FWD_"); + j=E; + } + } + + if (j1 < j) + { + // if(j < E && coverageVector[j]==0) + if(j < E && coverageVector.GetValue(j)==0) + { + operations.push_back("_INS_GAP_"); + gFlag++; + gap[j]="Unfilled"; + } + + j=closestGap(gap,j1,gp); + operations.push_back("_JMP_BCK_"+ intToString(gp)); + + //cout<<"I am j "< 0) + gapCount++; + + openGapCount += getOpenGaps(); + + //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end()) + if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) + { + j1 = j; + german = currF[j1-startIndex]; + english = "_INS_"; + generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF); + } + + //print(); +} + +void osmHypothesis :: print() +{ + for (int i = 0; i< operations.size(); i++) + { + cerr< gap, int j1, int & gp) +{ + + int dist=1172; + int value=-1; + int temp=0; + gp=0; + int opGap=0; + + map :: iterator iter; + + iter=gap.end(); + + do + { + iter--; + //cout<<"Trapped "<first<first==j1 && iter->second== "Unfilled") + { + opGap++; + gp = opGap; + return j1; + + } + + if(iter->second =="Unfilled") + { + opGap++; + temp = iter->first - j1; + + if(temp<0) + temp=temp * -1; + + if(dist>temp && iter->first < j1) + { + dist=temp; + value=iter->first; + gp=opGap; + } + } + + + } + while(iter!=gap.begin()); + + return value; +} + + + +int osmHypothesis :: getOpenGaps() +{ + map :: iterator iter; + + int nd = 0; + for (iter = gap.begin(); iter!=gap.end(); iter++) + { + if(iter->second == "Unfilled") + nd++; + } + + return nd; + +} + +void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set doneTargetIndexes) +{ + + operations.push_back("_DEL_" + english); + currTargetIndex++; + + while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) + { + currTargetIndex++; + } + + if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) + { + english = currE[currTargetIndex]; + generateDeleteOperations(english,currTargetIndex,doneTargetIndexes); + } + +} + +void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageVector , Api & ptrOp, int order) +{ + + set doneTargetIndexes; + set eSide; + set fSide; + set :: iterator iter; + string english; + string source; + int j1; + int start = 0; + int targetIndex = 0; + doneTargetIndexes.clear(); + + + if (targetNullWords.size() != 0) // Source words to be deleted in the start of this phrase ... + { + iter = targetNullWords.begin(); + + if (*iter == startIndex) + { + + j1 = startIndex; + source = currF[j1-startIndex]; + english = "_INS_"; + generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF); + } + } + + if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) // first word has to be deleted ... + { + english = currE[targetIndex]; + generateDeleteOperations(english,targetIndex, doneTargetIndexes); + } + + + for (int i = 0; i < ceptsInPhrase.size(); i++) + { + source = ""; + english = ""; + + fSide = ceptsInPhrase[i].first; + eSide = ceptsInPhrase[i].second; + + iter = eSide.begin(); + targetIndex = *iter; + english += currE[*iter]; + iter++; + + for (; iter != eSide.end(); iter++) + { + if(*iter == targetIndex+1) + targetIndex++; + else + doneTargetIndexes.insert(*iter); + + english += "^_^"; + english += currE[*iter]; + } + + iter = fSide.begin(); + source += currF[*iter]; + iter++; + + for (; iter != fSide.end(); iter++) + { + source += "^_^"; + source += currF[*iter]; + } + + iter = fSide.begin(); + j1 = *iter + startIndex; + iter++; + + generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF); + + + for (; iter != fSide.end(); iter++) + { + j1 = *iter + startIndex; + generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF); + } + + targetIndex++; // Check whether the next target word is unaligned ... + + while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) + { + targetIndex++; + } + + if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) + { + english = currE[targetIndex]; + generateDeleteOperations(english,targetIndex, doneTargetIndexes); + } + } + + //removeReorderingOperations(); + calculateOSMProb(ptrOp, order); + //print(); + +} + +void osmHypothesis :: getMeCepts ( set & eSide , set & fSide , map > & tS , map > & sT) +{ + set :: iterator iter; + + int sz = eSide.size(); + vector t; + + for (iter = eSide.begin(); iter != eSide.end(); iter++) + { + t = tS[*iter]; + + for (int i = 0; i < t.size(); i++) + { + fSide.insert(t[i]); + } + + } + + for (iter = fSide.begin(); iter != fSide.end(); iter++) + { + + t = sT[*iter]; + + for (int i = 0 ; i sz) + { + getMeCepts(eSide,fSide,tS,sT); + } + +} + +void osmHypothesis :: constructCepts(vector & align , int startIndex , int endIndex, int targetPhraseLength) +{ + + std::map > sT; + std::map > tS; + std::set eSide; + std::set fSide; + std::set :: iterator iter; + std :: map > :: iterator iter2; + std :: pair < set , set > cept; + int src; + int tgt; + + + for (int i = 0; i < align.size(); i+=2) + { + src = align[i]; + tgt = align[i+1]; + tS[tgt].push_back(src); + sT[src].push_back(tgt); + } + + for (int i = startIndex; i<= endIndex; i++) // What are unaligned source words in this phrase ... + { + if (sT.find(i-startIndex) == sT.end()) + { + targetNullWords.insert(i); + } + } + + for (int i = 0; i < targetPhraseLength; i++) // What are unaligned target words in this phrase ... + { + if (tS.find(i) == tS.end()) + { + sourceNullWords.insert(i); + } + } + + + while (tS.size() != 0 && sT.size() != 0) + { + + iter2 = tS.begin(); + + eSide.clear(); + fSide.clear(); + eSide.insert (iter2->first); + + getMeCepts(eSide, fSide, tS , sT); + + for (iter = eSide.begin(); iter != eSide.end(); iter++) + { + iter2 = tS.find(*iter); + tS.erase(iter2); + } + + for (iter = fSide.begin(); iter != fSide.end(); iter++) + { + iter2 = sT.find(*iter); + sT.erase(iter2); + } + + cept = make_pair (fSide , eSide); + ceptsInPhrase.push_back(cept); + } + + + +/* + + cerr<<"Extracted Cepts "< "; + + for (iter = fSide.begin(); iter != fSide.end(); iter++) + { + cerr<<*iter<<" "; + } + + cerr<"<"< & scores) +{ + scores.clear(); + scores.push_back(opProb); + scores.push_back(gapWidth); + scores.push_back(gapCount); + scores.push_back(openGapCount); + scores.push_back(deletionCount); +} + + +} // namespace + diff --git a/moses/FF/OSM-Feature/osmHyp.h b/moses/FF/OSM-Feature/osmHyp.h new file mode 100644 index 000000000..8b98acd38 --- /dev/null +++ b/moses/FF/OSM-Feature/osmHyp.h @@ -0,0 +1,89 @@ +#pragma once + +# include "SRILM-API.h" +# include "moses/FFState.h" +# include "moses/Manager.h" +# include +# include +# include +# include + +using namespace std; + +namespace Moses +{ + +class osmState : public FFState +{ +public: + osmState(); + int Compare(const FFState& other) const; + void saveState(int jVal, int eVal, vector & hist , map & gapVal); + int getJ()const {return j;} + int getE()const {return E;} + map getGap() const { return gap;} + vector getHistory()const {return history;} + void print() const; + std::string getName() const; + void saveDelHistory(vector & histVal){delHistory = histVal;} + +protected: + int j, E; + std::map gap; + std::vector history; + std::vector delHistory; +}; + +class osmHypothesis +{ + + private: + + std::vector history; + std::vector operations; // List of operations required to generated this hyp ... + std::map gap; // Maintains gap history ... + int j; // Position after the last source word generated ... + int E; // Position after the right most source word so far generated ... + + int gapCount; // Number of gaps inserted ... + int deletionCount; + int openGapCount; + int gapWidth; + double opProb; + + vector currE; + vector currF; + vector < pair < set , set > > ceptsInPhrase; + set targetNullWords; + set sourceNullWords; + + int closestGap(std::map gap,int j1, int & gp); + int firstOpenGap(std::vector & coverageVector); + std::string intToString(int); + int getOpenGaps(); + int isTranslationOperation(int j); + void removeReorderingOperations(); + + void getMeCepts ( set & eSide , set & fSide , map > & tS , map > & sT); + + public: + + osmHypothesis(); + ~osmHypothesis(){}; + void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set & targetNullWords , std::vector & currF); + void generateDeleteOperations(std::string english, int currTargetIndex, std::set doneTargetIndexes); + void calculateOSMProb(Api & opPtr , int order); + void computeOSMFeature(int startIndex , WordsBitmap & coverageVector , Api & ptrOp, int order); + void constructCepts(vector & align , int startIndex , int endIndex, int targetPhraseLength); + void setPhrases(vector & val1 , vector & val2){currF = val1; currE = val2;} + void setState(const FFState* prev_state); + osmState * saveState(); + void print(); + void populateScores(vector & scores); + +}; + +} // namespace + + +