OSM-Feature

2024-09-20 15:48:05 +03:00 · 2013-06-24 12:29:33 +01:00 · 2013-06-24 12:29:33 +01:00 · cf55ab6678
commit cf55ab6678
parent bd95c2ccfe
6 changed files with 1287 additions and 0 deletions
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@ -0,0 +1,244 @@
+#include <fstream>
+#include "OpSequenceModel.h"
+#include "osmHyp.h"
+#include "util/check.hh"
+#include "moses/Util.h"
+#include "moses/OSM-Feature/osmHyp.h"
+
+
+
+using namespace std;
+
+namespace Moses
+{
+
+// Registers the feature under the name "OpSequenceModel" with 5 score
+// components (Evaluate() reports five values per hypothesis).
+// The LM handle and its settings start out in a defined "not loaded" state;
+// they receive their real values in Load() / readLanguageModel().
+OpSequenceModel::OpSequenceModel()
+:StatefulFeatureFunction("OpSequenceModel", 5 )
+,ptrOp(NULL)
+,lmOrder(0)
+,unkOpProb(0)
+{
+}
+
+void OpSequenceModel :: readLanguageModel(const char *lmFile)
+{
+/* Reads the operation LM from lmFile into a newly allocated Api (kept in ptrOp)
+ * and caches in unkOpProb the value contextProbN() returns for the one-token
+ * sequence "_TRANS_SLF_". lmOrder is passed to the reader, so it must be set
+ * beforehand; Load() assigns it before calling this function. */
+    vector <int> numbers;
+    int nonWordFlag = 0;
+    string unkOp = "_TRANS_SLF_";
+	ptrOp = new Api; // no matching delete is visible in this file
+	ptrOp -> read_lm(lmFile,lmOrder);
+	numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
+	unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
+
+/*
+  Earlier in-place loading code, now performed by Api::read_lm():
+
+  setlocale(LC_CTYPE, "");
+  setlocale(LC_COLLATE, "");
+
+  Vocab *vocab = new Vocab;
+   vocab->unkIsWord() = true; // vocabulary contains unknown word tag
+
+  LanguageModel = new Ngram( *vocab,order );
+  assert(LanguageModel != 0);
+  // LanguageModel->debugme(0);
+
+  File file( lmFile, "r" );
+  if (!LanguageModel->read( file )) {
+    cerr << "format error in lm file\n";
+    exit(1);
+  }
+
+  file.close();
+  */
+}
+
+
+// Loads the future-cost table and then the operation language model.
+//
+// osmFeatureFile: text file with one entry per line. Every line must consist
+//                 of exactly three "|||"-separated fields: source phrase,
+//                 target phrase and space-separated float scores (CHECK fails
+//                 otherwise, as it does when the file cannot be opened).
+// operationLM:    path handed to readLanguageModel().
+// orderVal:       n-gram order stored in lmOrder and used when reading the LM.
+void OpSequenceModel::Load(const std::string &osmFeatureFile, const std::string &operationLM , int orderVal)
+{
+  // Must be set first: readLanguageModel() below reads lmOrder.
+  lmOrder = orderVal;
+
+  ifstream sr (osmFeatureFile.c_str());
+  CHECK(sr.is_open());
+
+  // Phrases are built from factor 0 only.
+  vector<FactorType> factorOrder;
+  factorOrder.push_back(0);
+
+  string line;
+  while (std::getline(sr, line))
+  {
+    const std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
+    CHECK(tokens.size() == 3);
+
+    Phrase source, target;
+    source.CreateFromString(factorOrder, tokens[0], "|");
+    target.CreateFromString(factorOrder, tokens[1], "|");
+
+    // A later line with the same phrase pair overwrites the earlier scores.
+    ParallelPhrase pp(source, target);
+    m_futureCost[pp] = Tokenize<float>(tokens[2], " ");
+  }
+
+  readLanguageModel(operationLM.c_str());
+}
+
+
+
+// Scores the phrase pair applied by cur_hypo with the operation sequence model.
+//
+// cur_hypo:    hypothesis whose current source range / target phrase is scored.
+// prev_state:  feature state of the previous hypothesis (may be NULL, in which
+//              case the osmHypothesis defaults are used).
+// accumulator: receives the five scores produced by osmHypothesis::populateScores().
+// Returns a newly allocated osmState describing the model state after this phrase.
+FFState* OpSequenceModel::Evaluate(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const
+{
+  const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
+  const InputType &source = cur_hypo.GetManager().GetSource();
+  const WordsRange &sourceRange = cur_hypo.GetCurrSourceWordsRange();
+  const int startIndex = sourceRange.GetStartPos();
+  const int endIndex = sourceRange.GetEndPos();
+
+  // Flatten the word alignment into (first, second) pairs laid out one after
+  // the other; constructCepts() reads them as source index, target index.
+  vector <int> alignments;
+  const AlignmentInfo &align = target.GetAlignTerm();
+  for (AlignmentInfo::const_iterator iter = align.begin(); iter != align.end(); ++iter) {
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }
+
+  // Work on a copy of the coverage in which the span of this phrase is
+  // cleared again; the operation generator marks the words itself.
+  WordsBitmap myBitmap = cur_hypo.GetWordsBitmap();
+  vector <string> mySourcePhrase;
+  for (int i = startIndex; i <= endIndex; i++)
+  {
+    myBitmap.SetValue(i,0);
+    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString());
+  }
+
+  // Out-of-vocabulary target words are represented by the "_TRANS_SLF_" token.
+  vector <string> myTargetPhrase;
+  for (size_t i = 0; i < target.GetSize(); i++)
+  {
+    if (target.GetWord(i).IsOOV())
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString());
+  }
+
+  osmHypothesis obj;
+  obj.setState(prev_state);
+  obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
+  obj.setPhrases(mySourcePhrase , myTargetPhrase);
+  obj.computeOSMFeature(startIndex,myBitmap,*ptrOp,lmOrder);
+
+  // populateScores() clears the vector and fills in the five feature values.
+  vector<float> scores;
+  obj.populateScores(scores);
+  accumulator->PlusEquals(this, scores);
+
+  return obj.saveState();
+}
+
+FFState* OpSequenceModel::EvaluateChart(
+  const ChartHypothesis& /* cur_hypo */,
+  int /* featureID - used to index the state in the previous hypotheses */,
+  ScoreComponentCollection* accumulator) const
+{
+  abort(); // Always terminates the process: no chart-based scoring is provided by this function.
+// No return statement follows; control never gets past abort().
+}
+
+const FFState* OpSequenceModel::EmptyHypothesisState(const InputType &input) const // Creates the state for the empty hypothesis; `input` is not read.
+{
+  cerr << "OpSequenceModel::EmptyHypothesisState()" << endl; // trace line written to stderr on every call
+  return new osmState(); // default state: j = 0, E = 0, history containing only "<s>"
+}
+
+std::string OpSequenceModel::GetScoreProducerWeightShortName(unsigned idx) const // Short weight name of this feature; `idx` is ignored.
+{
+  return "osm"; // the same name is returned for every index
+}
+
+// Returns the future-cost scores stored for the (source, target) phrase pair.
+// For a pair that is not in the table, five scores are returned: the cached
+// unknown-operation score in slot 0 and zeros elsewhere.
+std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const Phrase &target) const
+{
+  const std::map<ParallelPhrase, Scores>::const_iterator found =
+      m_futureCost.find(ParallelPhrase(source, target));
+
+  if (found != m_futureCost.end())
+    return found->second;
+
+  vector<float> fallback(5, 0);
+  fallback[0] = unkOpProb;
+  return fallback;
+}
+
+} // namespace
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@ -0,0 +1,58 @@
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+#include "moses/FeatureFunction.h"
+#include "Ngram.h"
+#include "moses/Manager.h"
+#include "moses/OSM-Feature/osmHyp.h"
+#include "moses/OSM-Feature/SRILM-API.h"
+
+namespace Moses
+{
+
+class OpSequenceModel : public StatefulFeatureFunction // Stateful feature producing the five operation-sequence-model scores.
+{
+public:
+// NOTE: the three data members below are public.
+	//LM *LanguageModel;
+	Api * ptrOp; // LM wrapper allocated in readLanguageModel(); no delete is visible in this class
+	int lmOrder; // n-gram order, assigned in Load()
+	float unkOpProb; // score of the "_TRANS_SLF_" operation, computed in readLanguageModel()
+
+	OpSequenceModel();
+
+	void readLanguageModel(const char *); // loads the LM file and fills ptrOp / unkOpProb
+	void Load(const std::string &osmFeatureFile, const std::string &operationLM , int orderVal); // reads the future-cost table, then the LM
+
+	FFState* Evaluate(
+	    const Hypothesis& cur_hypo,
+	    const FFState* prev_state,
+	    ScoreComponentCollection* accumulator) const; // adds five scores to accumulator and returns a new osmState
+
+  virtual FFState* EvaluateChart(
+    const ChartHypothesis& /* cur_hypo */,
+    int /* featureID - used to index the state in the previous hypotheses */,
+    ScoreComponentCollection* accumulator) const; // the definition calls abort()
+
+  virtual const FFState* EmptyHypothesisState(const InputType &input) const; // returns a default-constructed osmState
+
+  virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const; // returns "osm"
+
+  std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const; // table lookup with an unknown-pair fallback
+
+protected:
+	typedef std::pair<Phrase, Phrase> ParallelPhrase; // (source, target)
+	typedef std::vector<float> Scores;
+	std::map<ParallelPhrase, Scores> m_futureCost; // filled by Load(), read by GetFutureScores()
+
+	std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase; // NOTE(review): not referenced by the member functions defined in OpSequenceModel.cpp
+	std::set <int> targetNullWords; // NOTE(review): likewise not referenced there
+
+
+
+};
+
+
+} // namespace
--- a/moses/FF/OSM-Feature/SRILM-API.cpp
+++ b/moses/FF/OSM-Feature/SRILM-API.cpp
@ -0,0 +1,175 @@
+#include "SRILM-API.h"
+#include "Ngram.h"
+
+
+Api :: Api() // Starts without a model; read_lm() creates it.
+{
+	LanguageModel = NULL; // keeps the destructor's delete safe when no model was ever loaded
+}
+
+Api :: ~Api() // Releases the model created by read_lm().
+{
+	delete LanguageModel; // the Vocab allocated in read_lm() is not released here
+}
+
+int Api :: getLMID(char* toBeChecked)
+{
+/* Returns the vocabulary index of the last token parsed from toBeChecked.
+ * At most 10 tokens are parsed; the process exits when none is found.
+ * NOTE(review): the buffer is passed as non-const char* to parseWords, which
+ * presumably tokenizes it in place - confirm before passing shared strings. */
+    VocabString words[11];
+    unsigned len = LanguageModel->vocab.parseWords(toBeChecked, words, 10);
+
+    if (len < 1) {
+      cerr << "Error: in input file!\n";
+      exit(1);
+    }
+
+    VocabString last = words[len-1];
+    VocabIndex index = LanguageModel->vocab.getIndex(last,LanguageModel->vocab.unkIndex()); // second argument presumably is the index returned for unknown tokens - TODO confirm
+	
+    return index;	
+}
+
+// Returns LanguageModel->wordProb() for the last id in `numbers`, using the
+// preceding ids as context.
+//
+// numbers:     vocabulary ids in reading order; the last one is the word to
+//              score. Must not be empty (the process exits otherwise, in line
+//              with the other error paths of this class).
+// nonWordFlag: not used by this function.
+//
+// The context buffer is sized from the input, so any n-gram length is safe
+// (a fixed 11-slot array would overflow for more than 11 ids).
+double Api :: contextProbN (vector <int> numbers, int & nonWordFlag)
+{
+	if (numbers.empty()) {
+	  cerr << "Error: empty n-gram passed to contextProbN!\n";
+	  exit(1);
+	}
+
+	const VocabIndex last = numbers.back();
+
+	// Context is handed over most-recent-first and terminated by Vocab_None.
+	vector <VocabIndex> wordIndex(numbers.rbegin() + 1, numbers.rend());
+	wordIndex.push_back(Vocab_None);
+
+	return LanguageModel->wordProb(last, &wordIndex[0]);
+}
+
+// Returns the context length reported by LanguageModel->contextID() for the
+// last id in `numbers`, given the preceding ids as context.
+//
+// numbers: vocabulary ids in reading order; must not be empty (the process
+//          exits otherwise, in line with the other error paths of this class).
+//
+// The context buffer is sized from the input, so any n-gram length is safe
+// (a fixed 11-slot array would overflow for more than 11 ids).
+unsigned Api :: backOffLength (vector <int> numbers)
+{
+	if (numbers.empty()) {
+	  cerr << "Error: empty n-gram passed to backOffLength!\n";
+	  exit(1);
+	}
+
+	const VocabIndex last = numbers.back();
+
+	// Context is handed over most-recent-first and terminated by Vocab_None.
+	vector <VocabIndex> wordIndex(numbers.rbegin() + 1, numbers.rend());
+	wordIndex.push_back(Vocab_None);
+
+	unsigned length = 0;
+	LanguageModel->contextID(last, &wordIndex[0], length);
+	return length;
+}
+
+double Api :: contextProb (char * toBeChecked, int & nonWordFlag)
+{
+/* Scores the last token of toBeChecked given the tokens before it.
+ * At most 10 tokens are parsed; the process exits when none is found.
+ * nonWordFlag is set to 1 when the last token has no vocabulary index and is
+ * left untouched otherwise. The value comes from lm_logprobContext(). */
+  //read_lm(languageModel,order);
+  VocabString words[11];
+
+    unsigned len = LanguageModel->vocab.parseWords(toBeChecked, words, 10);
+
+    if (len < 1) {
+      cerr << "Error: in input file!\n";
+      exit(1);
+    }
+
+
+    VocabString last = words[len-1];
+
+    words[len-1] = 0; // cut the scored word off; `words` now holds only the context
+    // reverse N-gram prefix to obtain context
+
+    VocabIndex index = LanguageModel->vocab.getIndex(last);	
+
+
+	if(index == Vocab_None)
+	{
+		nonWordFlag=1;
+		
+	}
+
+    LanguageModel->vocab.reverse( words );
+
+    // double cost= pow(10,lm_logprobContext(last, words ));
+	double cost= lm_logprobContext(last, words);
+
+   return cost;
+
+}
+
+// Returns 10 raised to the value lm_logprobSent() yields for the tokens of
+// toBeChecked. At most 15 tokens are considered; the process exits when the
+// buffer contains none.
+//
+// The token array has one slot more than the parse limit and is terminated
+// explicitly: the array is passed on without a length, so it has to end in a
+// null entry even when exactly 15 tokens were parsed.
+double Api :: sentProb (char * toBeChecked)
+{
+	const unsigned maxWords = 15;
+	VocabString sentence[maxWords + 1];
+	const unsigned len = LanguageModel->vocab.parseWords(toBeChecked, sentence, maxWords);
+
+	if (len < 1)
+	{
+		cerr << "Error: in input file!\n";
+		exit(1);
+	}
+
+	// Clamp defensively so the terminator always lands inside the array.
+	sentence[len < maxWords ? len : maxWords] = 0;
+
+	return pow(10,lm_logprobSent(sentence));
+}
+
+// Loads the n-gram model stored in lmFile.
+//
+// lmFile: path of the model file.
+// order:  n-gram order passed to the Ngram constructor.
+//
+// A model loaded by an earlier call is released first, so calling this twice
+// no longer leaks it. Exits the process when the file cannot be parsed.
+// NOTE: the Vocab is handed to the Ngram by reference and is not released by
+// any code visible in this file.
+void Api :: read_lm(const char *lmFile,int order)
+{
+  setlocale(LC_CTYPE, "");
+  setlocale(LC_COLLATE, "");
+
+  delete LanguageModel;
+  LanguageModel = NULL;
+
+  Vocab *vocab = new Vocab;
+  vocab->unkIsWord() = true; /* vocabulary contains unknown word tag */
+
+  // operator new throws on failure, so no null check is needed afterwards.
+  LanguageModel = new Ngram( *vocab,order );
+
+  File file( lmFile, "r" );
+  if (!LanguageModel->read( file )) {
+    cerr << "format error in lm file\n";
+    exit(1);
+  }
+
+  file.close();
+}
+
+float Api :: lm_logprobSent( const VocabString *sentence ) // Forwards to LanguageModel->sentenceProb().
+// `sentence` is passed without a length; presumably it must end in a null entry - TODO confirm.
+{
+  TextStats obj; // statistics object required by sentenceProb(); its contents are discarded
+  return LanguageModel->sentenceProb(sentence, obj);
+}
+
+
+float Api :: lm_logprobContext( const VocabString word, const VocabString *context ) // Forwards to LanguageModel->wordProb() for `word` given `context`.
+{
+  return LanguageModel->wordProb( word, context );  // the double used by callers is obtained from this float result
+}
+
+
--- a/moses/FF/OSM-Feature/SRILM-API.h
+++ b/moses/FF/OSM-Feature/SRILM-API.h
@ -0,0 +1,31 @@
+#pragma once
+
+#include "Ngram.h"
+#include <vector>
+
+using namespace std;
+
+// Thin wrapper around an SRILM language model. The model is created by
+// read_lm() and deleted in the destructor.
+class Api
+{
+
+	public:
+
+	Api();
+	~Api();
+	void read_lm(const char *,int);	// load a model file with the given n-gram order
+	float lm_logprobContext( const VocabString word, const VocabString *context );
+	float lm_logprobSent( const VocabString *sentence );
+	double contextProb(char *, int & );	// score the last token of a string given the preceding tokens
+	double contextProbN (std::vector <int> , int &);	// same, for a sequence of vocabulary ids
+	unsigned backOffLength (std::vector <int>);
+
+	double sentProb(char *) ;
+	int getLMID(char *);	// vocabulary id of the last token of a string
+
+	private :
+
+	// Not copyable: the destructor deletes LanguageModel, so a copy would
+	// delete the same model twice. Declared but intentionally not defined.
+	Api(const Api &);
+	Api & operator=(const Api &);
+
+	LM *LanguageModel;
+
+};
+
+
--- a/moses/FF/OSM-Feature/osmHyp.cpp
+++ b/moses/FF/OSM-Feature/osmHyp.cpp
@ -0,0 +1,690 @@
+#include "osmHyp.h"
+#include <sstream>
+
+namespace Moses
+{
+osmState::osmState() // Initial state: both positions at 0.
+:j(0)
+,E(0)
+{
+  history.push_back("<s>"); // the operation history starts with the "<s>" token
+}
+
+void osmState::saveState(int jVal, int eVal, vector <string> & histVal , map <int , string> & gapVal) // Overwrites positions, history and gap table with the given values.
+{
+	history.clear(); // cleared before the copy below
+	gap.clear();
+	gap = gapVal;
+	history = histVal;
+	j = jVal;
+	E = eVal;
+}
+
+int osmState::Compare(const FFState& otherBase) const // Three-way comparison: -1, 0 or +1.
+{
+  const osmState &other = static_cast<const osmState&>(otherBase); // unchecked cast: otherBase must be an osmState
+  if (j != other.j) // fields are compared in the order j, E, gap, history
+    return (j < other.j) ? -1 : +1;
+  if (E != other.E)
+    return (E < other.E) ? -1 : +1;
+  if (gap != other.gap)
+    return (gap < other.gap) ? -1 : +1;
+  if (history != other.history)
+    return (history < other.history) ? -1 : +1;
+// delHistory does not take part in the comparison.
+  return 0;
+}
+
+// Writes every entry of delHistory to stderr, each followed by one space.
+void osmState :: print() const
+{
+	vector <string> :: const_iterator entry = delHistory.begin();
+	while (entry != delHistory.end())
+	{
+		cerr<<*entry<<" ";
+		++entry;
+	}
+}
+
+std::string osmState :: getName() const // Side effect: dumps delHistory to stderr through print().
+{
+// The returned text is always "done", regardless of the state.
+		print();
+		return "done";
+}
+
+//////////////////////////////////////////////////
+
+// Starts with zeroed positions, counters and probability; the history,
+// operation list and gap table are default-constructed and therefore empty.
+osmHypothesis :: osmHypothesis()
+:j(0)
+,E(0)
+,gapCount(0)
+,deletionCount(0)
+,openGapCount(0)
+,gapWidth(0)
+,opProb(0)
+{
+}
+
+// Copies positions, history and gap table out of prev_state, which must point
+// to an osmState. A NULL pointer leaves this object unchanged.
+void osmHypothesis :: setState(const FFState* prev_state)
+{
+	if (prev_state == NULL)
+		return;
+
+	const osmState * previous = static_cast <const osmState *> (prev_state);
+	j = previous->getJ();
+	E = previous->getE();
+	history = previous->getHistory();
+	gap = previous->getGap();
+}
+
+// Returns a newly allocated osmState holding the current positions, history
+// and gap table, plus the operation list as its delHistory.
+// The caller receives the raw pointer; nothing here releases it.
+osmState * osmHypothesis :: saveState()
+{
+	osmState * snapshot = new osmState;
+
+	snapshot->saveState(j,E,history,gap);
+	snapshot->saveDelHistory(operations);
+
+	return snapshot;
+}
+
+// Returns 0 when operations[x] contains one of the markers "_JMP_BCK_",
+// "_JMP_FWD_", "_CONT_CEPT_" or "_INS_GAP_", and 1 otherwise.
+// x must be a valid index into operations.
+int osmHypothesis :: isTranslationOperation(int x)
+{
+	static const char * const markers[] = { "_JMP_BCK_", "_JMP_FWD_", "_CONT_CEPT_", "_INS_GAP_" };
+	const size_t markerCount = sizeof(markers) / sizeof(markers[0]);
+
+	const std::string & op = operations[x];
+	for (size_t m = 0; m < markerCount; ++m)
+	{
+		if (op.find(markers[m]) != std::string::npos)
+			return 0;
+	}
+
+	return 1;
+}
+
+// Resets the four gap/deletion counters and keeps only those entries of
+// `operations` for which isTranslationOperation() returns 1.
+void osmHypothesis :: removeReorderingOperations()
+{
+	gapCount = 0;
+	deletionCount = 0;
+	openGapCount = 0;
+	gapWidth = 0;
+
+	std::vector <std::string> kept;
+	const int total = operations.size();
+
+	for (int idx = 0; idx < total; ++idx)
+	{
+		if (isTranslationOperation(idx) != 1)
+			continue;
+		kept.push_back(operations[idx]);
+	}
+
+	operations.swap(kept);
+}
+
+void osmHypothesis :: calculateOSMProb(Api & ptrOp , int order)
+{
+	/* Sets opProb to the sum of ptrOp.contextProbN() over all entries of
+	 * `operations`, each scored with the carried-over history plus the
+	 * operations before it as context, and extends `history` with the new
+	 * operations. `order` bounds the length of the scored sequence. */
+	opProb = 0;
+	vector <int> numbers;
+	vector <int> context;
+	int nonWordFlag  = 0;
+	double temp;
+// LM ids of the operations of this phrase.
+	for (int i=0; i< operations.size(); i++)
+		numbers.push_back(ptrOp.getLMID(const_cast <char *> (operations[i].c_str())));
+
+	// cerr<<"History Of Operations "<<history.size()<<endl;
+// LM ids of the history carried over from the previous state.
+	for (int i=0; i< history.size(); i++)
+	{
+		context.push_back(ptrOp.getLMID(const_cast <char *> (history[i].c_str())));
+		//cerr<<history[i]<<" ";
+	}
+	//cerr<<endl;
+// `context` and `history` grow and shrink together, so they stay the same length.
+	for (int i = 0; i<operations.size(); i++)
+	{
+		//cerr<<operations[i]<<endl;
+		context.push_back(numbers[i]);
+		history.push_back(operations[i]);
+		//cout<<"Context Size "<<context.size()<<endl;
+		if (context.size() > order) // keep at most `order` entries: drop the oldest one
+		{
+			context.erase(context.begin());
+			history.erase(history.begin());
+		}
+		
+		temp = ptrOp.contextProbN(context,nonWordFlag);		   
+		opProb = opProb + temp;
+		
+		 //cout<<temp<<" "<<opProb<<endl;
+	
+	}
+// The loop leaves at most `order` entries; one more erase caps the saved history at order-1.
+	if (history.size() > order-1)
+	{
+	      history.erase(history.begin());
+	}
+
+}
+
+
+// Returns the index of the first element of coverageVector equal to 0,
+// or -1 when there is none.
+int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
+{
+	int position = 0;
+
+	for (vector <int> :: const_iterator cell = coverageVector.begin(); cell != coverageVector.end(); ++cell, ++position)
+	{
+		if (*cell == 0)
+			return position;
+	}
+
+	return -1;
+}
+
+string osmHypothesis :: intToString(int num)
+{
+ 	// Returns num formatted through an ostringstream with default stream settings.
+       std::ostringstream stm;
+       stm<<num;
+
+    return stm.str();
+
+}
+
+// Appends to `operations` the steps needed to generate the source word at
+// position j1, and updates j, E, the gap table and the gap/deletion counters.
+//
+// startIndex:      first source position of the current phrase; used to index currF.
+// j1:              source position about to be generated.
+// contFlag:        0 = first source word of a cept (emits a translation operation),
+//                  2 = unaligned source word (emits "_INS_" + german and counts a deletion),
+//                  any other value = further source word of a cept ("_CONT_CEPT_").
+// coverageVector:  source coverage; position j is marked as covered here.
+// english, german: target / source side text used to build the operation name.
+// targetNullWords: source positions without alignment; when the position
+//                  following the generated word is one of them and still
+//                  uncovered, it is generated as well by a recursive call.
+// currF:           source words of the current phrase.
+void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , WordsBitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
+{
+
+	int gFlag = 0;	// number of gaps inserted by this call
+	int gp = 0;	// gap number reported by closestGap()
+	int ans;
+
+	if ( j < j1) // j1 is the index of the source word we are about to generate ...
+	{
+		if(coverageVector.GetValue(j)==0) // if source word at j is not generated yet ...
+		{
+			operations.push_back("_INS_GAP_");
+			gFlag++;
+			gap[j]="Unfilled";
+		}
+		if (j == E)
+		{
+			j = j1;
+		}
+		else
+		{
+			operations.push_back("_JMP_FWD_");
+			j=E;
+		}
+	}
+
+	if (j1 < j)
+	{
+		if(j < E && coverageVector.GetValue(j)==0)
+		{
+			operations.push_back("_INS_GAP_");
+			gFlag++;
+			gap[j]="Unfilled";
+		}
+
+		j=closestGap(gap,j1,gp);
+		operations.push_back("_JMP_BCK_"+ intToString(gp));
+
+		if(j==j1)
+		  gap[j]="Filled";
+	}
+
+	if (j < j1)
+	{
+		operations.push_back("_INS_GAP_");
+		gap[j] = "Unfilled";
+		gFlag++;
+		j=j1;
+	}
+
+	if(contFlag == 0) // First words of the multi-word cept ...
+	{
+
+		if(english == "_TRANS_SLF_") // Unknown word ...
+		{
+			operations.push_back("_TRANS_SLF_");
+		}
+		else
+		{
+			operations.push_back("_TRANS_" + english + "_TO_" + german);
+		}
+
+		ans = coverageVector.GetFirstGapPos();
+
+		if (ans != -1)
+			gapWidth += j - ans;
+
+	}
+	else if (contFlag == 2)
+	{
+
+		operations.push_back("_INS_" + german);
+		ans = coverageVector.GetFirstGapPos();
+
+		if (ans != -1)
+			gapWidth += j - ans;
+		deletionCount++;
+	}
+	else
+	{
+		operations.push_back("_CONT_CEPT_");
+	}
+
+	coverageVector.SetValue(j,1);
+	j+=1;
+
+	if(E<j)
+	  E=j;
+
+	if (gFlag > 0)
+		gapCount++;
+
+	openGapCount += getOpenGaps();
+
+	// j now points one past the generated word and may lie beyond the last
+	// source position. The set lookup therefore comes first: the coverage is
+	// read only for positions recorded as unaligned source words, and the
+	// out-of-range read that the opposite order allowed cannot happen.
+	if (targetNullWords.find(j) != targetNullWords.end() && coverageVector.GetValue(j) == 0)
+	{
+		j1 = j;
+		german = currF[j1-startIndex];
+		english = "_INS_";
+		generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
+	}
+
+}
+
+// Dumps the operation list and the five feature values to stderr.
+void osmHypothesis :: print()
+{
+	for (vector <string> :: const_iterator op = operations.begin(); op != operations.end(); ++op)
+		cerr<<*op<<" ";
+
+	cerr<<endl<<endl;
+
+	cerr<<"Operation Probability "<<opProb<<endl;
+	cerr<<"Gap Count "<<gapCount<<endl;
+	cerr<<"Open Gap Count "<<openGapCount<<endl;
+	cerr<<"Gap Width "<<gapWidth<<endl;
+	cerr<<"Deletion Count "<<deletionCount<<endl;
+
+	cerr<<"_______________"<<endl;
+}
+
+// Finds the gap to jump back to for source position j1.
+//
+// gap: gap table (position -> "Unfilled" / "Filled"); only "Unfilled" entries count.
+// j1:  source position about to be generated.
+// gp:  output; number of the chosen gap, counting unfilled gaps from the
+//      highest position downwards starting at 1. Set to 0 when no gap is chosen.
+//
+// Returns j1 when j1 itself is an unfilled gap; otherwise the nearest unfilled
+// gap to the left of j1 that is less than 1172 positions away; otherwise -1.
+// An empty table yields -1 (walking backwards from end() of an empty map
+// would be undefined behaviour).
+int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
+{
+	int dist = 1172;
+	int value = -1;
+	int opGap = 0;
+	gp = 0;
+
+	// Highest position first, matching the numbering described above.
+	for (map <int,string> :: reverse_iterator iter = gap.rbegin(); iter != gap.rend(); ++iter)
+	{
+		if (iter->second != "Unfilled")
+			continue;
+
+		opGap++;
+
+		if (iter->first == j1)
+		{
+			gp = opGap;
+			return j1;
+		}
+
+		if (iter->first < j1)
+		{
+			const int temp = j1 - iter->first;
+
+			if (dist > temp)
+			{
+				dist = temp;
+				value = iter->first;
+				gp = opGap;
+			}
+		}
+	}
+
+	return value;
+}
+
+
+
+// Counts the entries of the gap table whose value is "Unfilled".
+int osmHypothesis :: getOpenGaps()
+{
+	int unfilled = 0;
+
+	for (map <int,string> :: const_iterator entry = gap.begin(); entry != gap.end(); ++entry)
+		unfilled += (entry->second == "Unfilled") ? 1 : 0;
+
+	return unfilled;
+}
+
+// Appends "_DEL_" + english and then keeps going: after each operation the
+// target index advances by one, skips indexes in doneTargetIndexes, and if the
+// index reached is in sourceNullWords the word currE[index] gets a "_DEL_"
+// operation as well. Stops at the first index that is not in sourceNullWords.
+void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
+{
+	for (;;)
+	{
+		operations.push_back("_DEL_" + english);
+
+		++currTargetIndex;
+		while (doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end())
+			++currTargetIndex;
+
+		if (sourceNullWords.find(currTargetIndex) == sourceNullWords.end())
+			break;
+
+		english = currE[currTargetIndex];
+	}
+}
+
+void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageVector , Api & ptrOp, int order)
+{
+/* Turns the current phrase pair into operations and scores them.
+ * Uses the cepts and unaligned-word sets built by constructCepts() and the
+ * words stored by setPhrases(). generateOperations() and
+ * generateDeleteOperations() append to `operations`; calculateOSMProb() then
+ * sets opProb. startIndex is the first source position of the phrase. */
+	set <int> doneTargetIndexes;
+	set <int> eSide;
+	set <int> fSide;
+	set <int> :: iterator iter;
+	string english;
+	string source;
+	int j1;
+	int start = 0;
+	int targetIndex = 0;
+	doneTargetIndexes.clear();
+
+
+	if (targetNullWords.size() != 0) // Source words to be deleted in the start of this phrase ...
+	{
+		iter = targetNullWords.begin();
+
+		if (*iter == startIndex)
+		{
+
+			j1 = startIndex;
+			source = currF[j1-startIndex];
+			english = "_INS_";
+			generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
+		}
+	}
+
+	if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) // first word has to be deleted ...
+	{
+			english = currE[targetIndex];
+			generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+	}
+
+// One pass per cept; .first holds source indexes, .second target indexes (both phrase-relative).
+	for (int i = 0; i < ceptsInPhrase.size(); i++)
+	{
+		source = "";
+		english = "";
+
+		fSide = ceptsInPhrase[i].first;
+		eSide = ceptsInPhrase[i].second;
+
+		iter = eSide.begin();
+		targetIndex = *iter;
+		english += currE[*iter];
+		iter++;
+// Join the target words with "^_^"; targetIndex follows the contiguous run, other indexes are remembered as done.
+		for (; iter != eSide.end(); iter++)
+		{
+			if(*iter == targetIndex+1)
+				targetIndex++;
+			else
+				doneTargetIndexes.insert(*iter);
+
+			english += "^_^";
+			english += currE[*iter];
+		}
+
+		iter = fSide.begin();
+		source += currF[*iter];
+		iter++;
+// Join the source words with "^_^".
+		for (; iter != fSide.end(); iter++)
+		{
+			source += "^_^";
+			source += currF[*iter];
+		}
+
+		iter = fSide.begin();
+		j1 = *iter + startIndex;
+		iter++;
+// First source word of the cept: contFlag 0.
+		generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
+
+// Remaining source words of the cept: contFlag 1.
+		for (; iter != fSide.end(); iter++)
+		{
+		     j1 = *iter + startIndex;
+		     generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
+		}
+
+		targetIndex++; // Check whether the next target word is unaligned ...
+
+		while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end())
+		{
+				targetIndex++;
+		}
+
+		if(sourceNullWords.find(targetIndex) != sourceNullWords.end())
+		{
+			english = currE[targetIndex];
+			generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+		}
+	}
+
+	//removeReorderingOperations();
+	calculateOSMProb(ptrOp, order);
+	//print();
+
+}
+
+// Grows eSide / fSide into the closure of the alignment links:
+// every source index linked (through tS) to an index in eSide is added to
+// fSide, every target index linked (through sT) to an index in fSide is added
+// to eSide, and the two steps repeat until eSide stops growing.
+// tS and sT are accessed with operator[].
+void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
+{
+	bool grew = true;
+
+	while (grew)
+	{
+		const size_t sizeBefore = eSide.size();
+
+		for (set <int> :: iterator e = eSide.begin(); e != eSide.end(); ++e)
+		{
+			const vector <int> & linked = tS[*e];
+			fSide.insert(linked.begin(), linked.end());
+		}
+
+		for (set <int> :: iterator f = fSide.begin(); f != fSide.end(); ++f)
+		{
+			const vector <int> & linked = sT[*f];
+			eSide.insert(linked.begin(), linked.end());
+		}
+
+		grew = eSide.size() > sizeBefore;
+	}
+}
+
+void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
+{
+/* Builds ceptsInPhrase, targetNullWords and sourceNullWords from the alignment.
+ * align holds pairs laid out as source index, target index (an even number of
+ * entries is expected). startIndex..endIndex is the source span of the phrase;
+ * targetPhraseLength is the number of target words. */
+		std::map <int , vector <int> > sT;
+		std::map <int , vector <int> > tS;
+		std::set <int> eSide;
+		std::set <int> fSide;
+		std::set <int> :: iterator iter;
+		std :: map <int , vector <int> > :: iterator iter2;
+		std :: pair < set <int> , set <int> > cept;
+		int src;
+		int tgt;
+
+// Index the links in both directions: tS maps target -> sources, sT maps source -> targets.
+		for (int i = 0;  i < align.size(); i+=2)
+		{
+			src = align[i];
+			tgt = align[i+1];
+			tS[tgt].push_back(src);
+			sT[src].push_back(tgt);
+		}
+
+		for (int i = startIndex; i<= endIndex; i++)  // What are unaligned source words in this phrase ...
+		{
+			if (sT.find(i-startIndex) == sT.end())
+			{
+				targetNullWords.insert(i); // stored as sentence position, not phrase-relative
+			}
+		}
+
+		for (int i = 0; i < targetPhraseLength; i++)  // What are unaligned target words in this phrase ...
+		{
+			if (tS.find(i) == tS.end())
+			{
+				sourceNullWords.insert(i);
+			}
+		}
+
+// Repeatedly take the lowest remaining target index, expand it to a full cept and remove its links.
+		while (tS.size() != 0 && sT.size() != 0)
+		{
+
+			iter2 = tS.begin();
+
+			eSide.clear();
+			fSide.clear();
+			eSide.insert (iter2->first);
+
+			getMeCepts(eSide, fSide, tS , sT);
+
+			for (iter = eSide.begin(); iter != eSide.end(); iter++)
+			{
+				iter2 = tS.find(*iter);
+				tS.erase(iter2);
+			}
+
+			for (iter = fSide.begin(); iter != fSide.end(); iter++)
+			{
+				iter2 = sT.find(*iter);
+				sT.erase(iter2);
+			}
+
+			cept = make_pair (fSide , eSide); // .first = source indexes, .second = target indexes
+			ceptsInPhrase.push_back(cept);
+		}
+
+
+
+/*
+
+	  cerr<<"Extracted Cepts "<<endl;
+		for (int i = 0; i < ceptsInPhrase.size(); i++)
+			{
+
+				fSide = ceptsInPhrase[i].first;
+				eSide = ceptsInPhrase[i].second;
+
+				for (iter = eSide.begin(); iter != eSide.end(); iter++)
+				{
+			   		cerr<<*iter<<" ";
+				}
+			    	cerr<<"<---> ";
+
+				for (iter = fSide.begin(); iter != fSide.end(); iter++)
+				{
+					cerr<<*iter<<" ";
+				}
+
+				cerr<<endl;
+			}
+			cerr<<endl;
+
+		cerr<<"Unaligned Target Words"<<endl;
+
+		for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
+			cerr<<*iter<<"<--->"<<endl;
+
+		cerr<<"Unaligned Source Words"<<endl;
+
+		for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
+			cerr<<*iter<<"<--->"<<endl;
+
+*/
+
+}
+
+// Replaces the contents of `scores` with the five feature values, in the
+// order: operation probability, gap width, gap count, open gap count,
+// deletion count.
+void osmHypothesis :: populateScores(vector <float> & scores)
+{
+	scores.resize(5);
+	scores[0] = opProb;
+	scores[1] = gapWidth;
+	scores[2] = gapCount;
+	scores[3] = openGapCount;
+	scores[4] = deletionCount;
+}
+
+
+} // namespace
+
--- a/moses/FF/OSM-Feature/osmHyp.h
+++ b/moses/FF/OSM-Feature/osmHyp.h
@ -0,0 +1,89 @@
+#pragma once
+
+# include "SRILM-API.h"
+# include "moses/FFState.h"
+# include "moses/Manager.h"
+# include <set>
+# include <map>
+# include <string>
+# include <vector>
+
+using namespace std;
+
+namespace Moses
+{
+
+class osmState : public FFState // Feature state stored with each hypothesis.
+{
+public:
+  osmState(); // j = 0, E = 0, history = {"<s>"}
+  int Compare(const FFState& other) const; // orders by j, E, gap, history (delHistory is ignored)
+  void saveState(int jVal, int eVal, vector <string> & hist , map <int , string> & gapVal); // overwrites j, E, history and gap
+  int getJ()const {return j;}
+  int getE()const {return E;}
+  map <int , string> getGap() const { return gap;} // returns a copy
+  vector <string> getHistory()const {return history;} // returns a copy
+  void print() const; // writes delHistory to stderr
+  std::string getName() const; // calls print() and returns "done"
+  void saveDelHistory(vector <string> & histVal){delHistory = histVal;}
+
+protected:
+  int j, E; // source positions; see the j / E member comments in osmHypothesis
+  std::map <int,std::string> gap; // position -> "Unfilled" / "Filled"
+  std::vector <std::string> history; // recent operations used as LM context
+  std::vector <std::string> delHistory; // operation list stored by osmHypothesis::saveState()
+};
+
+class osmHypothesis // Scratch object that turns one phrase pair into operations and feature values.
+{
+
+	private:
+	// Model state, copied from / to osmState by setState() / saveState().
+	std::vector <std::string> history;
+	std::vector <std::string> operations;	// List of operations required to generated this hyp ...
+	std::map <int,std::string> gap;	// Maintains gap history ...
+	int j;	// Position after the last source word generated ...
+	int E; // Position after the right most source word so far generated ...
+
+	int gapCount; // Number of gaps inserted ...
+	int deletionCount; // incremented once per "_INS_" operation in generateOperations()
+	int openGapCount; // accumulated getOpenGaps() results
+	int gapWidth;	
+	double opProb; // set by calculateOSMProb()
+
+	vector <string> currE; // target words of the current phrase (setPhrases)
+	vector <string> currF; // source words of the current phrase (setPhrases)
+	vector < pair < set <int> , set <int> > > ceptsInPhrase; // (source indexes, target indexes) per cept
+	set <int> targetNullWords; // unaligned source words, as sentence positions
+	set <int> sourceNullWords; // unaligned target words, as phrase-relative indexes
+
+	int closestGap(std::map <int,std::string> gap,int j1, int & gp);
+	int firstOpenGap(std::vector <int> & coverageVector);
+	std::string intToString(int);
+	int  getOpenGaps();
+	int isTranslationOperation(int j);
+	void removeReorderingOperations();
+
+	void getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT);
+
+	public:
+
+	osmHypothesis();
+	~osmHypothesis(){};
+	void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
+	void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
+	void calculateOSMProb(Api & opPtr , int order);
+	void computeOSMFeature(int startIndex , WordsBitmap & coverageVector , Api & ptrOp, int order);
+	void constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
+	void setPhrases(vector <string> & val1 , vector <string> & val2){currF = val1; currE = val2;} // val1 = source words, val2 = target words
+	void setState(const FFState* prev_state);
+	osmState * saveState(); // returns a newly allocated osmState
+	void print();
+	void populateScores(vector <float> & scores);
+
+};
+
+} // namespace
+
+
+