Transliteration Mining

This commit is contained in:
Nadir Durrani 2013-12-16 18:19:44 +00:00
parent c74d85f58b
commit c291f859a0
5 changed files with 904 additions and 3 deletions

242
misc/1-1-Extraction.cpp Normal file
View File

@ -0,0 +1,242 @@
#include <fstream>
#include <iostream>
#include <vector>
#include <sstream>
#include <map>
#include <set>
#include <cstdlib>
using namespace std;
// Parses the integer at the start of `s` (leading whitespace is skipped by the
// stream extraction) and returns it.
// Returns 0 when `s` is empty or contains only whitespace.
int stringToInteger(std::string s)
{
    // Start from a defined value: for an empty or all-whitespace string the
    // extraction below never writes to `value`, so without the initialiser the
    // function would return an indeterminate int.
    int value = 0;
    std::istringstream buffer(s);
    buffer >> value;
    return value;
}
// Reads the text file `fileName` line by line and appends each line (without
// its line terminator) to `input`. Elements already in `input` are kept.
// If no file name is given or the file cannot be opened, a message is written
// to stderr and the process exits with status 1.
void loadInput(const char * fileName, std::vector <std::string> & input)
{
    // main() forwards argv entries unchecked, so a missing command-line
    // argument arrives here as a null pointer; opening a stream on it would be
    // undefined behaviour.
    if (!fileName) {
        std::cerr << "Unable to read input: no file name given" << std::endl;
        std::exit(1);
    }

    std::ifstream sr(fileName);
    if (!sr.is_open()) {
        // Diagnostics belong on stderr: stdout carries the extracted pairs.
        std::cerr << "Unable to read " << fileName << std::endl;
        std::exit(1);
    }

    std::string line;
    while (std::getline(sr, line)) {
        input.push_back(line);
    }
    // The stream closes itself when `sr` goes out of scope.
}
// Splits `s` on whitespace and stores the tokens in `currInput`, replacing
// whatever the vector held before. An empty or all-whitespace string yields
// an empty vector.
void getWords(std::string s, std::vector <std::string> & currInput)
{
    currInput.clear();

    std::istringstream tokens(s);
    std::string word;
    // The extraction fails (ending the loop) once nothing but whitespace is left.
    while (tokens >> word) {
        currInput.push_back(word);
    }
}
// Expands `eSide` and `fSide` until they are closed under the links stored in
// `tS` and `sT`:
//   - every value listed in tS[x] for x in eSide is added to fSide;
//   - every value listed in sT[y] for y in fSide is added to eSide.
// The two passes are repeated until a pass no longer enlarges eSide.
// Lookups use operator[], so a key missing from `tS`/`sT` is inserted with an
// empty vector.
void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT)
{
    typedef std::set <int> :: const_iterator SetIter;

    std::set <int> :: size_type sizeBeforePass;
    do {
        sizeBeforePass = eSide.size();

        for (SetIter e = eSide.begin(); e != eSide.end(); ++e) {
            const std::vector <int> & linked = tS[*e];
            fSide.insert(linked.begin(), linked.end());
        }

        for (SetIter f = fSide.begin(); f != fSide.end(); ++f) {
            const std::vector <int> & linked = sT[*f];
            eSide.insert(linked.begin(), linked.end());
        }
    } while (eSide.size() > sizeBeforePass);
}
// Groups the alignment points of one sentence pair into cepts.
//
// `alignment` holds tokens of the form "A-B". For every token, B is recorded
// as linked to A (map tS, keyed by A) and A as linked to B (map sT, keyed by B).
//
// Outputs (all three are cleared first):
//   targetNullWords - every i in [0, fSize) that never occurs as a B value
//   sourceNullWords - every i in [0, eSize) that never occurs as an A value
//   ceptsInPhrase   - one pair per group of mutually linked indices, as
//                     computed by getMeCepts: .first is the set of B values,
//                     .second the set of A values
void constructCepts(std::vector < std::pair < std::set <int> , std::set <int> > > & ceptsInPhrase, std::set <int> & sourceNullWords, std::set <int> & targetNullWords, std::vector <std::string> & alignment, int eSize, int fSize)
{
    typedef std::map <int , std::vector <int> > LinkMap;
    typedef std::set <int> :: const_iterator SetIter;

    ceptsInPhrase.clear();
    sourceNullWords.clear();
    targetNullWords.clear();

    LinkMap tS;  // A -> list of B
    LinkMap sT;  // B -> list of A

    for (std::vector <std::string> :: size_type k = 0; k < alignment.size(); ++k) {
        const std::string & point = alignment[k];
        // If the token has no '-', find() returns npos; substr(0, npos) and
        // substr(npos + 1) (which wraps to substr(0)) then both yield the
        // whole token, so "5" is treated like "5-5".
        const std::string :: size_type dash = point.find("-");
        const int before = stringToInteger(point.substr(0, dash));
        const int after = stringToInteger(point.substr(dash + 1));
        tS[before].push_back(after);
        sT[after].push_back(before);
    }

    for (int i = 0; i < fSize; ++i) {
        if (sT.count(i) == 0) {
            targetNullWords.insert(i);
        }
    }
    for (int i = 0; i < eSize; ++i) {
        if (tS.count(i) == 0) {
            sourceNullWords.insert(i);
        }
    }

    std::set <int> eSide;
    std::set <int> fSide;
    while (!tS.empty() && !sT.empty()) {
        // Seed a new cept with the smallest remaining key and let getMeCepts
        // pull in everything linked to it.
        eSide.clear();
        fSide.clear();
        eSide.insert(tS.begin()->first);
        getMeCepts(eSide, fSide, tS, sT);

        // Remove the finished cept from both maps so the next round starts
        // from a key that has not been grouped yet.
        for (SetIter e = eSide.begin(); e != eSide.end(); ++e) {
            tS.erase(*e);
        }
        for (SetIter f = fSide.begin(); f != fSide.end(); ++f) {
            sT.erase(*f);
        }

        ceptsInPhrase.push_back(std::make_pair(fSide, eSide));
    }
}
// Collects the word pairs of all one-to-one cepts.
//
// For every element of `ceptsInPhrase` whose two index sets both contain
// exactly one index, the string  currF[<index in .second>] + "\t" +
// currE[<index in .first>]  is inserted into `one` (a set, so duplicates
// collapse). Cepts whose index does not address an existing word are skipped.
void getOneToOne(std::vector < std::pair < std::set <int> , std::set <int> > > & ceptsInPhrase , std::vector <std::string> & currF , std::vector <std::string> & currE, std::set <std::string> & one)
{
    typedef std::vector <std::string> :: size_type WordIndex;

    for (std::vector < std::pair < std::set <int> , std::set <int> > > :: size_type i = 0; i < ceptsInPhrase.size(); ++i) {
        const std::set <int> & eIndices = ceptsInPhrase[i].first;
        const std::set <int> & fIndices = ceptsInPhrase[i].second;
        if (eIndices.size() != 1 || fIndices.size() != 1) {
            continue;
        }

        const int ePos = *eIndices.begin();
        const int fPos = *fIndices.begin();

        // The indices come from the alignment file; an alignment point that
        // refers to a position outside the sentence must not be used to index
        // the word vectors.
        if (ePos < 0 || fPos < 0 ||
            static_cast<WordIndex>(ePos) >= currE.size() ||
            static_cast<WordIndex>(fPos) >= currF.size()) {
            continue;
        }

        one.insert(currF[fPos] + "\t" + currE[ePos]);
    }
}
// Writes every string in `one` to stdout, one per line, in the set's order.
void printOneToOne ( std::set <std::string> & one)
{
    std::set <std::string> :: const_iterator entry = one.begin();
    while (entry != one.end()) {
        std::cout << *entry << std::endl;
        ++entry;
    }
}
// Entry point.
//   argv[1] - file whose lines are tokenised into currF
//   argv[2] - file whose lines are tokenised into currE
//   argv[3] - alignment file; line i holds the "A-B" points for line i of the
//             other two files
// For every alignment line the cepts are built and the one-to-one word pairs
// are collected; the distinct pairs are printed to stdout at the end.
// Returns 1 on a usage error or when the inputs do not line up.
int main(int argc, char * argv[])
{
    if (argc < 4) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "1-1-Extraction")
                  << " <f-file> <e-file> <alignment-file>" << std::endl;
        return 1;
    }

    std::vector <std::string> e;
    std::vector <std::string> f;
    std::vector <std::string> a;
    loadInput(argv[1], f);
    loadInput(argv[2], e);
    loadInput(argv[3], a);

    // The loop below reads e[i] and f[i] for every alignment line, so both
    // files must have at least as many lines as the alignment file.
    if (e.size() < a.size() || f.size() < a.size()) {
        std::cerr << "Input mismatch: the alignment file has " << a.size()
                  << " lines but the corpus files have " << f.size()
                  << " and " << e.size() << " lines" << std::endl;
        return 1;
    }

    std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
    std::set <int> sourceNullWords;
    std::set <int> targetNullWords;
    std::vector <std::string> currE;
    std::vector <std::string> currF;
    std::vector <std::string> currA;
    std::set <std::string> one;

    for (std::vector <std::string> :: size_type i = 0; i < a.size(); ++i) {
        getWords(e[i], currE);
        getWords(f[i], currF);
        getWords(a[i], currA);
        std::cerr << "Processing " << i << std::endl;
        constructCepts(ceptsInPhrase, sourceNullWords, targetNullWords, currA,
                       static_cast<int>(currE.size()), static_cast<int>(currF.size()));
        getOneToOne(ceptsInPhrase, currF, currE, one);
    }

    printOneToOne(one);
    return 0;
}

View File

@ -8,6 +8,10 @@ exe queryLexicalTable : queryLexicalTable.cpp ../moses//moses ;
exe generateSequences : GenerateSequences.cpp ../moses//moses ;
exe TMining : TransliterationMining.cpp ../moses//moses ;
exe 1-1-Extraction : 1-1-Extraction.cpp ../moses//moses ;
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ../moses//moses ;
@ -21,4 +25,4 @@ else {
alias programsMin ;
}
alias programs : generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin ;
alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin ;

View File

@ -0,0 +1,648 @@
/*
########################################################################################
Transliteration Mining - A Program to Extract Transliteration Pairs from
a bilingual word list
Source Contributor: Nadir Durrani
########################################################################################
*/
#include <cstdlib>
#include <map>
#include <set>
#include <cmath>
#include <fstream>
#include <iostream>
#include <vector>
#include <sstream>
using namespace std;
double initTransitionProb;
double LAMBDA;
// Adds two probabilities given as base-10 logarithms and returns the base-10
// logarithm of the sum, i.e. log10(10^A + 10^B).
// When the operands differ by more than 6 (six orders of magnitude) the
// smaller one is ignored and the larger is returned unchanged.
double addLogProbs(double A , double B)
{
    if (A == B)
        return A + std::log10(2.0);

    const bool aIsLarger = A > B;
    const double larger = aIsLarger ? A : B;
    const double smaller = aIsLarger ? B : A;

    if (larger - smaller > 6)   // the smaller term is negligible
        return larger;

    return larger + std::log10(1 + std::pow(10.0, smaller - larger));
}
// One candidate word pair: `source` and `target` hold the units of the two
// words (main() fills them with the whitespace-separated tokens of two
// consecutive input lines). The class runs the forward/backward computation
// over the alignment lattice of the pair and keeps the resulting scores.
// All scores are base-10 logarithms.
class NodeStructure
{
public:
    // All scores start at 0.0 so that a freshly built (or copied) object never
    // carries indeterminate values.
    NodeStructure() : NTR(0.0), PPR(0.0), ALPHA(0.0), BETA(0.0) {}
    NodeStructure(std::vector <std::string> & s , std::vector <std::string> & t);

    // Returns PPR as last set by computeFwdBckProbs().
    double getPosterior() const { return PPR; }

    // Runs the forward and backward passes with the transition scores in
    // `gammas`, updates ALPHA, BETA and PPR, and accumulates this pair's edge
    // scores into `alignmentCounts`.
    void computeFwdBckProbs(std::map <std::string , double> & gammas, std::map <std::string, double> & alignmentCounts);

    // Sets NTR to the sum of the unigram scores of all source and target units.
    void computeNonTransliterationProb (std::map <std::string , double> & sourceUnigrams , std::map <std::string , double> & targetUnigrams);

    // Prints "<source units>\t<target units>\t<10^PPR>" to stdout.
    void print();

    std::vector <std::string> source;
    std::vector <std::string> target;

    ~NodeStructure() {}

private:
    double NTR;   // Non-transliteration probability of a sentence pair ...
    double PPR;   // Posterior Probability ...
    double ALPHA; // result of the forward pass (computeFwdProbs)
    double BETA;  // result of the backward pass (computeBckProbs)

    void computeGammaForEdges(std::map < std::pair <int , int> , double > & parents, std::map < std::pair <int , int> , double > & children , std::map <std::string, double> & transitionProbs , std::map <std::string, double> & alignmentCounts);
    double computeFwdProbs(std::pair <int , int> & ST, std::map <std::string , double> & gammas, std::map < std::pair <int , int> , double > & parents);
    double FwdProb (std::pair <int , int> & TS, std::map <std::string , double> & gammas, std::map < std::pair <int , int> , double > & parents);
    double BckProb (std::pair <int , int> & TS, std::map <std::string , double> & gammas, std::map < std::pair <int , int> , double > & children);
    double computeBckProbs(std::pair <int , int> & ST, std::map <std::string , double> & gammas, std::map < std::pair <int , int> , double > & children);
    void getIncomingEdges (std::pair <int , int> & ST , std::vector < std::pair < int , int> > & incomingEdges);
    void getOutgoingEdges (std::pair <int , int> & ST , std::vector < std::pair < int , int> > & outgoingEdges);
    double getTransitionProb(std::map <std::string, double> & transitionProbs , std::pair <int,int> & edge);
    void updateAlignmentCount(std::map <std::string, double> & transitionProbs, std::map <std::string, double> & alignmentCounts , std::pair <int,int> & edge , double alpha , double beta);
    void computePosteriorProb();
    double scaleGamma(double g);
    void getEdge (std::pair <int , int> & v1 , std::pair <int , int> & v2 , std::pair <int , int> & v3);
};
// Prints one result line to stdout: the source units concatenated without a
// separator, a tab, the target units concatenated likewise, a tab, and
// 10^PPR.
void NodeStructure :: print()
{
    typedef std::vector <std::string> :: const_iterator UnitIter;

    for (UnitIter unit = source.begin(); unit != source.end(); ++unit)
        std::cout << *unit;
    std::cout << "\t";

    for (UnitIter unit = target.begin(); unit != target.end(); ++unit)
        std::cout << *unit;
    std::cout << "\t" << std::pow(10.0, PPR) << std::endl;
}
// Builds a node for the word pair (s, t): both unit sequences are copied.
// The score members start at 0.0 so that copying the object (main() stores
// nodes by value in a map) never reads an indeterminate double.
NodeStructure :: NodeStructure(std::vector <std::string> & s , std::vector <std::string> & t)
    : source(s), target(t), NTR(0.0), PPR(0.0), ALPHA(0.0), BETA(0.0)
{
}
// Describes the step from lattice node v1 to lattice node v2 and stores it in
// v3: a coordinate that did not change becomes -1 (callers map -1 to "NULL"),
// a coordinate that changed takes v2's value.
void NodeStructure :: getEdge (std::pair <int , int> & v1 , std::pair <int , int> & v2 , std::pair <int , int> & v3)
{
    v3.first = (v2.first == v1.first) ? -1 : v2.first;
    v3.second = (v2.second == v1.second) ? -1 : v2.second;
}
void NodeStructure :: computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts)
{
vector < pair < int , int> > incomingEdges;
map < pair <int , int> , double > :: iterator cIter;
map < pair <int , int> , double > :: iterator pIter;
pair <int , int> ST = make_pair (-1,-1);
pair <int , int> edge;
children.erase(ST);
double tProb;
double alpha;
double beta;
for (cIter = children.begin(); cIter != children.end(); cIter++)
{
ST = cIter->first;
getIncomingEdges (ST , incomingEdges);
beta = cIter->second;
for (int i = 0; i< incomingEdges.size(); i++)
{
pIter = parents.find(incomingEdges[i]);
alpha = pIter->second;
getEdge (incomingEdges[i] , ST , edge);
updateAlignmentCount(transitionProbs, alignmentCounts , edge , alpha , beta);
}
}
}
// Sets NTR to the sum of sourceUnigrams[u] over all source units plus
// targetUnigrams[u] over all target units. Lookups use operator[], so a unit
// missing from a map is inserted with value 0.
void NodeStructure :: computeNonTransliterationProb (std::map <std::string , double> & sourceUnigrams , std::map <std::string , double> & targetUnigrams)
{
    typedef std::vector <std::string> :: const_iterator UnitIter;

    double total = 0.0;
    for (UnitIter unit = source.begin(); unit != source.end(); ++unit)
        total += sourceUnigrams[*unit];
    for (UnitIter unit = target.begin(); unit != target.end(); ++unit)
        total += targetUnigrams[*unit];

    NTR = total;
}
// Returns g + log10(1 - 10^PPR), i.e. the log score g weighted by the
// complement of this pair's posterior PPR.
double NodeStructure :: scaleGamma(double g)
{
    const double complement = std::log10(1 - std::pow(10.0, PPR));
    return g + complement;
}
void NodeStructure :: computePosteriorProb()
{
double LAMBDA2 = log10(1 - pow(10, LAMBDA));
double transliterate = LAMBDA2 + ALPHA; // Transliteration Prob ...
double translate = LAMBDA + NTR; // Translation Prob ...
double trans = transliterate - translate;
//cout<<LAMBDA<<" "<<LAMBDA2<<endl;
//cout<<transliterate<<" "<<translate<<" "<<trans<<endl;
//cout<<pow(10 , trans)<<endl;
double prob = 1/(1+ pow(10 , trans));
PPR = log10(prob);
//cout<<"Posterior Prob "<<PPR<<endl;
}
// Runs one full pass for this word pair:
//   1. forward pass  -> ALPHA (score of the node (|source|-1, |target|-1))
//   2. backward pass -> BETA  (score of the node (-1,-1))
//   3. posterior update (PPR)
//   4. edge scores added to `alignmentCounts`
// `gammas` supplies the transition scores used in steps 1, 2 and 4.
void NodeStructure :: computeFwdBckProbs(std::map <std::string , double> & gammas , std::map <std::string, double> & alignmentCounts)
{
    typedef std::pair <int , int> Node;

    Node firstNode(-1, -1);
    Node lastNode(static_cast<int>(source.size()) - 1, static_cast<int>(target.size()) - 1);

    // Forward scores are seeded at the first node, backward scores at the last.
    std::map < Node , double > parents;
    parents[firstNode] = 0.0;
    std::map < Node , double > children;
    children[lastNode] = 0.0;

    ALPHA = computeFwdProbs(lastNode, gammas, parents);
    BETA = computeBckProbs(firstNode, gammas, children);
    computePosteriorProb();
    computeGammaForEdges(parents, children, gammas, alignmentCounts);
}
// Replaces the contents of `incomingEdges` with the lattice nodes that have
// an edge into ST:
//   ST.first  == -1 : only (first, second-1)
//   ST.second == -1 : only (first-1, second)
//   otherwise       : (first, second-1), (first-1, second), (first-1, second-1)
void NodeStructure :: getIncomingEdges (std::pair <int , int> & ST , std::vector < std::pair < int , int> > & incomingEdges)
{
    incomingEdges.clear();

    if (ST.first == -1) {  // Source is NULL ..
        incomingEdges.push_back(std::make_pair(ST.first, ST.second - 1));
        return;
    }
    if (ST.second == -1) { // Target is NULL ...
        incomingEdges.push_back(std::make_pair(ST.first - 1, ST.second));
        return;
    }

    incomingEdges.push_back(std::make_pair(ST.first, ST.second - 1));
    incomingEdges.push_back(std::make_pair(ST.first - 1, ST.second));
    incomingEdges.push_back(std::make_pair(ST.first - 1, ST.second - 1));
}
// Replaces the contents of `outgoingEdges` with the lattice nodes reachable
// from ST by one edge:
//   ST.first  is the last source index : only (first, second+1)
//   ST.second is the last target index : only (first+1, second)
//   otherwise                          : (first, second+1), (first+1, second),
//                                        (first+1, second+1)
void NodeStructure :: getOutgoingEdges (std::pair <int , int> & ST , std::vector < std::pair < int , int> > & outgoingEdges)
{
    // Start from an empty list, as getIncomingEdges does; otherwise a caller
    // that reuses its vector would accumulate edges of earlier nodes.
    outgoingEdges.clear();

    // Compare as signed values: node coordinates can be -1.
    const int lastSource = static_cast<int>(source.size()) - 1;
    const int lastTarget = static_cast<int>(target.size()) - 1;

    if (ST.first == lastSource) {        // Source is END ..
        outgoingEdges.push_back(std::make_pair(ST.first, ST.second + 1));
    } else if (ST.second == lastTarget) { // Target is END ...
        outgoingEdges.push_back(std::make_pair(ST.first + 1, ST.second));
    } else {
        outgoingEdges.push_back(std::make_pair(ST.first, ST.second + 1));
        outgoingEdges.push_back(std::make_pair(ST.first + 1, ST.second));
        outgoingEdges.push_back(std::make_pair(ST.first + 1, ST.second + 1));
    }
}
// Adds the score of one lattice edge to `alignmentCounts`.
//
// The edge is keyed as "<source unit>-<target unit>", with "NULL" standing in
// for a coordinate of -1. Its transition score is initTransitionProb while
// `transitionProbs` is still empty, otherwise transitionProbs[key]
// (operator[] inserts 0 for an unknown key). The edge score is
//   scaleGamma(alpha + transition + beta - ALPHA)
// and is log-added to any value already stored under the key.
void NodeStructure :: updateAlignmentCount(std::map <std::string, double> & transitionProbs, std::map <std::string, double> & alignmentCounts , std::pair <int,int> & edge , double alpha , double beta)
{
    std::string query = (edge.first == -1) ? std::string("NULL") : source[edge.first];
    query += "-";
    query += (edge.second == -1) ? std::string("NULL") : target[edge.second];

    const double transition = transitionProbs.empty() ? initTransitionProb : transitionProbs[query];
    const double unscaled = alpha + transition + beta - ALPHA;
    const double gamma = scaleGamma(unscaled);

    std::map <std::string , double> :: iterator slot = alignmentCounts.find(query);
    if (slot != alignmentCounts.end()) {
        slot->second = addLogProbs(slot->second, gamma);
    } else {
        alignmentCounts[query] = gamma;
    }
}
// Returns the transition score of `edge`.
// While `transitionProbs` is empty every edge scores initTransitionProb.
// Otherwise the edge is looked up under the key "<source unit>-<target unit>"
// ("NULL" replaces a coordinate of -1); operator[] is used, so an unknown key
// is inserted with value 0 and 0 is returned.
double NodeStructure :: getTransitionProb(std::map <std::string, double> & transitionProbs , std::pair <int,int> & edge)
{
    if (transitionProbs.empty())
        return initTransitionProb;

    std::string query = (edge.first == -1) ? std::string("NULL") : source[edge.first];
    query += "-";
    query += (edge.second == -1) ? std::string("NULL") : target[edge.second];

    return transitionProbs[query];
}
double NodeStructure :: FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents)
{
double thisAlpha;
double alpha = -2000;
vector < pair < int , int> > incomingEdges;
pair <int , int> edge;
getIncomingEdges (TS , incomingEdges);
for (int k = 0; k < incomingEdges.size(); k++)
{
thisAlpha = parents[incomingEdges[k]];
getEdge (incomingEdges[k], TS , edge);
thisAlpha += getTransitionProb(gammas , edge); // Get Transition Prob ...
double temp = alpha;
alpha = addLogProbs(temp , thisAlpha); // Sum of all parents * transition prob ..
// cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
}
return alpha;
}
double NodeStructure :: computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents)
{
pair <int , int> TS;
double alpha;
for (int i = 0; i < source.size(); i++)
{
TS = make_pair (i , -1);
alpha = FwdProb (TS, gammas, parents);
parents[TS] = alpha;
}
for (int i = 0; i < target.size(); i++)
{
TS = make_pair (-1 , i);
alpha = FwdProb (TS, gammas, parents);
parents[TS] = alpha;
}
for (int i = 0; i < source.size(); i++)
{
for (int j = 0; j < target.size(); j++)
{
TS = make_pair (i , j);
alpha = FwdProb (TS, gammas, parents);
parents[TS] = alpha;
}
}
return parents[ST];
}
double NodeStructure :: BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & children)
{
double thisBeta;
double beta = -2000;
vector < pair < int , int> > outgoingEdges;
pair <int , int> edge;
getOutgoingEdges (TS , outgoingEdges);
for (int k = 0; k < outgoingEdges.size(); k++)
{
thisBeta = children[outgoingEdges[k]];
getEdge (TS , outgoingEdges[k], edge);
thisBeta += getTransitionProb(gammas , edge); // Get Transition Prob ...
double temp = beta;
beta = addLogProbs(temp , thisBeta); // Sum of all parents * transition prob ..
// cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
}
return beta;
}
// Fills `children` with the backward score of every lattice node and returns
// the score stored for ST. Nodes are visited so that all successors are scored
// first: the last-target column from bottom to top, then the last-source row
// from right to left, then the remaining nodes in reverse row-major order
// down to (-1,-1). `children` must already hold the seed for the node
// (|source|-1, |target|-1).
double NodeStructure :: computeBckProbs(std::pair <int , int> & ST, std::map <std::string , double> & gammas, std::map < std::pair <int , int> , double > & children)
{
    const int lastSource = static_cast<int>(source.size()) - 1;
    const int lastTarget = static_cast<int>(target.size()) - 1;
    std::pair <int , int> node;

    for (int s = lastSource - 1; s >= -1; --s) {
        node = std::make_pair(s, lastTarget);
        const double score = BckProb(node, gammas, children);
        children[node] = score;
    }

    for (int t = lastTarget - 1; t >= -1; --t) {
        node = std::make_pair(lastSource, t);
        const double score = BckProb(node, gammas, children);
        children[node] = score;
    }

    for (int s = lastSource - 1; s >= -1; --s) {
        for (int t = lastTarget - 1; t >= -1; --t) {
            node = std::make_pair(s, t);
            const double score = BckProb(node, gammas, children);
            children[node] = score;
        }
    }

    return children[ST];
}
/* This function loads a file into a vector of strings: every line of
   `fileName` (without its line terminator) is appended to `input`; elements
   already in `input` are kept. If no file name is given or the file cannot be
   opened, a message is written to stderr and the process exits with status 1. */
void loadInput(const char * fileName, std::vector <std::string> & input)
{
    // main() forwards argv[1] unchecked, so a missing command-line argument
    // arrives here as a null pointer; opening a stream on it would be
    // undefined behaviour.
    if (!fileName) {
        std::cerr << "Unable to read input: no file name given" << std::endl;
        std::exit(1);
    }

    std::ifstream sr(fileName);
    if (!sr.is_open()) {
        // Diagnostics belong on stderr: stdout carries the mined pairs.
        std::cerr << "Unable to read " << fileName << std::endl;
        std::exit(1);
    }

    std::string line;
    while (std::getline(sr, line)) {
        input.push_back(line);
    }
    // The stream closes itself when `sr` goes out of scope.
}
// Writes every entry of `alignmentCounts` to stdout as "<key> <value>", one
// entry per line, in the map's key order.
void printGammas(std::map <std::string, double> & alignmentCounts)
{
    std::map <std::string , double> :: const_iterator entry = alignmentCounts.begin();
    while (entry != alignmentCounts.end()) {
        std::cout << entry->first << " " << entry->second << std::endl;
        ++entry;
    }
}
/* This function splits a string into a vector of strings, using whitespace as
   the delimiter. `currInput` is emptied first; an empty or all-whitespace
   string yields an empty vector. */
void getWords(std::string s, std::vector <std::string> & currInput)
{
    currInput.clear();

    std::istringstream tokens(s);
    std::string word;
    // The extraction fails (ending the loop) once nothing but whitespace is left.
    while (tokens >> word) {
        currInput.push_back(word);
    }
}
// Returns log10(1 / (sourceToken * targetToken)): the uniform starting score
// given to every transition before the first iteration.
double getInitTransitionProb(int sourceToken, int targetToken)
{
    // Widen before multiplying: the product of two ints can exceed the int
    // range (signed overflow is undefined behaviour).
    const double prod = static_cast<double>(sourceToken) * static_cast<double>(targetToken);
    return std::log10(1 / prod);
}
void runIteration(map <int , NodeStructure> & graph , map <string , double> & gammas , int size)
{
map <string, double> alignmentCounts;
map <int , NodeStructure> :: iterator i;
map <string , double> :: iterator aCounts;
double sum = -2000.0;
double tPPR = -2000.0;
for (i = graph.begin(); i != graph.end(); i++)
{
i->second.computeFwdBckProbs(gammas , alignmentCounts);
double temp = tPPR;
tPPR = addLogProbs(graph[i->first].getPosterior() , temp);
}
for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++)
{
double temp = sum;
sum = addLogProbs(aCounts->second, temp);
}
for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) // Normalizing ...
{
aCounts->second = aCounts->second - sum;
}
gammas.clear();
gammas = alignmentCounts;
LAMBDA = tPPR - log10(size);
}
void setNTRProbabilities(map <int , NodeStructure> & graph , map <string , double> & sourceTypes , map <string , double > & targetTypes, double sourceTokens, double targetTokens)
{
map <string , double> :: iterator i;
map <int , NodeStructure> :: iterator j;
for (i = sourceTypes.begin(); i!= sourceTypes.end(); i++)
{
i->second = log10(i->second/sourceTokens);
}
for (i = targetTypes.begin(); i!= targetTypes.end(); i++)
{
i->second = log10(i->second/targetTokens);
}
for (j = graph.begin(); j != graph.end(); j++)
{
j->second.computeNonTransliterationProb(sourceTypes , targetTypes);
}
}
void printPosterior(map <int , NodeStructure> & graph)
{
map <int , NodeStructure> :: iterator i;
for (i = graph.begin(); i != graph.end(); i++)
graph[i->first].print();
}
int main(int argc, char * argv[])
{
vector <string> input;
vector <string> source;
vector <string> target;
map <string , double> sourceTypes;
map <string , double> targetTypes;
set < vector <string> > tgt;
set < vector <string> > src;
double sourceTokens = 0;
double targetTokens = 0;
map <int , NodeStructure> graph;
map <string , double> gammas;
loadInput(argv[1],input);
cerr<<"Constructing Graph "<<endl;
for(int i=0; i<input.size(); i+=2)
{
//cerr<<input[i]<<endl;
//cerr<<input[i+1]<<endl;
getWords(input[i],source);
getWords(input[i+1],target);
if (src.find(source) == src.end())
{
for (int j = 0; j< source.size(); j++)
sourceTypes[source[j]]++;
src.insert(source);
sourceTokens += source.size();
}
if (tgt.find(target) == tgt.end())
{
for (int j = 0; j< target.size(); j++)
targetTypes[target[j]]++;
tgt.insert(target);
targetTokens += target.size();
}
NodeStructure obj (source,target);
graph[i] = obj;
}
setNTRProbabilities(graph, sourceTypes, targetTypes, sourceTokens, targetTokens);
initTransitionProb = getInitTransitionProb(sourceTypes.size()+1, targetTypes.size()+1);
LAMBDA = log10(0.5);
for (int i = 0; i< 10; i++)
{
cerr<<"Computing Probs : iteration "<<i+1<<endl;
runIteration(graph , gammas , input.size()/2);
}
printPosterior(graph);
cerr<<"Finished..."<<endl;
return 0;
}

View File

@ -518,6 +518,13 @@ build-osm
rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --srilm-dir $srilm-dir $operation-sequence-model-settings
default-name: model/OSM
build-transliteration-model
in: corpus word-alignment
out: transliteration-model
ignore-unless: transliteration-module
rerun-on-change: transliteration-module training-options script giza-settings
template: $moses-script-dir/Transliteration/train-transliteration-module.pl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --out-dir OUT --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --srilm-dir $srilm-dir --input-extension $input-extension --output-extension $output-extension
default-name: model/Transliteration
extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
out: extracted-phrases
@ -586,7 +593,7 @@ build-sparse
default-name: model/sparse-features
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
create-config
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-model generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini

View File

@ -2162,7 +2162,7 @@ sub get_config_tables {
sub define_training_create_config {
my ($step_id) = @_;
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
my ($config,$reordering_table,$phrase_translation_table,$translit_model,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
= &get_output_and_input($step_id);
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);