From b03e035b5dd04b9b7a5e640dad0f8ae169bc8a77 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 20 Jan 2014 18:37:48 +0000 Subject: [PATCH] output glue rules --- .../extract-mixed-syntax/extract.cpp | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/contrib/other-builds/extract-mixed-syntax/extract.cpp b/contrib/other-builds/extract-mixed-syntax/extract.cpp index d16d1eee2..334a3e124 100644 --- a/contrib/other-builds/extract-mixed-syntax/extract.cpp +++ b/contrib/other-builds/extract-mixed-syntax/extract.cpp @@ -39,6 +39,8 @@ using namespace std; +void writeGlueGrammar(const string &, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection); + int main(int argc, char* argv[]) { cerr << "Extract v2.0, written by Philipp Koehn\n" @@ -261,8 +263,48 @@ int main(int argc, char* argv[]) extractFile.Close(); extractFileInv.Close(); - delete global; + if (global->glueGrammarFlag) { + writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection); + } + + delete global; } +void writeGlueGrammar( const string & fileName, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection ) +{ + ofstream grammarFile; + grammarFile.open(fileName.c_str()); + if (!options.targetSyntax) { + grammarFile << " [X] ||| [S] ||| 1 ||| ||| 0" << endl + << "[X][S] [X] ||| [X][S] [S] ||| 1 ||| 0-0 ||| 0" << endl + << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl; + } else { + // chose a top label that is not already a label + string topLabel = "QQQQQQ"; + for( unsigned int i=1; i<=topLabel.length(); i++) { + if(targetLabelCollection.find( topLabel.substr(0,i) ) == targetLabelCollection.end() ) { + topLabel = topLabel.substr(0,i); + break; + } + } + // basic rules + grammarFile << " [X] ||| [" << topLabel << "] ||| 1 ||| " << endl + << "[X][" << topLabel << "] [X] ||| [X][" << topLabel << "] [" << topLabel << "] ||| 1 ||| 0-0 " << endl; + + // top rules + for( map::const_iterator i = targetTopLabelCollection.begin(); + i != targetTopLabelCollection.end(); i++ ) { + grammarFile << " [X][" << i->first << "] [X] ||| [X][" << i->first << "] [" << topLabel << "] ||| 1 ||| 1-1" << endl; + } + + // glue rules + for( set::const_iterator i = targetLabelCollection.begin(); + i != targetLabelCollection.end(); i++ ) { + grammarFile << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << endl; + } + grammarFile << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 " << endl; // glue rule for unknown word... + } + grammarFile.close(); +}