diff --git a/Jamroot b/Jamroot index b1637e5ba..5a9f52b11 100644 --- a/Jamroot +++ b/Jamroot @@ -104,7 +104,7 @@ build-project scripts ; #Regression tests (only does anything if --with-regtest is passed) build-project regression-testing ; -alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs scripts//training//symal scripts//training//lexical-reordering scripts//training//phrase-extract scripts//training//phrase-extract//extract-ghkm scripts//training//phrase-extract//pcfg-extract scripts//training//phrase-extract//pcfg-score ; +alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs symal scripts//training//lexical-reordering phrase-extract phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score ; install-bin-libs programs ; install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist bin lib include kenlm moses ] : . ; diff --git a/scripts/training/phrase-extract/AlignmentPhrase.cpp b/phrase-extract/AlignmentPhrase.cpp similarity index 100% rename from scripts/training/phrase-extract/AlignmentPhrase.cpp rename to phrase-extract/AlignmentPhrase.cpp diff --git a/scripts/training/phrase-extract/AlignmentPhrase.h b/phrase-extract/AlignmentPhrase.h similarity index 100% rename from scripts/training/phrase-extract/AlignmentPhrase.h rename to phrase-extract/AlignmentPhrase.h diff --git a/scripts/training/phrase-extract/ExtractedRule.cpp b/phrase-extract/ExtractedRule.cpp similarity index 100% rename from scripts/training/phrase-extract/ExtractedRule.cpp rename to phrase-extract/ExtractedRule.cpp diff --git a/scripts/training/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h similarity index 100% rename from scripts/training/phrase-extract/ExtractedRule.h rename to phrase-extract/ExtractedRule.h diff --git a/scripts/training/phrase-extract/Hole.h b/phrase-extract/Hole.h similarity index 100% rename from scripts/training/phrase-extract/Hole.h rename to phrase-extract/Hole.h diff --git a/scripts/training/phrase-extract/HoleCollection.cpp b/phrase-extract/HoleCollection.cpp similarity index 100% rename from scripts/training/phrase-extract/HoleCollection.cpp rename to phrase-extract/HoleCollection.cpp diff --git a/scripts/training/phrase-extract/HoleCollection.h b/phrase-extract/HoleCollection.h similarity index 100% rename from scripts/training/phrase-extract/HoleCollection.h rename to phrase-extract/HoleCollection.h diff --git a/scripts/training/phrase-extract/InputFileStream.cpp b/phrase-extract/InputFileStream.cpp similarity index 100% rename from scripts/training/phrase-extract/InputFileStream.cpp rename to phrase-extract/InputFileStream.cpp diff --git a/scripts/training/phrase-extract/InputFileStream.h b/phrase-extract/InputFileStream.h similarity index 100% rename from scripts/training/phrase-extract/InputFileStream.h rename to phrase-extract/InputFileStream.h diff --git a/scripts/training/phrase-extract/Jamfile b/phrase-extract/Jamfile similarity index 79% rename from scripts/training/phrase-extract/Jamfile rename to phrase-extract/Jamfile index 6fae90bb0..d834674b8 100644 --- a/scripts/training/phrase-extract/Jamfile +++ b/phrase-extract/Jamfile @@ -1,5 +1,5 @@ obj InputFileStream.o : InputFileStream.cpp : . ; -alias InputFileStream : InputFileStream.o ../../..//z ; +alias InputFileStream : InputFileStream.o ..//z ; obj tables-core.o : tables-core.cpp : . ; obj AlignmentPhrase.o : AlignmentPhrase.cpp : . ; @@ -10,17 +10,17 @@ obj XmlTree.o : XmlTree.cpp : . ; alias filestreams : InputFileStream.cpp OutputFileStream.cpp : : : . ; alias trees : SyntaxTree.cpp tables-core.o XmlTree.o : : : . ; -exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ; +exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ; -exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../../../moses/src//ThreadPool ../../..//boost_iostreams ; +exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../moses/src//ThreadPool ..//boost_iostreams ; exe extract-lex : extract-lex.cpp InputFileStream ; -exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ; +exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ; -exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ; +exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ..//boost_iostreams ; -exe consolidate-direct : consolidate-direct.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ; +exe consolidate-direct : consolidate-direct.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ; exe consolidate-reverse : consolidate-reverse.cpp tables-core.o InputFileStream ; diff --git a/scripts/training/phrase-extract/OutputFileStream.cpp b/phrase-extract/OutputFileStream.cpp similarity index 100% rename from scripts/training/phrase-extract/OutputFileStream.cpp rename to phrase-extract/OutputFileStream.cpp diff --git a/scripts/training/phrase-extract/OutputFileStream.h b/phrase-extract/OutputFileStream.h similarity index 100% rename from scripts/training/phrase-extract/OutputFileStream.h rename to phrase-extract/OutputFileStream.h diff --git a/scripts/training/phrase-extract/PhraseAlignment.cpp b/phrase-extract/PhraseAlignment.cpp similarity index 100% rename from scripts/training/phrase-extract/PhraseAlignment.cpp rename to phrase-extract/PhraseAlignment.cpp diff --git a/scripts/training/phrase-extract/PhraseAlignment.h b/phrase-extract/PhraseAlignment.h similarity index 100% rename from scripts/training/phrase-extract/PhraseAlignment.h rename to phrase-extract/PhraseAlignment.h diff --git a/scripts/training/phrase-extract/RuleExist.h b/phrase-extract/RuleExist.h similarity index 100% rename from scripts/training/phrase-extract/RuleExist.h rename to phrase-extract/RuleExist.h diff --git a/scripts/training/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h similarity index 100% rename from scripts/training/phrase-extract/RuleExtractionOptions.h rename to phrase-extract/RuleExtractionOptions.h diff --git a/scripts/training/phrase-extract/SafeGetline.h b/phrase-extract/SafeGetline.h similarity index 100% rename from scripts/training/phrase-extract/SafeGetline.h rename to phrase-extract/SafeGetline.h diff --git a/scripts/training/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp similarity index 100% rename from scripts/training/phrase-extract/SentenceAlignment.cpp rename to phrase-extract/SentenceAlignment.cpp diff --git a/scripts/training/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h similarity index 100% rename from scripts/training/phrase-extract/SentenceAlignment.h rename to phrase-extract/SentenceAlignment.h diff --git a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp b/phrase-extract/SentenceAlignmentWithSyntax.cpp similarity index 100% rename from scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp rename to phrase-extract/SentenceAlignmentWithSyntax.cpp diff --git a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.h b/phrase-extract/SentenceAlignmentWithSyntax.h similarity index 100% rename from scripts/training/phrase-extract/SentenceAlignmentWithSyntax.h rename to phrase-extract/SentenceAlignmentWithSyntax.h diff --git a/scripts/training/phrase-extract/SyntaxTree.cpp b/phrase-extract/SyntaxTree.cpp similarity index 100% rename from scripts/training/phrase-extract/SyntaxTree.cpp rename to phrase-extract/SyntaxTree.cpp diff --git a/scripts/training/phrase-extract/SyntaxTree.h b/phrase-extract/SyntaxTree.h similarity index 100% rename from scripts/training/phrase-extract/SyntaxTree.h rename to phrase-extract/SyntaxTree.h diff --git a/scripts/training/phrase-extract/XmlException.h b/phrase-extract/XmlException.h similarity index 100% rename from scripts/training/phrase-extract/XmlException.h rename to phrase-extract/XmlException.h diff --git a/scripts/training/phrase-extract/XmlTree.cpp b/phrase-extract/XmlTree.cpp similarity index 100% rename from scripts/training/phrase-extract/XmlTree.cpp rename to phrase-extract/XmlTree.cpp diff --git a/scripts/training/phrase-extract/XmlTree.h b/phrase-extract/XmlTree.h similarity index 100% rename from scripts/training/phrase-extract/XmlTree.h rename to phrase-extract/XmlTree.h diff --git a/scripts/training/phrase-extract/consolidate-direct.cpp b/phrase-extract/consolidate-direct.cpp similarity index 100% rename from scripts/training/phrase-extract/consolidate-direct.cpp rename to phrase-extract/consolidate-direct.cpp diff --git a/scripts/training/phrase-extract/consolidate-direct.vcxproj b/phrase-extract/consolidate-direct.vcxproj similarity index 100% rename from scripts/training/phrase-extract/consolidate-direct.vcxproj rename to phrase-extract/consolidate-direct.vcxproj diff --git a/scripts/training/phrase-extract/consolidate-reverse.cpp b/phrase-extract/consolidate-reverse.cpp similarity index 100% rename from scripts/training/phrase-extract/consolidate-reverse.cpp rename to phrase-extract/consolidate-reverse.cpp diff --git a/scripts/training/phrase-extract/consolidate.cpp b/phrase-extract/consolidate.cpp similarity index 100% rename from scripts/training/phrase-extract/consolidate.cpp rename to phrase-extract/consolidate.cpp diff --git a/scripts/training/phrase-extract/consolidate.vcxproj b/phrase-extract/consolidate.vcxproj similarity index 100% rename from scripts/training/phrase-extract/consolidate.vcxproj rename to phrase-extract/consolidate.vcxproj diff --git a/scripts/training/phrase-extract/extract-ghkm/Alignment.cpp b/phrase-extract/extract-ghkm/Alignment.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Alignment.cpp rename to phrase-extract/extract-ghkm/Alignment.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/Alignment.h b/phrase-extract/extract-ghkm/Alignment.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Alignment.h rename to phrase-extract/extract-ghkm/Alignment.h diff --git a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp b/phrase-extract/extract-ghkm/AlignmentGraph.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp rename to phrase-extract/extract-ghkm/AlignmentGraph.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.h b/phrase-extract/extract-ghkm/AlignmentGraph.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.h rename to phrase-extract/extract-ghkm/AlignmentGraph.h diff --git a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.cpp b/phrase-extract/extract-ghkm/ComposedRule.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ComposedRule.cpp rename to phrase-extract/extract-ghkm/ComposedRule.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.h b/phrase-extract/extract-ghkm/ComposedRule.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ComposedRule.h rename to phrase-extract/extract-ghkm/ComposedRule.h diff --git a/scripts/training/phrase-extract/extract-ghkm/Exception.h b/phrase-extract/extract-ghkm/Exception.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Exception.h rename to phrase-extract/extract-ghkm/Exception.h diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp rename to phrase-extract/extract-ghkm/ExtractGHKM.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.h b/phrase-extract/extract-ghkm/ExtractGHKM.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.h rename to phrase-extract/extract-ghkm/ExtractGHKM.h diff --git a/phrase-extract/extract-ghkm/Jamfile b/phrase-extract/extract-ghkm/Jamfile new file mode 100644 index 000000000..1a81c5f87 --- /dev/null +++ b/phrase-extract/extract-ghkm/Jamfile @@ -0,0 +1 @@ +exe extract-ghkm : [ glob *.cpp ] ..//filestreams ..//trees ../..//boost_iostreams ../..//boost_program_options ../..//z ; diff --git a/scripts/training/phrase-extract/extract-ghkm/Main.cpp b/phrase-extract/extract-ghkm/Main.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Main.cpp rename to phrase-extract/extract-ghkm/Main.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/Node.cpp b/phrase-extract/extract-ghkm/Node.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Node.cpp rename to phrase-extract/extract-ghkm/Node.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/Node.h b/phrase-extract/extract-ghkm/Node.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Node.h rename to phrase-extract/extract-ghkm/Node.h diff --git a/scripts/training/phrase-extract/extract-ghkm/Options.h b/phrase-extract/extract-ghkm/Options.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Options.h rename to phrase-extract/extract-ghkm/Options.h diff --git a/scripts/training/phrase-extract/extract-ghkm/ParseTree.cpp b/phrase-extract/extract-ghkm/ParseTree.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ParseTree.cpp rename to phrase-extract/extract-ghkm/ParseTree.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/ParseTree.h b/phrase-extract/extract-ghkm/ParseTree.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ParseTree.h rename to phrase-extract/extract-ghkm/ParseTree.h diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp b/phrase-extract/extract-ghkm/ScfgRule.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp rename to phrase-extract/extract-ghkm/ScfgRule.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h b/phrase-extract/extract-ghkm/ScfgRule.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ScfgRule.h rename to phrase-extract/extract-ghkm/ScfgRule.h diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp rename to phrase-extract/extract-ghkm/ScfgRuleWriter.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h rename to phrase-extract/extract-ghkm/ScfgRuleWriter.h diff --git a/scripts/training/phrase-extract/extract-ghkm/Span.cpp b/phrase-extract/extract-ghkm/Span.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Span.cpp rename to phrase-extract/extract-ghkm/Span.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/Span.h b/phrase-extract/extract-ghkm/Span.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Span.h rename to phrase-extract/extract-ghkm/Span.h diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp b/phrase-extract/extract-ghkm/Subgraph.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp rename to phrase-extract/extract-ghkm/Subgraph.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.h b/phrase-extract/extract-ghkm/Subgraph.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/Subgraph.h rename to phrase-extract/extract-ghkm/Subgraph.h diff --git a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp b/phrase-extract/extract-ghkm/XmlTreeParser.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp rename to phrase-extract/extract-ghkm/XmlTreeParser.cpp diff --git a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h similarity index 100% rename from scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.h rename to phrase-extract/extract-ghkm/XmlTreeParser.h diff --git a/scripts/training/phrase-extract/extract-lex.cpp b/phrase-extract/extract-lex.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-lex.cpp rename to phrase-extract/extract-lex.cpp diff --git a/scripts/training/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h similarity index 100% rename from scripts/training/phrase-extract/extract-lex.h rename to phrase-extract/extract-lex.h diff --git a/scripts/training/phrase-extract/extract-lex.vcxproj b/phrase-extract/extract-lex.vcxproj similarity index 100% rename from scripts/training/phrase-extract/extract-lex.vcxproj rename to phrase-extract/extract-lex.vcxproj diff --git a/scripts/training/phrase-extract/extract-rules.cpp b/phrase-extract/extract-rules.cpp similarity index 100% rename from scripts/training/phrase-extract/extract-rules.cpp rename to phrase-extract/extract-rules.cpp diff --git a/scripts/training/phrase-extract/extract-rules.vcxproj b/phrase-extract/extract-rules.vcxproj similarity index 100% rename from scripts/training/phrase-extract/extract-rules.vcxproj rename to phrase-extract/extract-rules.vcxproj diff --git a/scripts/training/phrase-extract/extract.cpp b/phrase-extract/extract.cpp similarity index 100% rename from scripts/training/phrase-extract/extract.cpp rename to phrase-extract/extract.cpp diff --git a/scripts/training/phrase-extract/extract.vcxproj b/phrase-extract/extract.vcxproj similarity index 100% rename from scripts/training/phrase-extract/extract.vcxproj rename to phrase-extract/extract.vcxproj diff --git a/scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj b/phrase-extract/extract.xcodeproj/project.pbxproj similarity index 100% rename from scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj rename to phrase-extract/extract.xcodeproj/project.pbxproj diff --git a/scripts/training/phrase-extract/gzfilebuf.h b/phrase-extract/gzfilebuf.h similarity index 100% rename from scripts/training/phrase-extract/gzfilebuf.h rename to phrase-extract/gzfilebuf.h diff --git a/scripts/training/phrase-extract/hierarchical.h b/phrase-extract/hierarchical.h similarity index 100% rename from scripts/training/phrase-extract/hierarchical.h rename to phrase-extract/hierarchical.h diff --git a/scripts/training/phrase-extract/pcfg-common/Jamfile b/phrase-extract/pcfg-common/Jamfile similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/Jamfile rename to phrase-extract/pcfg-common/Jamfile diff --git a/scripts/training/phrase-extract/pcfg-common/exception.h b/phrase-extract/pcfg-common/exception.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/exception.h rename to phrase-extract/pcfg-common/exception.h diff --git a/scripts/training/phrase-extract/pcfg-common/numbered_set.h b/phrase-extract/pcfg-common/numbered_set.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/numbered_set.h rename to phrase-extract/pcfg-common/numbered_set.h diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.cc b/phrase-extract/pcfg-common/pcfg.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/pcfg.cc rename to phrase-extract/pcfg-common/pcfg.cc diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.h b/phrase-extract/pcfg-common/pcfg.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/pcfg.h rename to phrase-extract/pcfg-common/pcfg.h diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h b/phrase-extract/pcfg-common/pcfg_tree.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/pcfg_tree.h rename to phrase-extract/pcfg-common/pcfg_tree.h diff --git a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h b/phrase-extract/pcfg-common/syntax_tree.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/syntax_tree.h rename to phrase-extract/pcfg-common/syntax_tree.h diff --git a/scripts/training/phrase-extract/pcfg-common/tool.cc b/phrase-extract/pcfg-common/tool.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/tool.cc rename to phrase-extract/pcfg-common/tool.cc diff --git a/scripts/training/phrase-extract/pcfg-common/tool.h b/phrase-extract/pcfg-common/tool.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/tool.h rename to phrase-extract/pcfg-common/tool.h diff --git a/scripts/training/phrase-extract/pcfg-common/typedef.h b/phrase-extract/pcfg-common/typedef.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/typedef.h rename to phrase-extract/pcfg-common/typedef.h diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc b/phrase-extract/pcfg-common/xml_tree_parser.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc rename to phrase-extract/pcfg-common/xml_tree_parser.cc diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h b/phrase-extract/pcfg-common/xml_tree_parser.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h rename to phrase-extract/pcfg-common/xml_tree_parser.h diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h b/phrase-extract/pcfg-common/xml_tree_writer.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h rename to phrase-extract/pcfg-common/xml_tree_writer.h diff --git a/phrase-extract/pcfg-extract/Jamfile b/phrase-extract/pcfg-extract/Jamfile new file mode 100644 index 000000000..fc93f1a19 --- /dev/null +++ b/phrase-extract/pcfg-extract/Jamfile @@ -0,0 +1 @@ +exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../..//boost_program_options ; diff --git a/scripts/training/phrase-extract/pcfg-extract/main.cc b/phrase-extract/pcfg-extract/main.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/main.cc rename to phrase-extract/pcfg-extract/main.cc diff --git a/scripts/training/phrase-extract/pcfg-extract/options.h b/phrase-extract/pcfg-extract/options.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/options.h rename to phrase-extract/pcfg-extract/options.h diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc b/phrase-extract/pcfg-extract/pcfg_extract.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc rename to phrase-extract/pcfg-extract/pcfg_extract.cc diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h rename to phrase-extract/pcfg-extract/pcfg_extract.h diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc b/phrase-extract/pcfg-extract/rule_collection.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/rule_collection.cc rename to phrase-extract/pcfg-extract/rule_collection.cc diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/rule_collection.h rename to phrase-extract/pcfg-extract/rule_collection.h diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc b/phrase-extract/pcfg-extract/rule_extractor.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc rename to phrase-extract/pcfg-extract/rule_extractor.cc diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-extract/rule_extractor.h rename to phrase-extract/pcfg-extract/rule_extractor.h diff --git a/phrase-extract/pcfg-score/Jamfile b/phrase-extract/pcfg-score/Jamfile new file mode 100644 index 000000000..6b8c963ce --- /dev/null +++ b/phrase-extract/pcfg-score/Jamfile @@ -0,0 +1 @@ +exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../..//boost_program_options ; diff --git a/scripts/training/phrase-extract/pcfg-score/main.cc b/phrase-extract/pcfg-score/main.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/main.cc rename to phrase-extract/pcfg-score/main.cc diff --git a/scripts/training/phrase-extract/pcfg-score/options.h b/phrase-extract/pcfg-score/options.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/options.h rename to phrase-extract/pcfg-score/options.h diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc b/phrase-extract/pcfg-score/pcfg_score.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/pcfg_score.cc rename to phrase-extract/pcfg-score/pcfg_score.cc diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/pcfg_score.h rename to phrase-extract/pcfg-score/pcfg_score.h diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc b/phrase-extract/pcfg-score/tree_scorer.cc similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/tree_scorer.cc rename to phrase-extract/pcfg-score/tree_scorer.cc diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h similarity index 100% rename from scripts/training/phrase-extract/pcfg-score/tree_scorer.h rename to phrase-extract/pcfg-score/tree_scorer.h diff --git a/scripts/training/phrase-extract/phrase-extract.sln b/phrase-extract/phrase-extract.sln similarity index 100% rename from scripts/training/phrase-extract/phrase-extract.sln rename to phrase-extract/phrase-extract.sln diff --git a/scripts/training/phrase-extract/relax-parse.cpp b/phrase-extract/relax-parse.cpp similarity index 100% rename from scripts/training/phrase-extract/relax-parse.cpp rename to phrase-extract/relax-parse.cpp diff --git a/scripts/training/phrase-extract/relax-parse.h b/phrase-extract/relax-parse.h similarity index 100% rename from scripts/training/phrase-extract/relax-parse.h rename to phrase-extract/relax-parse.h diff --git a/scripts/training/phrase-extract/score.cpp b/phrase-extract/score.cpp similarity index 100% rename from scripts/training/phrase-extract/score.cpp rename to phrase-extract/score.cpp diff --git a/scripts/training/phrase-extract/score.h b/phrase-extract/score.h similarity index 100% rename from scripts/training/phrase-extract/score.h rename to phrase-extract/score.h diff --git a/scripts/training/phrase-extract/score.vcxproj b/phrase-extract/score.vcxproj similarity index 100% rename from scripts/training/phrase-extract/score.vcxproj rename to phrase-extract/score.vcxproj diff --git a/scripts/training/phrase-extract/statistics.cpp b/phrase-extract/statistics.cpp similarity index 100% rename from scripts/training/phrase-extract/statistics.cpp rename to phrase-extract/statistics.cpp diff --git a/scripts/training/phrase-extract/tables-core.cpp b/phrase-extract/tables-core.cpp similarity index 100% rename from scripts/training/phrase-extract/tables-core.cpp rename to phrase-extract/tables-core.cpp diff --git a/scripts/training/phrase-extract/tables-core.h b/phrase-extract/tables-core.h similarity index 100% rename from scripts/training/phrase-extract/tables-core.h rename to phrase-extract/tables-core.h diff --git a/scripts/training/Jamfile b/scripts/training/Jamfile index 8ca408d07..e290bb0cb 100644 --- a/scripts/training/Jamfile +++ b/scripts/training/Jamfile @@ -1,8 +1,3 @@ -build-project compact-rule-table ; -build-project phrase-extract ; -build-project lexical-reordering ; -build-project symal ; - if $(WITH-GIZA) != no || $(CLEANING) != no { make train-model.perl : train-model.perl.missing_bin_dir : @missing_bin_dir ; actions missing_bin_dir { diff --git a/scripts/training/compact-rule-table/Compactify.cpp b/scripts/training/compact-rule-table/Compactify.cpp deleted file mode 100644 index ceb7eb090..000000000 --- a/scripts/training/compact-rule-table/Compactify.cpp +++ /dev/null @@ -1,296 +0,0 @@ -#include "Compactify.h" - -#include "NumberedSet.h" -#include "Options.h" -#include "RuleTableParser.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace moses { - -int Compactify::main(int argc, char *argv[]) { - // Process the command-line arguments. - Options options; - processOptions(argc, argv, options); - - // Open the input stream. - std::istream *inputPtr; - std::ifstream inputFileStream; - if (options.inputFile.empty() || options.inputFile == "-") { - inputPtr = &(std::cin); - } else { - inputFileStream.open(options.inputFile.c_str()); - if (!inputFileStream) { - std::ostringstream msg; - msg << "failed to open input file: " << options.inputFile; - error(msg.str()); - } - inputPtr = &inputFileStream; - } - std::istream &input = *inputPtr; - - // Open the output stream. - std::ostream *outputPtr; - std::ofstream outputFileStream; - if (options.outputFile.empty()) { - outputPtr = &(std::cout); - } else { - outputFileStream.open(options.outputFile.c_str()); - if (!outputFileStream) { - std::ostringstream msg; - msg << "failed to open output file: " << options.outputFile; - error(msg.str()); - } - outputPtr = &outputFileStream; - } - std::ostream &output = *outputPtr; - - // Open a temporary file: the rule section must appear last in the output - // file, but we don't want to store the full set of rules in memory during - // processing, so instead they're written to a temporary file then copied to - // the output file as a final step. - std::fstream tempFileStream; - { - char fileNameTemplate[] = "/tmp/compact_XXXXXX"; - int fd = mkstemp(fileNameTemplate); - if (fd == -1) { - std::ostringstream msg; - msg << "failed to open temporary file with pattern " << fileNameTemplate; - error(msg.str()); - } - tempFileStream.open(fileNameTemplate); - if (!tempFileStream) { - std::ostringstream msg; - msg << "failed to open existing temporary file: " << fileNameTemplate; - error(msg.str()); - } - // Close the original file descriptor. - close(fd); - // Unlink the file. Its contents will be safe until tempFileStream is - // closed. - unlink(fileNameTemplate); - } - - // Write the version number - output << "1" << '\n'; - - SymbolSet symbolSet; - PhraseSet sourcePhraseSet; - PhraseSet targetPhraseSet; - AlignmentSetSet alignmentSetSet; - - SymbolPhrase symbolPhrase; - - size_t ruleCount = 0; - RuleTableParser end; - try { - for (RuleTableParser parser(input); parser != end; ++parser) { - const RuleTableParser::Entry &entry = *parser; - ++ruleCount; - - // Report progress in the same format as extract-rules. - if (ruleCount % 100000 == 0) { - std::cerr << "." << std::flush; - } - if (ruleCount % 1000000 == 0) { - std::cerr << " " << ruleCount << std::endl; - } - - // Encode the source LHS + RHS as a vector of symbol IDs and insert into - // sourcePhraseSet. - encodePhrase(entry.sourceLhs, entry.sourceRhs, symbolSet, symbolPhrase); - SymbolIDType sourceId = sourcePhraseSet.insert(symbolPhrase); - - // Encode the target LHS + RHS as a vector of symbol IDs and insert into - // targetPhraseSet. - encodePhrase(entry.targetLhs, entry.targetRhs, symbolSet, symbolPhrase); - SymbolIDType targetId = targetPhraseSet.insert(symbolPhrase); - - // Insert the alignments into alignmentSetSet. - AlignmentSetIDType alignmentSetId = alignmentSetSet.insert( - entry.alignments); - - // Write this rule to the temporary file. - tempFileStream << sourceId << " " << targetId << " " << alignmentSetId; - for (std::vector::const_iterator p = entry.scores.begin(); - p != entry.scores.end(); ++p) { - tempFileStream << " " << *p; - } - tempFileStream << " :"; - for (std::vector::const_iterator p = entry.counts.begin(); - p != entry.counts.end(); ++p) { - tempFileStream << " " << *p; - } - tempFileStream << '\n'; - } - } catch (Exception &e) { - std::ostringstream msg; - msg << "error processing line " << ruleCount+1 << ": " << e.getMsg(); - error(msg.str()); - } - - // Report the counts. - - if (ruleCount % 1000000 != 0) { - std::cerr << std::endl; - } - std::cerr << "Rule count: " << ruleCount << std::endl; - std::cerr << "Symbol count: " << symbolSet.size() << std::endl; - std::cerr << "Source phrase count: " << sourcePhraseSet.size() << std::endl; - std::cerr << "Target phrase count: " << targetPhraseSet.size() << std::endl; - std::cerr << "Alignment set count: " << alignmentSetSet.size() << std::endl; - - // Write the symbol vocabulary. - - output << symbolSet.size() << '\n'; - for (SymbolSet::const_iterator p = symbolSet.begin(); - p != symbolSet.end(); ++p) { - const std::string &str = **p; - output << str << '\n'; - } - - // Write the source phrases. - - output << sourcePhraseSet.size() << '\n'; - for (PhraseSet::const_iterator p = sourcePhraseSet.begin(); - p != sourcePhraseSet.end(); ++p) { - const SymbolPhrase &sourcePhrase = **p; - for (SymbolPhrase::const_iterator q = sourcePhrase.begin(); - q != sourcePhrase.end(); ++q) { - if (q != sourcePhrase.begin()) { - output << " "; - } - output << *q; - } - output << '\n'; - } - - // Write the target phrases. - - output << targetPhraseSet.size() << '\n'; - for (PhraseSet::const_iterator p = targetPhraseSet.begin(); - p != targetPhraseSet.end(); ++p) { - const SymbolPhrase &targetPhrase = **p; - for (SymbolPhrase::const_iterator q = targetPhrase.begin(); - q != targetPhrase.end(); ++q) { - if (q != targetPhrase.begin()) { - output << " "; - } - output << *q; - } - output << '\n'; - } - - // Write the alignment sets. - - output << alignmentSetSet.size() << '\n'; - for (AlignmentSetSet::const_iterator p = alignmentSetSet.begin(); - p != alignmentSetSet.end(); ++p) { - const AlignmentSet &alignmentSet = **p; - for (AlignmentSet::const_iterator q = alignmentSet.begin(); - q != alignmentSet.end(); ++q) { - if (q != alignmentSet.begin()) { - output << " "; - } - output << q->first << "-" << q->second; - } - output << '\n'; - } - - // Write the rule count. - output << ruleCount << '\n'; - - // Copy the rules from the temporary file. - tempFileStream.seekg(0); - std::string line; - while (std::getline(tempFileStream, line)) { - output << line << '\n'; - } - - return 0; -} - -void Compactify::processOptions(int argc, char *argv[], - Options &options) const { - namespace po = boost::program_options; - - std::ostringstream usageMsg; - usageMsg << "usage: " << getName() << " [OPTION]... [FILE]"; - - // Declare the command line options that are visible to the user. - std::string caption = usageMsg.str() + std::string("\n\nAllowed options"); - po::options_description visible(caption); - visible.add_options() - ("help", "print help message and exit") - ("output,o", po::value(), - "write rule table to arg instead of standard output") - ; - - // Declare the command line options that are hidden from the user - // (these are used as positional options). - po::options_description hidden("Hidden options"); - hidden.add_options() - ("input", po::value(), "input file") - ; - - // Compose the full set of command-line options. - po::options_description cmdLineOptions; - cmdLineOptions.add(visible).add(hidden); - - // Register the positional options. - po::positional_options_description p; - p.add("input", 1); - - // Process the command-line. - po::variables_map vm; - try { - po::store(po::command_line_parser(argc, argv). - options(cmdLineOptions).positional(p).run(), vm); - po::notify(vm); - } catch (const std::exception &e) { - std::ostringstream msg; - msg << e.what() << "\n\n" << visible; - error(msg.str()); - std::exit(1); - } - - if (vm.count("help")) { - std::cout << visible << std::endl; - std::exit(0); - } - - // Process positional options. - - if (vm.count("input")) { - options.inputFile = vm["input"].as(); - } - - // Process remaining options. - - if (vm.count("output")) { - options.outputFile = vm["output"].as(); - } -} - -void Compactify::encodePhrase(const std::string &lhs, const StringPhrase &rhs, - SymbolSet &symbolSet, SymbolPhrase &vec) const { - vec.clear(); - vec.reserve(rhs.size()+1); - SymbolIDType id = symbolSet.insert(lhs); - vec.push_back(id); - for (std::vector::const_iterator p = rhs.begin(); - p != rhs.end(); ++p) { - SymbolIDType id = symbolSet.insert(*p); - vec.push_back(id); - } -} - -} // namespace moses diff --git a/scripts/training/compact-rule-table/Compactify.h b/scripts/training/compact-rule-table/Compactify.h deleted file mode 100644 index ef5c1d9ec..000000000 --- a/scripts/training/compact-rule-table/Compactify.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#ifndef COMPACTIFY_H_ -#define COMPACTIFY_H_ - -#include "NumberedSet.h" -#include "Tool.h" - -#include -#include - -namespace moses { - -struct Options; - -// Tool for converting a rule table into a more compact format. -class Compactify : public Tool { - public: - Compactify() : Tool("compactify") {} - virtual int main(int, char *[]); - private: - typedef unsigned int SymbolIDType; - typedef unsigned int PhraseIDType; - typedef unsigned int AlignmentSetIDType; - typedef std::vector StringPhrase; - typedef std::vector SymbolPhrase; - typedef std::pair AlignmentPair; - typedef std::set AlignmentSet; - typedef NumberedSet SymbolSet; - typedef NumberedSet PhraseSet; - typedef NumberedSet AlignmentSetSet; - - void processOptions(int, char *[], Options &) const; - - // Given the string representations of a source or target LHS and RHS, encode - // the symbols using the given SymbolSet and create a SymbolPhrase object. - // The LHS index is the first element of the SymbolPhrase. - void encodePhrase(const std::string &, const StringPhrase &, - SymbolSet &, SymbolPhrase &) const; -}; - -} // namespace moses - -#endif diff --git a/scripts/training/compact-rule-table/Compactify_Main.cpp b/scripts/training/compact-rule-table/Compactify_Main.cpp deleted file mode 100644 index 5ae8833c1..000000000 --- a/scripts/training/compact-rule-table/Compactify_Main.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include "Compactify.h" - -int main(int argc, char *argv[]) { - moses::Compactify tool; - return tool.main(argc, argv); -} diff --git a/scripts/training/compact-rule-table/Exception.h b/scripts/training/compact-rule-table/Exception.h deleted file mode 100644 index 3868fcec5..000000000 --- a/scripts/training/compact-rule-table/Exception.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#ifndef EXCEPTION_H_ -#define EXCEPTION_H_ - -#include - -namespace moses { - -class Exception { - public: - Exception(const char *msg) : m_msg(msg) {} - Exception(const std::string &msg) : m_msg(msg) {} - - const std::string &getMsg() const { return m_msg; } - - private: - std::string m_msg; -}; - -} // namespace moses - -#endif diff --git a/scripts/training/compact-rule-table/Jamfile b/scripts/training/compact-rule-table/Jamfile deleted file mode 100644 index 09f45d454..000000000 --- a/scripts/training/compact-rule-table/Jamfile +++ /dev/null @@ -1,3 +0,0 @@ -exe compactify : [ glob *.cpp ] ../../..//boost_program_options ; - -install tools : compactify : EXE ; diff --git a/scripts/training/compact-rule-table/NumberedSet.h b/scripts/training/compact-rule-table/NumberedSet.h deleted file mode 100644 index 72c01cb30..000000000 --- a/scripts/training/compact-rule-table/NumberedSet.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once -#ifndef NUMBEREDSET_H_ -#define NUMBEREDSET_H_ - -#include "Exception.h" - -#include - -#include -#include -#include - -namespace moses { - -// Stores a set of elements of type T, each of which is allocated an integral -// ID of type IdType. IDs are contiguous starting at 0. Elements cannot be -// removed. -template -class NumberedSet { - private: - typedef boost::unordered_map ElementToIdMap; - typedef std::vector IdToElementMap; - - public: - typedef typename IdToElementMap::const_iterator const_iterator; - - NumberedSet() {} - - const_iterator begin() const { return m_idToElement.begin(); } - const_iterator end() const { return m_idToElement.end(); } - - // Static value - static IdType nullID() { return std::numeric_limits::max(); } - - bool empty() const { return m_idToElement.empty(); } - size_t size() const { return m_idToElement.size(); } - - IdType lookup(const T &) const; - const T &lookup(IdType) const; - - // Insert the given object and return its ID. - IdType insert(const T &); - - private: - ElementToIdMap m_elementToId; - IdToElementMap m_idToElement; -}; - -template -IdType NumberedSet::lookup(const T &s) const { - typename ElementToIdMap::const_iterator p = m_elementToId.find(s); - return (p == m_elementToId.end()) ? nullID() : p->second; -} - -template -const T &NumberedSet::lookup(IdType id) const { - if (id < 0 || id >= m_idToElement.size()) { - std::ostringstream msg; - msg << "Value not found: " << id; - throw Exception(msg.str()); - } - return *(m_idToElement[id]); -} - -template -IdType NumberedSet::insert(const T &x) { - std::pair value(x, m_idToElement.size()); - std::pair result = - m_elementToId.insert(value); - if (result.second) { - // x is a new element. - m_idToElement.push_back(&result.first->first); - } - return result.first->second; -} - -} // namespace moses - -#endif diff --git a/scripts/training/compact-rule-table/Options.h b/scripts/training/compact-rule-table/Options.h deleted file mode 100644 index f3fdb9139..000000000 --- a/scripts/training/compact-rule-table/Options.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#ifndef OPTIONS_H_ -#define OPTIONS_H_ - -#include - -namespace moses { - -struct Options { - public: - Options() {} - std::string inputFile; - std::string outputFile; -}; - -} // namespace moses - -#endif diff --git a/scripts/training/compact-rule-table/RuleTableParser.cpp b/scripts/training/compact-rule-table/RuleTableParser.cpp deleted file mode 100644 index 6272079fd..000000000 --- a/scripts/training/compact-rule-table/RuleTableParser.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "RuleTableParser.h" - -#include "Exception.h" - -#include -#include - -#include - -#include -#include - -namespace moses { - -RuleTableParser::RuleTableParser() - : m_input(0) { -} - -RuleTableParser::RuleTableParser(std::istream &input) - : m_input(&input) { - ++(*this); -} - -RuleTableParser & RuleTableParser::operator++() { - if (!m_input) { - return *this; - } - if (!std::getline(*m_input, m_line)) { - m_input = 0; - return *this; - } - parseLine(m_line); - return *this; -} - -RuleTableParser RuleTableParser::operator++(int) { - RuleTableParser tmp(*this); - ++(*this); - return tmp; -} - -void RuleTableParser::parseLine(const std::string &line) { - // Source symbols - size_t pos = line.find("|||"); - if (pos == std::string::npos) { - throw Exception("missing first delimiter"); - } - std::string text = line.substr(0, pos); - boost::trim(text); - m_value.sourceRhs.clear(); - boost::split(m_value.sourceRhs, text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); - m_value.sourceLhs = m_value.sourceRhs.back(); - m_value.sourceRhs.pop_back(); - std::for_each(m_value.sourceRhs.begin(), m_value.sourceRhs.end(), - trimPairedSymbolFromRight); - - // Target symbols - size_t begin = pos+3; - pos = line.find("|||", begin); - if (pos == std::string::npos) { - throw Exception("missing second delimiter"); - } - text = line.substr(begin, pos-begin); - boost::trim(text); - m_value.targetRhs.clear(); - boost::split(m_value.targetRhs, text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); - m_value.targetLhs = m_value.targetRhs.back(); - m_value.targetRhs.pop_back(); - std::for_each(m_value.targetRhs.begin(), m_value.targetRhs.end(), - trimPairedSymbolFromLeft); - - // Scores - begin = pos+3; - pos = line.find("|||", begin); - if (pos == std::string::npos) { - throw Exception("missing third delimiter"); - } - text = line.substr(begin, pos-begin); - boost::trim(text); - m_value.scores.clear(); - boost::split(m_value.scores, text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); - - // Alignments - begin = pos+3; - pos = line.find("|||", begin); - if (pos == std::string::npos) { - throw Exception("missing fourth delimiter"); - } - text = line.substr(begin, pos-begin); - m_value.alignments.clear(); - boost::trim(text); - // boost::split behaves differently between versions on empry strings - if (!text.empty()) { - tmpStringVec.clear(); - boost::split(tmpStringVec, text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); - for (std::vector::const_iterator p = tmpStringVec.begin(); - p != tmpStringVec.end(); ++p) { - assert(!p->empty()); - std::vector tmpVec; - tmpVec.reserve(2); - boost::split(tmpVec, *p, boost::algorithm::is_any_of("-")); - if (tmpVec.size() != 2) { - throw Exception("bad alignment pair"); - } - std::pair alignmentPair; - alignmentPair.first = boost::lexical_cast(tmpVec[0]); - alignmentPair.second = boost::lexical_cast(tmpVec[1]); - m_value.alignments.insert(alignmentPair); - } - } - - // Counts + everything else (the 'tail') - begin = pos+3; - pos = line.find("|||", begin); - if (pos == std::string::npos) { - text = line.substr(begin); - m_value.tail.clear(); - } else { - text = line.substr(begin, pos-begin); - m_value.tail = line.substr(pos+3); - } - boost::trim(text); - m_value.counts.clear(); - boost::split(m_value.counts, text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); -} - -void RuleTableParser::trimPairedSymbolFromLeft(std::string &s) { - size_t len = s.size(); - if (len < 2 || s[0] != '[' || s[len-1] != ']') { - return; - } - size_t pos = s.find('[', 1); - if (pos == std::string::npos) { - std::ostringstream msg; - msg << "malformed non-terminal pair: " << s; - throw Exception(msg.str()); - } - s.erase(0, pos); -} - -void RuleTableParser::trimPairedSymbolFromRight(std::string &s) { - size_t len = s.size(); - if (len < 2 || s[0] != '[' || s[len-1] != ']') { - return; - } - size_t pos = s.find('[', 1); - if (pos == std::string::npos) { - std::ostringstream msg; - msg << "malformed non-terminal pair: " << s; - throw Exception(msg.str()); - } - s.resize(pos); -} - -bool operator==(const RuleTableParser &lhs, const RuleTableParser &rhs) { - return lhs.m_input == rhs.m_input; -} - -bool operator!=(const RuleTableParser &lhs, const RuleTableParser &rhs) { - return !(lhs == rhs); -} - -} // namespace moses diff --git a/scripts/training/compact-rule-table/RuleTableParser.h b/scripts/training/compact-rule-table/RuleTableParser.h deleted file mode 100644 index 5599e63de..000000000 --- a/scripts/training/compact-rule-table/RuleTableParser.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#ifndef RULETABLEPARSER_H_INCLUDED_ -#define RULETABLEPARSER_H_INCLUDED_ - -#include -#include -#include -#include -#include - -namespace moses { - -class RuleTableParser { - public: - struct Entry { - std::string sourceLhs; - std::vector sourceRhs; - std::string targetLhs; - std::vector targetRhs; - std::vector scores; - std::set > alignments; - std::vector counts; - std::string tail; - }; - - RuleTableParser(); - RuleTableParser(std::istream &); - - const Entry &operator*() const { return m_value; } - const Entry *operator->() const { return &m_value; } - - RuleTableParser &operator++(); - RuleTableParser operator++(int); - - friend bool operator==(const RuleTableParser &, const RuleTableParser &); - friend bool operator!=(const RuleTableParser &, const RuleTableParser &); - - private: - Entry m_value; - std::istream *m_input; - std::string m_line; - std::vector tmpStringVec; - - void parseLine(const std::string &); - static void trimPairedSymbolFromLeft(std::string &); - static void trimPairedSymbolFromRight(std::string &); -}; - -} // namespace moses - -#endif diff --git a/scripts/training/compact-rule-table/Tool.h b/scripts/training/compact-rule-table/Tool.h deleted file mode 100644 index 4a7e1e0e1..000000000 --- a/scripts/training/compact-rule-table/Tool.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#ifndef TOOL_H_ -#define TOOL_H_ - -#include -#include -#include - -namespace moses { - -class Tool { - public: - Tool(const std::string &name) : m_name(name) {} - - const std::string &getName() const { return m_name; } - - virtual int main(int argc, char *argv[]) = 0; - - void warn(const std::string &msg) const { - std::cerr << m_name << ": warning: " << msg << std::endl; - } - - void error(const std::string &msg) const { - std::cerr << m_name << ": error: " << msg << std::endl; - std::exit(1); - } - - private: - std::string m_name; -}; - -} // namespace moses - -#endif diff --git a/scripts/training/phrase-extract/extract-ghkm/Jamfile b/scripts/training/phrase-extract/extract-ghkm/Jamfile deleted file mode 100644 index 92adc2126..000000000 --- a/scripts/training/phrase-extract/extract-ghkm/Jamfile +++ /dev/null @@ -1 +0,0 @@ -exe extract-ghkm : [ glob *.cpp ] ..//filestreams ..//trees ../../../..//boost_iostreams ../../../..//boost_program_options ../../../..//z ; diff --git a/scripts/training/phrase-extract/pcfg-extract/Jamfile b/scripts/training/phrase-extract/pcfg-extract/Jamfile deleted file mode 100644 index be91d6d2f..000000000 --- a/scripts/training/phrase-extract/pcfg-extract/Jamfile +++ /dev/null @@ -1 +0,0 @@ -exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ; diff --git a/scripts/training/phrase-extract/pcfg-score/Jamfile b/scripts/training/phrase-extract/pcfg-score/Jamfile deleted file mode 100644 index 7225381c0..000000000 --- a/scripts/training/phrase-extract/pcfg-score/Jamfile +++ /dev/null @@ -1 +0,0 @@ -exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ; diff --git a/scripts/training/symal/Jamfile b/symal/Jamfile similarity index 100% rename from scripts/training/symal/Jamfile rename to symal/Jamfile diff --git a/scripts/training/symal/cmd.c b/symal/cmd.c similarity index 100% rename from scripts/training/symal/cmd.c rename to symal/cmd.c diff --git a/scripts/training/symal/cmd.h b/symal/cmd.h similarity index 100% rename from scripts/training/symal/cmd.h rename to symal/cmd.h diff --git a/scripts/training/symal/giza2bal.pl b/symal/giza2bal.pl similarity index 100% rename from scripts/training/symal/giza2bal.pl rename to symal/giza2bal.pl diff --git a/scripts/training/symal/symal.cpp b/symal/symal.cpp similarity index 100% rename from scripts/training/symal/symal.cpp rename to symal/symal.cpp diff --git a/scripts/training/symal/symal.vcproj b/symal/symal.vcproj similarity index 100% rename from scripts/training/symal/symal.vcproj rename to symal/symal.vcproj