diff --git a/misc/Jamfile b/misc/Jamfile index 1e5929570..81fcb7043 100644 --- a/misc/Jamfile +++ b/misc/Jamfile @@ -46,4 +46,4 @@ $(TOP)//boost_iostreams $(TOP)//boost_program_options ; -alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sortedprune PhraseTable ; +alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ; diff --git a/misc/prunePhraseTable.cpp b/misc/prunePhraseTable.cpp index 745b9cb45..a6158a4ab 100644 --- a/misc/prunePhraseTable.cpp +++ b/misc/prunePhraseTable.cpp @@ -34,7 +34,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include "moses/InputPath.h" #include "moses/Parameter.h" +#include "moses/TranslationModel/PhraseDictionary.h" #include "moses/StaticData.h" #include "util/file_piece.hh" @@ -55,7 +57,45 @@ static void usage(const po::options_description& desc, char** argv) { } //Find top n translations of source, and send them to output -static void outputTopN(const StringPiece& sourcePhrase, const StaticData& staticData, ostream& out) { +static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary* phraseTable, const std::vector &input, ostream& out) { + //get list of target phrases + Phrase sourcePhrase; + sourcePhrase.CreateFromString(Input,input,sourcePhraseString,NULL); + InputPath inputPath(sourcePhrase, NonTerminalSet(), WordsRange(0,sourcePhrase.GetSize()-1),NULL,NULL); + InputPathList inputPaths; + inputPaths.push_back(&inputPath); + phraseTable->GetTargetPhraseCollectionBatch(inputPaths); + + + //EvaluateInIsolation ?? + const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable); + + //sort by total score and prune + // - Already done? + + + //print phrases + const std::vector& output = StaticData::Instance().GetOutputFactorOrder(); + if (targetPhrases) { + for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) { + const TargetPhrase* targetPhrase = *i; + out << sourcePhrase.GetStringRep(input); + out << " ||| "; + out << targetPhrase->GetStringRep(output); + out << " ||| "; + const ScoreComponentCollection scores = targetPhrase->GetScoreBreakdown(); + vector phraseScores = scores.GetScoresForProducer(phraseTable); + for (size_t j = 0; j < phraseScores.size(); ++j) { + out << exp(phraseScores[j]) << " "; + } + out << "||| "; + const AlignmentInfo& align = targetPhrase->GetAlignTerm(); + for (AlignmentInfo::const_iterator j = align.begin(); j != align.end(); ++j) { + out << j->first << "-" << j->second << " "; + } + out << endl; + } + } } @@ -63,7 +103,6 @@ int main(int argc, char** argv) { bool help; string input_file; - string output_file; string config_file; @@ -71,15 +110,15 @@ int main(int argc, char** argv) desc.add_options() ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") ("input-file,i", po::value(&input_file), "Input file") - ("output-file,o", po::value(&output_file), "Output file") ("config-file,f", po::value(&config_file), "Config file") ; po::options_description cmdline_options; cmdline_options.add(desc); po::variables_map vm; - po::store(po::command_line_parser(argc,argv). - options(cmdline_options).run(), vm); + po::parsed_options parsed = po::command_line_parser(argc,argv). + options(cmdline_options).allow_unregistered().run(); + po::store(parsed, vm); po::notify(vm); if (help) { usage(desc, argv); @@ -90,11 +129,6 @@ int main(int argc, char** argv) usage(desc, argv); exit(1); } - if (output_file.empty()) { - cerr << "ERROR: Please specify an output file" << endl << endl; - usage(desc, argv); - exit(1); - } if (config_file.empty()) { cerr << "ERROR: Please specify a config file" << endl << endl; usage(desc, argv); @@ -105,6 +139,19 @@ int main(int argc, char** argv) mosesargs.push_back(argv[0]); mosesargs.push_back("-f"); mosesargs.push_back(config_file); + for (size_t i = 0; i < parsed.options.size(); ++i) { + if (parsed.options[i].position_key == -1 && !parsed.options[i].unregistered) continue; + const string& key = parsed.options[i].string_key; + if (!key.empty()) { + mosesargs.push_back(key); + } + for (size_t j = 0; j < parsed.options[i].value.size(); ++j) { + const string& value = parsed.options[i].value[j]; + if (!value.empty()) { + mosesargs.push_back(value); + } + } + } boost::scoped_ptr params(new Parameter()); char** mosesargv = new char*[mosesargs.size()]; @@ -122,11 +169,27 @@ int main(int argc, char** argv) exit(1); } + const StaticData &staticData = StaticData::Instance(); + const std::vector & input = staticData.GetInputFactorOrder(); + + //Find the phrase table to evaluate with + PhraseDictionary* phraseTable = NULL; + const vector& ffs = FeatureFunction::GetFeatureFunctions(); + for (size_t i = 0; i < ffs.size(); ++i) { + PhraseDictionary* maybePhraseTable = dynamic_cast< PhraseDictionary*>(ffs[i]); + if (maybePhraseTable) { + UTIL_THROW_IF(phraseTable,util::Exception,"Can only score translations with one phrase table"); + phraseTable = maybePhraseTable; + } + } + UTIL_THROW_IF(!phraseTable,util::Exception,"Unable to find scoring phrase table"); + + Sentence sentence; + phraseTable->InitializeForInput(sentence); // //Load and prune the phrase table. This is taken (with mods) from moses/TranslationModel/RuleTable/LoaderStandard.cpp // - const StaticData &staticData = StaticData::Instance(); string lineOrig; @@ -152,9 +215,11 @@ int main(int argc, char** argv) util::TokenIter pipes(line, "|||"); StringPiece sourcePhraseString(*pipes); if (sourcePhraseString != previous) { - outputTopN(previous, staticData, cout); + outputTopN(previous, phraseTable, input, cout); + previous = sourcePhraseString; } } + outputTopN(previous, phraseTable, input, cout);