mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
phrase table pruning
This commit is contained in:
parent
260954bfd5
commit
18437a0351
@ -46,4 +46,4 @@ $(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
;
|
||||
|
||||
alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sortedprune PhraseTable ;
|
||||
alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ;
|
||||
|
@ -34,7 +34,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include "moses/InputPath.h"
|
||||
#include "moses/Parameter.h"
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "moses/StaticData.h"
|
||||
|
||||
#include "util/file_piece.hh"
|
||||
@ -55,7 +57,45 @@ static void usage(const po::options_description& desc, char** argv) {
|
||||
}
|
||||
|
||||
//Find top n translations of source, and send them to output
|
||||
static void outputTopN(const StringPiece& sourcePhrase, const StaticData& staticData, ostream& out) {
|
||||
static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary* phraseTable, const std::vector<FactorType> &input, ostream& out) {
|
||||
//get list of target phrases
|
||||
Phrase sourcePhrase;
|
||||
sourcePhrase.CreateFromString(Input,input,sourcePhraseString,NULL);
|
||||
InputPath inputPath(sourcePhrase, NonTerminalSet(), WordsRange(0,sourcePhrase.GetSize()-1),NULL,NULL);
|
||||
InputPathList inputPaths;
|
||||
inputPaths.push_back(&inputPath);
|
||||
phraseTable->GetTargetPhraseCollectionBatch(inputPaths);
|
||||
|
||||
|
||||
//EvaluateInIsolation ??
|
||||
const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable);
|
||||
|
||||
//sort by total score and prune
|
||||
// - Already done?
|
||||
|
||||
|
||||
//print phrases
|
||||
const std::vector<FactorType>& output = StaticData::Instance().GetOutputFactorOrder();
|
||||
if (targetPhrases) {
|
||||
for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) {
|
||||
const TargetPhrase* targetPhrase = *i;
|
||||
out << sourcePhrase.GetStringRep(input);
|
||||
out << " ||| ";
|
||||
out << targetPhrase->GetStringRep(output);
|
||||
out << " ||| ";
|
||||
const ScoreComponentCollection scores = targetPhrase->GetScoreBreakdown();
|
||||
vector<float> phraseScores = scores.GetScoresForProducer(phraseTable);
|
||||
for (size_t j = 0; j < phraseScores.size(); ++j) {
|
||||
out << exp(phraseScores[j]) << " ";
|
||||
}
|
||||
out << "||| ";
|
||||
const AlignmentInfo& align = targetPhrase->GetAlignTerm();
|
||||
for (AlignmentInfo::const_iterator j = align.begin(); j != align.end(); ++j) {
|
||||
out << j->first << "-" << j->second << " ";
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -63,7 +103,6 @@ int main(int argc, char** argv)
|
||||
{
|
||||
bool help;
|
||||
string input_file;
|
||||
string output_file;
|
||||
string config_file;
|
||||
|
||||
|
||||
@ -71,15 +110,15 @@ int main(int argc, char** argv)
|
||||
desc.add_options()
|
||||
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
|
||||
("input-file,i", po::value<string>(&input_file), "Input file")
|
||||
("output-file,o", po::value<string>(&output_file), "Output file")
|
||||
("config-file,f", po::value<string>(&config_file), "Config file")
|
||||
;
|
||||
|
||||
po::options_description cmdline_options;
|
||||
cmdline_options.add(desc);
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).run(), vm);
|
||||
po::parsed_options parsed = po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).allow_unregistered().run();
|
||||
po::store(parsed, vm);
|
||||
po::notify(vm);
|
||||
if (help) {
|
||||
usage(desc, argv);
|
||||
@ -90,11 +129,6 @@ int main(int argc, char** argv)
|
||||
usage(desc, argv);
|
||||
exit(1);
|
||||
}
|
||||
if (output_file.empty()) {
|
||||
cerr << "ERROR: Please specify an output file" << endl << endl;
|
||||
usage(desc, argv);
|
||||
exit(1);
|
||||
}
|
||||
if (config_file.empty()) {
|
||||
cerr << "ERROR: Please specify a config file" << endl << endl;
|
||||
usage(desc, argv);
|
||||
@ -105,6 +139,19 @@ int main(int argc, char** argv)
|
||||
mosesargs.push_back(argv[0]);
|
||||
mosesargs.push_back("-f");
|
||||
mosesargs.push_back(config_file);
|
||||
for (size_t i = 0; i < parsed.options.size(); ++i) {
|
||||
if (parsed.options[i].position_key == -1 && !parsed.options[i].unregistered) continue;
|
||||
const string& key = parsed.options[i].string_key;
|
||||
if (!key.empty()) {
|
||||
mosesargs.push_back(key);
|
||||
}
|
||||
for (size_t j = 0; j < parsed.options[i].value.size(); ++j) {
|
||||
const string& value = parsed.options[i].value[j];
|
||||
if (!value.empty()) {
|
||||
mosesargs.push_back(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boost::scoped_ptr<Parameter> params(new Parameter());
|
||||
char** mosesargv = new char*[mosesargs.size()];
|
||||
@ -122,11 +169,27 @@ int main(int argc, char** argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::vector<FactorType> & input = staticData.GetInputFactorOrder();
|
||||
|
||||
//Find the phrase table to evaluate with
|
||||
PhraseDictionary* phraseTable = NULL;
|
||||
const vector<FeatureFunction*>& ffs = FeatureFunction::GetFeatureFunctions();
|
||||
for (size_t i = 0; i < ffs.size(); ++i) {
|
||||
PhraseDictionary* maybePhraseTable = dynamic_cast< PhraseDictionary*>(ffs[i]);
|
||||
if (maybePhraseTable) {
|
||||
UTIL_THROW_IF(phraseTable,util::Exception,"Can only score translations with one phrase table");
|
||||
phraseTable = maybePhraseTable;
|
||||
}
|
||||
}
|
||||
UTIL_THROW_IF(!phraseTable,util::Exception,"Unable to find scoring phrase table");
|
||||
|
||||
Sentence sentence;
|
||||
phraseTable->InitializeForInput(sentence);
|
||||
|
||||
//
|
||||
//Load and prune the phrase table. This is taken (with mods) from moses/TranslationModel/RuleTable/LoaderStandard.cpp
|
||||
//
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
string lineOrig;
|
||||
|
||||
@ -152,9 +215,11 @@ int main(int argc, char** argv)
|
||||
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
|
||||
StringPiece sourcePhraseString(*pipes);
|
||||
if (sourcePhraseString != previous) {
|
||||
outputTopN(previous, staticData, cout);
|
||||
outputTopN(previous, phraseTable, input, cout);
|
||||
previous = sourcePhraseString;
|
||||
}
|
||||
}
|
||||
outputTopN(previous, phraseTable, input, cout);
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user