mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 23:00:29 +03:00
move hg decoding test to reg tests
This commit is contained in:
parent
e34db401ee
commit
89b364bbf0
@ -245,102 +245,3 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
|
||||
BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph)
{
  // End-to-end check on a hypergraph read from disk: load it, prune it to a
  // fixed number of edges, run Viterbi with a fixed weight vector, and compare
  // both the output tokens and the feature values of the best hypothesis
  // against recorded expectations.

  // Feature name paired with a number (a weight, or an expected feature value).
  struct NamedValue {
    const char* name;
    double value;
  };

  // Model weights, applied in exactly this order.
  static const NamedValue kWeights[] = {
    {"OpSequenceModel0_1", 0.011187},
    {"OpSequenceModel0_2", -0.002797},
    {"OpSequenceModel0_3", 0.002797},
    {"OpSequenceModel0_4", -0.000140},
    {"OpSequenceModel0_5", 0.004195},
    {"Distortion0", 0.041952},
    {"PhrasePenalty0", 0.027968},
    {"WordPenalty0", -0.139841},
    {"UnknownWordPenalty0", 1.000000},
    {"LM0", 0.069920},
    {"LexicalReordering0_1", 0.041952},
    {"LexicalReordering0_2", 0.041952},
    {"LexicalReordering0_3", 0.041952},
    {"LexicalReordering0_4", 0.041952},
    {"LexicalReordering0_5", 0.041952},
    {"LexicalReordering0_6", 0.041952},
    {"LexicalReordering0_7", 0.041952},
    {"LexicalReordering0_8", 0.041952},
    {"TranslationModel0_1", 0.027968},
    {"TranslationModel0_2", 0.027968},
    {"TranslationModel0_3", 0.027968},
    {"TranslationModel0_4", 0.027968},
    {"TranslationModel0_5", 0.027968},
    {"TranslationModel0_6", 0.027968},
    {"TranslationModel0_7", 0.027968},
    {"TranslationModel0_8", 0.027968},
    {"TranslationModel0_9", 0.027968},
    {"TranslationModel0_10", 0.027968},
    {"TranslationModel0_11", 0.027968},
    {"TranslationModel0_12", 0.027968},
    {"TranslationModel0_13", 0.027968}
  };
  static const size_t kNumWeights = sizeof(kWeights) / sizeof(kWeights[0]);

  // Feature values expected on the best hypothesis, checked in this order.
  static const NamedValue kExpectedFeatures[] = {
    {"OpSequenceModel0_1", -80.062},
    {"OpSequenceModel0_2", 2.0},
    {"OpSequenceModel0_3", 2.0},
    {"OpSequenceModel0_4", 3.0},
    {"OpSequenceModel0_5", 0.0},
    {"Distortion0", -6.0},
    {"PhrasePenalty0", 14.0},
    {"WordPenalty0", -20.0},
    {"UnknownWordPenalty0", -100.0},
    {"LM0", -126.616},
    {"LexicalReordering0_1", -5.2238},
    {"LexicalReordering0_2", -0.29515},
    {"LexicalReordering0_3", 0.0},
    {"LexicalReordering0_4", -0.470004},
    {"LexicalReordering0_5", -9.28267},
    {"LexicalReordering0_6", -0.470004},
    {"LexicalReordering0_7", 0.0},
    {"LexicalReordering0_8", -0.402678},
    {"TranslationModel0_1", -54.3119},
    {"TranslationModel0_2", -62.2619},
    {"TranslationModel0_3", -23.8782},
    {"TranslationModel0_4", -25.1626},
    {"TranslationModel0_5", 12.9986},
    {"TranslationModel0_6", 3.99959},
    {"TranslationModel0_7", 1.99979},
    {"TranslationModel0_8", 1.99979},
    {"TranslationModel0_9", 0.0},
    {"TranslationModel0_10", 0.0},
    {"TranslationModel0_11", 0.0},
    {"TranslationModel0_12", 0.999896},
    {"TranslationModel0_13", 7.99917}
  };
  static const size_t kNumExpected =
    sizeof(kExpectedFeatures) / sizeof(kExpectedFeatures[0]);

  Vocab vocab;

  // Single reference translation for sentence 0.
  ReferenceSet references;
  references.AddLine(0,"in addition to EU support for businesses , also the administration of national business support will be concentrated in four Centres for Economic Development , Transport and Environment ( ELY Centres ) , starting from mid @-@ September .",vocab);

  // Read the full hypergraph from the test fixture.
  Graph fullGraph(vocab);
  util::scoped_fd graphFd(util::OpenReadOrThrow("mert/hgtest/0.gz"));
  util::FilePiece graphFile(graphFd.release());
  ReadGraph(graphFile, fullGraph);

  // Build the weight vector used for both pruning and decoding.
  SparseVector weights;
  for (size_t w = 0; w != kNumWeights; ++w) {
    weights.set(kWeights[w].name, kWeights[w].value);
  }

  // Prune down to a fixed edge budget.
  const size_t edgeCount = 500;
  boost::shared_ptr<Graph> prunedGraph(new Graph(vocab));
  graph_prune_marker:
  fullGraph.Prune(prunedGraph.get(), weights, edgeCount);

  // Find the best hypothesis in the pruned graph.
  vector<ValType> bg(9);
  HgHypothesis bestHypo;
  Viterbi(*prunedGraph, weights, 0, references, 0, bg, &bestHypo);

  // The decoded tokens must match the expected sentence one by one ...
  string expectedStr = "<s> the EU matters , but also the national matters management focus since mid @-@ September four ely @-@ centre . </s>";
  util::TokenIter<util::SingleCharacter, true> expectedToken(expectedStr, util::SingleCharacter(' '));
  for (size_t pos = 0; pos != bestHypo.text.size(); ++pos, ++expectedToken) {
    BOOST_CHECK_EQUAL(*expectedToken, bestHypo.text[pos]->first);
  }
  // ... and the expected sentence must be fully consumed.
  BOOST_CHECK(!expectedToken);

  // Every feature value must be within 0.001% of its recorded value.
  for (size_t f = 0; f != kNumExpected; ++f) {
    BOOST_CHECK_CLOSE(kExpectedFeatures[f].value,
                      bestHypo.featureVector.get(kExpectedFeatures[f].name),
                      0.001);
  }
}
|
||||
|
||||
|
||||
|
@ -72,7 +72,9 @@ exe pro : pro.cpp mert_lib ..//boost_program_options ;
|
||||
|
||||
exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
|
||||
|
||||
alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest ;
|
||||
exe hgdecode : hgdecode.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
|
||||
|
||||
alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest hgdecode ;
|
||||
|
||||
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
|
108
mert/hgdecode.cpp
Normal file
108
mert/hgdecode.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2015- University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
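* Command-line driver used to test that hypergraph decoding works correctly: it loads a compressed hypergraph and a weight vector, prunes the graph, and prints the best (Viterbi) hypothesis to stdout and its feature vector to stderr.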
|
||||
**/
|
||||
#include <iostream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include "HopeFearDecoder.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace MosesTuning;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
bool help;
|
||||
string denseInitFile;
|
||||
string sparseInitFile;
|
||||
string hypergraphFile;
|
||||
size_t edgeCount = 500;
|
||||
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
|
||||
("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features.")
|
||||
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
|
||||
("hypergraph,g", po::value<string>(&hypergraphFile), "File containing compressed hypergraph")
|
||||
;
|
||||
|
||||
po::options_description cmdline_options;
|
||||
cmdline_options.add(desc);
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).run(), vm);
|
||||
po::notify(vm);
|
||||
if (help) {
|
||||
cout << "Usage: " + string(argv[0]) + " [options]" << endl;
|
||||
cout << desc << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (hypergraphFile.empty()) {
|
||||
cerr << "Error: missing hypergraph file" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Vocab vocab;
|
||||
|
||||
//Add dummy reference
|
||||
ReferenceSet references;
|
||||
references.AddLine(0,"blah blah blah", vocab);
|
||||
|
||||
//Load weights
|
||||
pair<MiraWeightVector*, size_t> ret = InitialiseWeights(denseInitFile, sparseInitFile, "hypergraph", true);
|
||||
boost::scoped_ptr<MiraWeightVector> wv(ret.first);
|
||||
size_t initDenseSize = ret.second;
|
||||
SparseVector weights;
|
||||
wv->ToSparse(&weights, initDenseSize);
|
||||
|
||||
//Load hypergraph
|
||||
Graph graph(vocab);
|
||||
util::scoped_fd fd(util::OpenReadOrThrow(hypergraphFile.c_str()));
|
||||
util::FilePiece file(fd.release());
|
||||
ReadGraph(file,graph);
|
||||
|
||||
boost::shared_ptr<Graph> prunedGraph;
|
||||
prunedGraph.reset(new Graph(vocab));
|
||||
graph.Prune(prunedGraph.get(), weights, edgeCount);
|
||||
|
||||
vector<ValType> bg(9);
|
||||
HgHypothesis bestHypo;
|
||||
//best hypothesis
|
||||
Viterbi(*prunedGraph, weights, 0, references, 0, bg, &bestHypo);
|
||||
|
||||
for (size_t i = 0; i < bestHypo.text.size(); ++i) {
|
||||
cout << bestHypo.text[i]->first << " ";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
//write weights
|
||||
cerr << "WEIGHTS ";
|
||||
bestHypo.featureVector.write(cerr);
|
||||
cerr << endl;
|
||||
|
||||
}
|
||||
|
BIN
mert/hgtest/0.gz
BIN
mert/hgtest/0.gz
Binary file not shown.
Loading…
Reference in New Issue
Block a user