diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
index 91c4fe4f3..8d220fb7d 100644
--- a/mert/ForestRescoreTest.cpp
+++ b/mert/ForestRescoreTest.cpp
@@ -245,102 +245,3 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
   BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
 }
 
-BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph)
-{
-  Vocab vocab;
-  //References
-  ReferenceSet references;
-  references.AddLine(0,"in addition to EU support for businesses , also the administration of national business support will be concentrated in four Centres for Economic Development , Transport and Environment ( ELY Centres ) , starting from mid @-@ September .",vocab);
-  //Load the hypergraph
-  Graph graph(vocab);
-  util::scoped_fd fd(util::OpenReadOrThrow("mert/hgtest/0.gz"));
-  util::FilePiece file(fd.release());
-  ReadGraph(file,graph);
-
-  //prune
-  SparseVector weights;
-  weights.set("OpSequenceModel0_1",0.011187);
-  weights.set("OpSequenceModel0_2",-0.002797);
-  weights.set("OpSequenceModel0_3",0.002797);
-  weights.set("OpSequenceModel0_4",-0.000140);
-  weights.set("OpSequenceModel0_5",0.004195);
-  weights.set("Distortion0",0.041952);
-  weights.set("PhrasePenalty0",0.027968);
-  weights.set("WordPenalty0",-0.139841);
-  weights.set("UnknownWordPenalty0",1.000000);
-  weights.set("LM0",0.069920);
-  weights.set("LexicalReordering0_1",0.041952);
-  weights.set("LexicalReordering0_2",0.041952);
-  weights.set("LexicalReordering0_3",0.041952);
-  weights.set("LexicalReordering0_4",0.041952);
-  weights.set("LexicalReordering0_5",0.041952);
-  weights.set("LexicalReordering0_6",0.041952);
-  weights.set("LexicalReordering0_7",0.041952);
-  weights.set("LexicalReordering0_8",0.041952);
-  weights.set("TranslationModel0_1",0.027968);
-  weights.set("TranslationModel0_2",0.027968);
-  weights.set("TranslationModel0_3",0.027968);
-  weights.set("TranslationModel0_4",0.027968);
-  weights.set("TranslationModel0_5",0.027968);
-  weights.set("TranslationModel0_6",0.027968);
-  weights.set("TranslationModel0_7",0.027968);
-  weights.set("TranslationModel0_8",0.027968);
-  weights.set("TranslationModel0_9",0.027968);
-  weights.set("TranslationModel0_10",0.027968);
-  weights.set("TranslationModel0_11",0.027968);
-  weights.set("TranslationModel0_12",0.027968);
-  weights.set("TranslationModel0_13",0.027968);
-  size_t edgeCount = 500;
-  boost::shared_ptr<Graph> prunedGraph;
-  prunedGraph.reset(new Graph(vocab));
-  graph.Prune(prunedGraph.get(), weights, edgeCount);
-
-  vector<ValType> bg(9);
-  HgHypothesis bestHypo;
-  //best hypothesis
-  Viterbi(*prunedGraph, weights, 0, references, 0, bg, &bestHypo);
-  //check output as expected
-  string expectedStr = "<s> the EU matters , but also the national matters management focus since mid @-@ September four ely @-@ centre . </s>";
-  util::TokenIter<util::SingleCharacter, true> expected(expectedStr, util::SingleCharacter(' '));
-  for (size_t i = 0; i < bestHypo.text.size(); ++i) {
-    //cerr << bestHypo.text[i]->first << " ";
-    BOOST_CHECK_EQUAL(*expected,bestHypo.text[i]->first);
-    ++expected;
-  }
-  BOOST_CHECK(!expected);
-  //cerr << endl;
-  //check scores
-  BOOST_CHECK_CLOSE(-80.062,bestHypo.featureVector.get("OpSequenceModel0_1"), 0.001);
-  BOOST_CHECK_CLOSE(2,bestHypo.featureVector.get("OpSequenceModel0_2"), 0.001);
-  BOOST_CHECK_CLOSE(2,bestHypo.featureVector.get("OpSequenceModel0_3"), 0.001);
-  BOOST_CHECK_CLOSE(3,bestHypo.featureVector.get("OpSequenceModel0_4"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("OpSequenceModel0_5"), 0.001);
-  BOOST_CHECK_CLOSE(-6,bestHypo.featureVector.get("Distortion0"), 0.001);
-  BOOST_CHECK_CLOSE(14,bestHypo.featureVector.get("PhrasePenalty0"), 0.001);
-  BOOST_CHECK_CLOSE(-20,bestHypo.featureVector.get("WordPenalty0"), 0.001);
-  BOOST_CHECK_CLOSE(-100,bestHypo.featureVector.get("UnknownWordPenalty0"), 0.001);
-  BOOST_CHECK_CLOSE(-126.616,bestHypo.featureVector.get("LM0"), 0.001);
-  BOOST_CHECK_CLOSE(-5.2238,bestHypo.featureVector.get("LexicalReordering0_1"), 0.001);
-  BOOST_CHECK_CLOSE(-0.29515,bestHypo.featureVector.get("LexicalReordering0_2"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("LexicalReordering0_3"), 0.001);
-  BOOST_CHECK_CLOSE(-0.470004,bestHypo.featureVector.get("LexicalReordering0_4"), 0.001);
-  BOOST_CHECK_CLOSE(-9.28267,bestHypo.featureVector.get("LexicalReordering0_5"), 0.001);
-  BOOST_CHECK_CLOSE(-0.470004,bestHypo.featureVector.get("LexicalReordering0_6"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("LexicalReordering0_7"), 0.001);
-  BOOST_CHECK_CLOSE(-0.402678,bestHypo.featureVector.get("LexicalReordering0_8"), 0.001);
-  BOOST_CHECK_CLOSE(-54.3119,bestHypo.featureVector.get("TranslationModel0_1"), 0.001);
-  BOOST_CHECK_CLOSE(-62.2619,bestHypo.featureVector.get("TranslationModel0_2"), 0.001);
-  BOOST_CHECK_CLOSE(-23.8782,bestHypo.featureVector.get("TranslationModel0_3"), 0.001);
-  BOOST_CHECK_CLOSE(-25.1626,bestHypo.featureVector.get("TranslationModel0_4"), 0.001);
-  BOOST_CHECK_CLOSE(12.9986,bestHypo.featureVector.get("TranslationModel0_5"), 0.001);
-  BOOST_CHECK_CLOSE(3.99959,bestHypo.featureVector.get("TranslationModel0_6"), 0.001);
-  BOOST_CHECK_CLOSE(1.99979,bestHypo.featureVector.get("TranslationModel0_7"), 0.001);
-  BOOST_CHECK_CLOSE(1.99979,bestHypo.featureVector.get("TranslationModel0_8"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_9"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_10"), 0.001);
-  BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_11"), 0.001);
-  BOOST_CHECK_CLOSE(0.999896,bestHypo.featureVector.get("TranslationModel0_12"), 0.001);
-  BOOST_CHECK_CLOSE(7.99917,bestHypo.featureVector.get("TranslationModel0_13"), 0.001);
-}
-
-
diff --git a/mert/Jamfile b/mert/Jamfile
index 51736dace..f4e2cb9a0 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -72,7 +72,9 @@ exe pro : pro.cpp mert_lib ..//boost_program_options ;
 
 exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
 
-alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest ;
+exe hgdecode : hgdecode.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
+
+alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest hgdecode ;
 
 unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/hgdecode.cpp b/mert/hgdecode.cpp
new file mode 100644
index 000000000..0b1f230f8
--- /dev/null
+++ b/mert/hgdecode.cpp
@@ -0,0 +1,108 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2015- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+/**
+ * Used to test that hypergraph decoding works correctly.
+**/
+#include <iostream>
+
+#include <boost/program_options.hpp>
+#include <boost/scoped_ptr.hpp>
+
+#include "HopeFearDecoder.h"
+
+using namespace std;
+using namespace MosesTuning;
+
+namespace po = boost::program_options;
+
+
+int main(int argc, char** argv)
+{
+  bool help;
+  string denseInitFile;
+  string sparseInitFile;
+  string hypergraphFile;
+  size_t edgeCount = 500;
+
+  po::options_description desc("Allowed options");
+  desc.add_options()
+  ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+  ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features.")
+  ("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
+  ("hypergraph,g", po::value<string>(&hypergraphFile), "File containing compressed hypergraph")
+  ;
+
+  po::options_description cmdline_options;
+  cmdline_options.add(desc);
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc,argv).
+            options(cmdline_options).run(), vm);
+  po::notify(vm);
+  if (help) {
+    cout << "Usage: " + string(argv[0]) + " [options]" << endl;
+    cout << desc << endl;
+    exit(0);
+  }
+
+  if (hypergraphFile.empty()) {
+    cerr << "Error: missing hypergraph file" << endl;
+    exit(1);
+  }
+
+  Vocab vocab;
+
+  //Add dummy reference
+  ReferenceSet references;
+  references.AddLine(0,"blah blah blah", vocab);
+
+  //Load weights
+  pair<MiraWeightVector*, size_t> ret = InitialiseWeights(denseInitFile, sparseInitFile, "hypergraph", true);
+  boost::scoped_ptr<MiraWeightVector> wv(ret.first);
+  size_t initDenseSize = ret.second;
+  SparseVector weights;
+  wv->ToSparse(&weights, initDenseSize);
+
+  //Load hypergraph
+  Graph graph(vocab);
+  util::scoped_fd fd(util::OpenReadOrThrow(hypergraphFile.c_str()));
+  util::FilePiece file(fd.release());
+  ReadGraph(file,graph);
+
+  boost::shared_ptr<Graph> prunedGraph;
+  prunedGraph.reset(new Graph(vocab));
+  graph.Prune(prunedGraph.get(), weights, edgeCount);
+
+  vector<ValType> bg(9);
+  HgHypothesis bestHypo;
+  //best hypothesis
+  Viterbi(*prunedGraph, weights, 0, references, 0, bg, &bestHypo);
+
+  for (size_t i = 0; i < bestHypo.text.size(); ++i) {
+    cout << bestHypo.text[i]->first << " ";
+  }
+  cout << endl;
+
+  //write weights
+  cerr << "WEIGHTS ";
+  bestHypo.featureVector.write(cerr);
+  cerr << endl;
+
+}
+
diff --git a/mert/hgtest/0.gz b/mert/hgtest/0.gz
deleted file mode 100644
index 012f9efbe..000000000
Binary files a/mert/hgtest/0.gz and /dev/null differ