From ccfe8ba018f57169d4f4c9af8fcbd418eed79cf9 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 10 Nov 2015 21:35:08 +0000 Subject: [PATCH 01/32] remove unused method, and misleading comment --- moses/ThreadPool.cpp | 3 --- moses/ThreadPool.h | 3 --- scripts/ems/experiment.meta | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/moses/ThreadPool.cpp b/moses/ThreadPool.cpp index cba459697..deb6acb67 100644 --- a/moses/ThreadPool.cpp +++ b/moses/ThreadPool.cpp @@ -55,9 +55,6 @@ void ThreadPool::Execute() } //Execute job if (task) { - // must read from task before run. otherwise task may be deleted by main thread - // race condition - task->DeleteAfterExecution(); task->Run(); } m_threadAvailable.notify_all(); diff --git a/moses/ThreadPool.h b/moses/ThreadPool.h index b7d459bb2..f5be5e52b 100644 --- a/moses/ThreadPool.h +++ b/moses/ThreadPool.h @@ -53,9 +53,6 @@ class Task { public: virtual void Run() = 0; - virtual bool DeleteAfterExecution() { - return true; - } virtual ~Task() {} }; diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 562cf8ec2..29719d878 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -741,7 +741,7 @@ build-reordering build-ttable in: extracted-phrases lexical-translation-table corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains out: phrase-translation-table - rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script EVALUATION:report-precision-by-coverage include-word-alignment-in-rules domain-features + rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script include-word-alignment-in-rules domain-features default-name: model/phrase-table ignore-if: suffix-array mmsapt final-model: yes From 7c088c9a95ee6fdbd211f2b2ae9606ecf1ccba3c Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Tue, 17 Nov 2015 14:19:36 +0100 Subject: [PATCH 02/32] first commit of placeholder changes to moses server --- moses/server/TranslationRequest.cpp | 37 +++++++++++++++++++++++++---- moses/server/TranslationRequest.h | 2 +- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 8f15b30ff..f23b378f6 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -11,6 +11,8 @@ using Moses::StaticData; using Moses::Range; using Moses::ChartHypothesis; using Moses::Phrase; +using Moses::Factor; +using Moses::FactorType; using Moses::Manager; using Moses::SearchGraphNode; using Moses::TrellisPathList; @@ -164,16 +166,43 @@ insertGraphInfo(Manager& manager, map& retData) retData["sg"] = xmlrpc_c::value_array(searchGraphXml); } +// void +// TranslationRequest:: +// output_phrase(ostream& out, Phrase const& phrase) const +// { +// if (!m_options.output.ReportAllFactors) { +// for (size_t i = 0 ; i < phrase.GetSize(); ++i) +// out << *phrase.GetFactor(i, 0) << " "; +// } else out << phrase; +// } + void TranslationRequest:: -output_phrase(ostream& out, Phrase const& phrase) const +output_phrase(ostream& out, const Hypothesis* hypo) const { + Phrase phrase = hypo->GetCurrTargetPhrase(); + if (!m_options.output.ReportAllFactors) { - for (size_t i = 0 ; i < phrase.GetSize(); ++i) - out << *phrase.GetFactor(i, 0) << " "; + FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); + std::map placeholders; + if (placeholderFactor != NOT_FOUND) { + // creates map of target position -> factor for placeholders + placeholders = GetPlaceholders(*hypo, placeholderFactor); + } + for (size_t i = 0 ; i < phrase.GetSize(); ++i) { + const Factor *factor = phrase.GetFactor(i, 0); + if (placeholders.size()) { + // do placeholders + std::map::const_iterator iter = placeholders.find(i); + if (iter != placeholders.end()) { + factor = iter->second; + } + } + out << *factor << " "; + } } else out << phrase; } - + void TranslationRequest:: outputNBest(const Manager& manager, map& retData) diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index b93043b9b..24adb7888 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -68,7 +68,7 @@ TranslationRequest : public virtual Moses::TranslationTask void - output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; + output_phrase(std::ostream& out, const Moses::Hypothesis* hypo) const; void add_phrase_aln_info(Moses::Hypothesis const& h, From 66928f682f55cb6e6b105dd71a358354963e91d3 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Tue, 17 Nov 2015 15:06:48 +0100 Subject: [PATCH 03/32] second commit of moses server placeholder fix --- moses/server/TranslationRequest.cpp | 47 +++++++---------------------- moses/server/TranslationRequest.h | 8 ++--- 2 files changed, 15 insertions(+), 40 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index f23b378f6..433e37a27 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -3,6 +3,8 @@ #include "moses/ContextScope.h" #include #include "moses/Util.h" +#include "moses/Hypothesis.h" + namespace MosesServer { using namespace std; @@ -11,8 +13,6 @@ using Moses::StaticData; using Moses::Range; using Moses::ChartHypothesis; using Moses::Phrase; -using Moses::Factor; -using Moses::FactorType; using Moses::Manager; using Moses::SearchGraphNode; using Moses::TrellisPathList; @@ -175,33 +175,6 @@ insertGraphInfo(Manager& manager, map& retData) // out << *phrase.GetFactor(i, 0) << " "; // } else out << phrase; // } - -void -TranslationRequest:: -output_phrase(ostream& out, const Hypothesis* hypo) const -{ - Phrase phrase = hypo->GetCurrTargetPhrase(); - - if (!m_options.output.ReportAllFactors) { - FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); - std::map placeholders; - if (placeholderFactor != NOT_FOUND) { - // creates map of target position -> factor for placeholders - placeholders = GetPlaceholders(*hypo, placeholderFactor); - } - for (size_t i = 0 ; i < phrase.GetSize(); ++i) { - const Factor *factor = phrase.GetFactor(i, 0); - if (placeholders.size()) { - // do placeholders - std::map::const_iterator iter = placeholders.find(i); - if (iter != placeholders.end()) { - factor = iter->second; - } - } - out << *factor << " "; - } - } else out << phrase; -} void TranslationRequest:: @@ -222,7 +195,7 @@ outputNBest(const Manager& manager, map& retData) vector const& E = path->GetEdges(); if (!E.size()) continue; std::map nBestXmlItem; - pack_hypothesis(E, "hyp", nBestXmlItem); + pack_hypothesis(manager, E, "hyp", nBestXmlItem); if (m_withScoreBreakdown) { // should the score breakdown be reported in a more structured manner? ostringstream buf; @@ -396,13 +369,15 @@ run_chart_decoder() void TranslationRequest:: -pack_hypothesis(vector const& edges, string const& key, +pack_hypothesis(Moses::Manager& manager, vector const& edges, string const& key, map & dest) const { // target string ostringstream target; - BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) - output_phrase(target, e->GetCurrTargetPhrase()); + BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) + manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(), + options().output.ReportSegmentation, m_options.output.ReportAllFactors); +// output_phrase(target, e->GetTargetPhrase()); XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); @@ -427,7 +402,7 @@ pack_hypothesis(vector const& edges, string const& key, void TranslationRequest:: -pack_hypothesis(Hypothesis const* h, string const& key, +pack_hypothesis(Moses::Manager& manager, Hypothesis const* h, string const& key, map& dest) const { using namespace std; @@ -450,8 +425,8 @@ run_phrase_decoder() manager.Decode(); - - pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); + + pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData); if (m_session_id) m_retData["session-id"] = xmlrpc_c::value_int(m_session_id); diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 24adb7888..f3984d35a 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -58,17 +58,17 @@ TranslationRequest : public virtual Moses::TranslationTask run_phrase_decoder(); void - pack_hypothesis(std::vector const& edges, + pack_hypothesis(Moses::Manager& manager, std::vector const& edges, std::string const& key, std::map & dest) const; void - pack_hypothesis(Moses::Hypothesis const* h, std::string const& key, + pack_hypothesis(Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key, std::map & dest) const; - void - output_phrase(std::ostream& out, const Moses::Hypothesis* hypo) const; +// void +// output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; void add_phrase_aln_info(Moses::Hypothesis const& h, From 5e1340cf63b1e263ff7336075bf791642fd616db Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Tue, 17 Nov 2015 15:16:39 +0100 Subject: [PATCH 04/32] fixed moses server placehoders ; correctly implemented the boolean parameter check function for mosesserver request parameters --- moses/server/TranslationRequest.cpp | 13 ++++++++----- moses/server/TranslationRequest.h | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 433e37a27..fca1c85a5 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -264,8 +264,11 @@ bool check(std::map const& param, std::string const key) { - std::map::const_iterator m; - return (param.find(key) != param.end()); + std::map::const_iterator m = param.find(key); + if(m == param.end()) return false; + std::string val = string(xmlrpc_c::value_string(m->second)); + if(val == "true" || val == "True" || val == "TRUE" || val == "1") return true; + return false; } void @@ -369,7 +372,7 @@ run_chart_decoder() void TranslationRequest:: -pack_hypothesis(Moses::Manager& manager, vector const& edges, string const& key, +pack_hypothesis(const Moses::Manager& manager, vector const& edges, string const& key, map & dest) const { // target string @@ -402,14 +405,14 @@ pack_hypothesis(Moses::Manager& manager, vector const& edges void TranslationRequest:: -pack_hypothesis(Moses::Manager& manager, Hypothesis const* h, string const& key, +pack_hypothesis(const Moses::Manager& manager, Hypothesis const* h, string const& key, map& dest) const { using namespace std; vector edges; for (; h; h = h->GetPrevHypo()) edges.push_back(h); - pack_hypothesis(edges, key, dest); + pack_hypothesis(manager, edges, key, dest); } diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index f3984d35a..94d055fd2 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -58,12 +58,12 @@ TranslationRequest : public virtual Moses::TranslationTask run_phrase_decoder(); void - pack_hypothesis(Moses::Manager& manager, std::vector const& edges, + pack_hypothesis(const Moses::Manager& manager, std::vector const& edges, std::string const& key, std::map & dest) const; void - pack_hypothesis(Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key, + pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key, std::map & dest) const; From 220d82041ad1113c2c5a241df100c1feacb19729 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Wed, 18 Nov 2015 10:35:17 +0000 Subject: [PATCH 05/32] gcc-4.4.7 was struggling with boost::intrusive_ptr. Switched to shared_ptr. --- moses/TranslationModel/UG/mm/ug_bitext_sampler.h | 2 +- moses/TranslationModel/UG/mm/ug_prep_phrases.h | 4 ++-- moses/TranslationModel/UG/mmsapt.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h index a36e0772f..17de9c051 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h @@ -52,7 +52,7 @@ BitextSampler : public Moses::reference_counter // const members // SPTR const m_bitext; // keep bitext alive while I am // should be an - iptr const m_bitext; // keep bitext alive as long as I am + SPTR const m_bitext; // keep bitext alive as long as I am size_t const m_plen; // length of lookup phrase bool const m_fwd; // forward or backward direction? SPTR const m_root; // root of suffix array diff --git a/moses/TranslationModel/UG/mm/ug_prep_phrases.h b/moses/TranslationModel/UG/mm/ug_prep_phrases.h index 1c62db2e3..1d04d6680 100644 --- a/moses/TranslationModel/UG/mm/ug_prep_phrases.h +++ b/moses/TranslationModel/UG/mm/ug_prep_phrases.h @@ -16,7 +16,7 @@ struct StatsCollector typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t; typedef ThreadSafeContainer > pcache_t; typedef map > lcache_t; - iptr const> bitext; // underlying bitext + SPTR const> bitext; // underlying bitext sampling_method method; // sampling method size_t sample_size; // sample size SPTR bias; // sampling bias @@ -26,7 +26,7 @@ struct StatsCollector SPTR lcache; // local cache ug::ThreadPool* tpool; // thread pool to run jobs on - StatsCollector(iptr > xbitext, + StatsCollector(SPTR > xbitext, SPTR const xbias) : method(ranked_sampling) , sample_size(100) diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index d30177518..9dbd869df 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -71,7 +71,7 @@ namespace Moses typedef sapt::PhraseScorer pscorer; private: // vector > shards; - iptr btfix; + SPTR btfix; SPTR btdyn; std::string m_bname, m_extra_data, m_bias_file,m_bias_server; std::string L1; @@ -160,7 +160,7 @@ namespace Moses #if PROVIDES_RANKED_SAMPLING void - set_bias_for_ranking(ttasksptr const& ttask, iptr const> bt); + set_bias_for_ranking(ttasksptr const& ttask, SPTR const> bt); #endif private: From 4e8396744cec72d7f2926c2d972b86f3b20cbb93 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Wed, 18 Nov 2015 16:21:50 +0100 Subject: [PATCH 06/32] added first version of regression testing for moses server --- moses/server/TranslationRequest.cpp | 16 ++------ moses/server/TranslationRequest.h | 4 -- regression-testing/Jamfile | 12 +++++- regression-testing/run-single-test.perl | 52 ++++++++++++++++++++++++- run-regtests.sh | 7 +++- 5 files changed, 69 insertions(+), 22 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index fca1c85a5..2317d6434 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -165,16 +165,6 @@ insertGraphInfo(Manager& manager, map& retData) } retData["sg"] = xmlrpc_c::value_array(searchGraphXml); } - -// void -// TranslationRequest:: -// output_phrase(ostream& out, Phrase const& phrase) const -// { -// if (!m_options.output.ReportAllFactors) { -// for (size_t i = 0 ; i < phrase.GetSize(); ++i) -// out << *phrase.GetFactor(i, 0) << " "; -// } else out << phrase; -// } void TranslationRequest:: @@ -380,7 +370,7 @@ pack_hypothesis(const Moses::Manager& manager, vector const& BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(), options().output.ReportSegmentation, m_options.output.ReportAllFactors); -// output_phrase(target, e->GetTargetPhrase()); + XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); @@ -390,7 +380,7 @@ pack_hypothesis(const Moses::Manager& manager, vector const& vector p_aln; BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) - add_phrase_aln_info(*e, p_aln); + add_phrase_aln_info(*e, p_aln); dest["align"] = xmlrpc_c::value_array(p_aln); } @@ -398,7 +388,7 @@ pack_hypothesis(const Moses::Manager& manager, vector const& // word alignment, if requested vector w_aln; BOOST_FOREACH(Hypothesis const* e, edges) - e->OutputLocalWordAlignment(w_aln); + e->OutputLocalWordAlignment(w_aln); dest["word-align"] = xmlrpc_c::value_array(w_aln); } } diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 94d055fd2..8d97aed42 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -66,10 +66,6 @@ TranslationRequest : public virtual Moses::TranslationTask pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key, std::map & dest) const; - -// void -// output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; - void add_phrase_aln_info(Moses::Hypothesis const& h, std::vector& aInfo) const; diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 3f07744c6..f2649908c 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -1,6 +1,7 @@ import option path ; with-regtest = [ option.get "with-regtest" ] ; +with-xmlrpc = [ option.get "with-xmlrpc-c" ] ; if $(with-regtest) { with-regtest = [ path.root $(with-regtest) [ path.pwd ] ] ; @@ -24,7 +25,11 @@ if $(with-regtest) { actions reg_test_decode { $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } + actions reg_test_decode_server { + $(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) + } reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ; + reg_test phrase-server : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode_server ; reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ; actions reg_test_score { @@ -55,5 +60,10 @@ if $(with-regtest) { reg_test misc : [ glob $(test-dir)/misc.* : $(test-dir)/misc.mml* ] : ..//prefix-bin ..//prefix-lib : @reg_test_misc ; reg_test misc-mml : [ glob $(test-dir)/misc.mml* ] : $(TOP)/scripts/ems/support/mml-filter.py $(TOP)/scripts/ems/support/defaultconfig.py : @reg_test_misc ; - alias all : phrase chart mert score extract extractrules misc misc-mml ; + if $(with-xmlrpc) { + alias all : phrase phrase-server chart mert score extract extractrules misc misc-mml ; + } + else { + alias all : phrase chart mert score extract extractrules misc misc-mml ; + } } diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index 94a247e46..f5ff96e0a 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -2,6 +2,9 @@ # $Id$ +use Encode; +use XMLRPC::Lite; +use utf8; use warnings; use strict; my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; } @@ -17,12 +20,16 @@ my $data_dir; my $BIN_TEST = $script_dir; my $results_dir; my $NBEST = 0; +my $run_server_test = 0; +my $serverport = 16531; +my $url = "http://localhost:$serverport/RPC2"; GetOptions("decoder=s" => \$decoder, "test=s" => \$test_name, "data-dir=s"=> \$data_dir, "test-dir=s"=> \$test_dir, "results-dir=s"=> \$results_dir, + "server"=> \$run_server_test, ) or exit 1; die "Please specify a decoder with --decoder\n" unless $decoder; @@ -72,8 +79,13 @@ if (!-d $truth) { } print "RESULTS AVAILABLE IN: $results\n\n"; - -my ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results); +my ($o, $elapsed, $ec, $sig); +if($run_server_test) { + ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results); +} +else { + ($o, $elapsed, $ec, $sig) = exec_moses_server($decoder, $local_moses_ini, $input, $results); +} my $error = ($sig || $ec > 0); if ($error) { open OUT, ">$results/Summary"; @@ -139,6 +151,42 @@ sub exec_moses { return ($o, $elapsed, $ec, $sig); } +sub exec_moses_server { + my ($decoder, $conf, $input, $results) = @_; + my $start_time = time; + my ($o, $ec, $sig); + my $pid = fork(); + if (not defined $pid) { + warn "resources not avilable to fork Moses server\n"; + $ec = 1; # to generate error + $sig = 'SIGABRT'; + } elsif ($pid == 0) { + warn "Starting Moses server...\n"; + ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf --server-log $results/run.stderr"); + # this should not be reached unless the server fails to start + } + else { + sleep 10; + my $proxy = XMLRPC::Lite->proxy($url); + open(TEXTIN, "$input") or die "Can not open the input file to translate with Moses server\n"; + binmode TEXTIN, ':utf8'; + open(TEXTOUT, ">$results/run.stdout"); + binmode TEXTOUT, ':utf8'; + while() + { + chop; + my $encoded = SOAP::Data->type(string => Encode::encode("utf8", $_)); + my %param = ("text" => $encoded); + my $result = $proxy->call("translate",\%param)->result; + print TEXTOUT $result->{'text'} . "\n"; + } + close(TEXTOUT); + kill('-SIGTERM', $pid); + } + my $elapsed = time - $start_time; + return ($o, $elapsed, $ec, $sig); +} + sub run_command { my ($cmd) = @_; my $o = `$cmd`; diff --git a/run-regtests.sh b/run-regtests.sh index 8b64eac22..5b814e444 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -7,8 +7,11 @@ set -e -o pipefail git submodule init git submodule update regtest +RECOMPILE=${RECOMPILE:-"-a"} + # test compilation without xmlrpc-c -./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest -a -q $@ || exit $? +# ./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest $RECOMPILE -q $@ || exit $? # test compilation with xmlrpc-c -./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest -a -q $@ +./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest $RECOMPILE -q $@ + From fb1195e14cd549f230594bbc2a73c7dbf46952e1 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Wed, 18 Nov 2015 16:54:34 +0100 Subject: [PATCH 07/32] fixed an error in regression testing for moses server --- regression-testing/Jamfile | 2 +- regression-testing/run-single-test.perl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index f2649908c..2a1b58364 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -29,7 +29,7 @@ if $(with-regtest) { $(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ; - reg_test phrase-server : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode_server ; + reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ; reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ; actions reg_test_score { diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index f5ff96e0a..70bb12b65 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -81,10 +81,10 @@ if (!-d $truth) { print "RESULTS AVAILABLE IN: $results\n\n"; my ($o, $elapsed, $ec, $sig); if($run_server_test) { - ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results); + ($o, $elapsed, $ec, $sig) = exec_moses_server($decoder, $local_moses_ini, $input, $results); } else { - ($o, $elapsed, $ec, $sig) = exec_moses_server($decoder, $local_moses_ini, $input, $results); + ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results); } my $error = ($sig || $ec > 0); if ($error) { From edacfbb9fd3830382e3e98b7ece4fd830937e657 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Wed, 18 Nov 2015 17:09:52 +0000 Subject: [PATCH 08/32] Bug fix in server: Moses server now handles the placeholder mechanism for translations provided with the input. --- moses/server/TranslationRequest.cpp | 39 +++++++++++++++++------------ moses/server/TranslationRequest.h | 10 +++++--- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 8f15b30ff..642a72f22 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -164,15 +164,15 @@ insertGraphInfo(Manager& manager, map& retData) retData["sg"] = xmlrpc_c::value_array(searchGraphXml); } -void -TranslationRequest:: -output_phrase(ostream& out, Phrase const& phrase) const -{ - if (!m_options.output.ReportAllFactors) { - for (size_t i = 0 ; i < phrase.GetSize(); ++i) - out << *phrase.GetFactor(i, 0) << " "; - } else out << phrase; -} +// void +// TranslationRequest:: +// output_phrase(ostream& out, Phrase const& phrase) const +// { +// if (!m_options.output.ReportAllFactors) { +// for (size_t i = 0 ; i < phrase.GetSize(); ++i) +// out << *phrase.GetFactor(i, 0) << " "; +// } else out << phrase; +// } void TranslationRequest:: @@ -193,7 +193,7 @@ outputNBest(const Manager& manager, map& retData) vector const& E = path->GetEdges(); if (!E.size()) continue; std::map nBestXmlItem; - pack_hypothesis(E, "hyp", nBestXmlItem); + pack_hypothesis(manager, E, "hyp", nBestXmlItem); if (m_withScoreBreakdown) { // should the score breakdown be reported in a more structured manner? ostringstream buf; @@ -367,13 +367,19 @@ run_chart_decoder() void TranslationRequest:: -pack_hypothesis(vector const& edges, string const& key, - map & dest) const +pack_hypothesis(Moses::Manager const& manager, + vector const& edges, + string const& key, map & dest) const { // target string ostringstream target; BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) - output_phrase(target, e->GetCurrTargetPhrase()); + { + // output_phrase(target, e->GetCurrTargetPhrase()); + manager.OutputSurface(target,*e, m_options.output.factor_order, + m_options.output.ReportSegmentation, + m_options.output.ReportAllFactors); + } XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); @@ -398,14 +404,15 @@ pack_hypothesis(vector const& edges, string const& key, void TranslationRequest:: -pack_hypothesis(Hypothesis const* h, string const& key, +pack_hypothesis(Moses::Manager const& manager, + Hypothesis const* h, string const& key, map& dest) const { using namespace std; vector edges; for (; h; h = h->GetPrevHypo()) edges.push_back(h); - pack_hypothesis(edges, key, dest); + pack_hypothesis(manager, edges, key, dest); } @@ -422,7 +429,7 @@ run_phrase_decoder() manager.Decode(); - pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); + pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData); if (m_session_id) m_retData["session-id"] = xmlrpc_c::value_int(m_session_id); diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index b93043b9b..297720f8e 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -58,17 +58,19 @@ TranslationRequest : public virtual Moses::TranslationTask run_phrase_decoder(); void - pack_hypothesis(std::vector const& edges, + pack_hypothesis(Moses::Manager const& manager, + std::vector const& edges, std::string const& key, std::map & dest) const; void - pack_hypothesis(Moses::Hypothesis const* h, std::string const& key, + pack_hypothesis(Moses::Manager const& manager, + Moses::Hypothesis const* h, std::string const& key, std::map & dest) const; - void - output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; + // void + // output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; void add_phrase_aln_info(Moses::Hypothesis const& h, From b002fade504f6f65e2232414f1688625e8f78190 Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Wed, 18 Nov 2015 13:54:54 -0500 Subject: [PATCH 09/32] Minimal buffering for multi_moses.py Speeds things up when using multi-threaded instances --- scripts/generic/multi_moses.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/generic/multi_moses.py b/scripts/generic/multi_moses.py index 01a38d8e6..f47cba1f5 100755 --- a/scripts/generic/multi_moses.py +++ b/scripts/generic/multi_moses.py @@ -90,9 +90,10 @@ def run_instance(cmd_base, threads, tasks, n_best=False): cmd.append('--threads') cmd.append(str(threads)) try: - # Queue of tasks instance is currently working on, limited to the number of - # threads. The queue should be kept full for optimal CPU usage. - work = Queue.Queue(maxsize=threads) + # Queue of tasks instance is currently working on, limited to the number + # of threads * 2 (minimal buffering). The queue should be kept full for + # optimal CPU usage. + work = Queue.Queue(maxsize=(threads * 2)) # Multi-threaded instance moses = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) From 0f16e804b55f0c4cb96cc4139a7b3ad17fdde546 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 04:02:28 -0700 Subject: [PATCH 10/32] continuing to fix regression tests --- regression-testing/run-single-test.perl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index 70bb12b65..0d5b85f09 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -155,14 +155,17 @@ sub exec_moses_server { my ($decoder, $conf, $input, $results) = @_; my $start_time = time; my ($o, $ec, $sig); + $ec = 0; $sig = 0; $o = 0; my $pid = fork(); if (not defined $pid) { warn "resources not avilable to fork Moses server\n"; $ec = 1; # to generate error - $sig = 'SIGABRT'; +# $sig = 'SIGABRT'; } elsif ($pid == 0) { + setpgrp(0, 0); warn "Starting Moses server...\n"; - ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf --server-log $results/run.stderr"); + ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr "); + exit; # this should not be reached unless the server fails to start } else { @@ -175,14 +178,16 @@ sub exec_moses_server { while() { chop; - my $encoded = SOAP::Data->type(string => Encode::encode("utf8", $_)); + my $encoded = SOAP::Data->type(string => $_); my %param = ("text" => $encoded); my $result = $proxy->call("translate",\%param)->result; print TEXTOUT $result->{'text'} . "\n"; } close(TEXTOUT); - kill('-SIGTERM', $pid); + kill 9, -$pid; } + kill 9, -$pid; + waitpid $pid, 0; my $elapsed = time - $start_time; return ($o, $elapsed, $ec, $sig); } From d3fb16d39f4d7b1ff48d90dbaf2550ea7dff0a94 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 12:03:19 +0100 Subject: [PATCH 11/32] changed debug output for moses server --- moses/server/TranslationRequest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 2317d6434..c749d6210 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -370,8 +370,8 @@ pack_hypothesis(const Moses::Manager& manager, vector const& BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(), options().output.ReportSegmentation, m_options.output.ReportAllFactors); - - XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); + XVERBOSE(1, *(manager.GetBestHypothesis()) << std::endl); +// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); From aa7ef1738b506ddb57f2205c4774352dc9b50ea6 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 12:20:48 +0100 Subject: [PATCH 12/32] another update in Debug output for Moses server --- moses/server/TranslationRequest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index c749d6210..d6a70ecc6 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -370,7 +370,7 @@ pack_hypothesis(const Moses::Manager& manager, vector const& BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(), options().output.ReportSegmentation, m_options.output.ReportAllFactors); - XVERBOSE(1, *(manager.GetBestHypothesis()) << std::endl); + XVERBOSE(1, "BEST TRANLSLATION:" << *(manager.GetBestHypothesis()) << std::endl); // XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); From f346dcd37fbaf2381c30d1ec58da7ea9a7c21842 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 12:42:07 +0100 Subject: [PATCH 13/32] another fix to debug output for moses server --- moses/server/TranslationRequest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index d6a70ecc6..fa9c9c636 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -370,7 +370,7 @@ pack_hypothesis(const Moses::Manager& manager, vector const& BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) manager.OutputSurface(target, *e, StaticData::Instance().GetOutputFactorOrder(), options().output.ReportSegmentation, m_options.output.ReportAllFactors); - XVERBOSE(1, "BEST TRANLSLATION:" << *(manager.GetBestHypothesis()) << std::endl); + XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl); // XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); From c610d0a6e74d627801c5603b50a7342c47ad651f Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Thu, 19 Nov 2015 13:54:24 +0000 Subject: [PATCH 14/32] Code decluttering. The class StatefuleFeatureFunction now provides an empty dummy implementation of various (virtual) Evaluate... functions. The corresponding empty implementations on derived classes have been removed. --- defer/ExternalFeature.h | 23 +---------- moses/FF/BleuScoreFeature.h | 16 -------- moses/FF/ConstrainedDecoding.h | 18 --------- moses/FF/ControlRecombination.h | 17 -------- moses/FF/CoveredReferenceFeature.cpp | 5 --- moses/FF/CoveredReferenceFeature.h | 8 ---- moses/FF/DistortionScoreProducer.h | 17 -------- .../FF/LexicalReordering/LexicalReordering.h | 22 ---------- moses/FF/OSM-Feature/OpSequenceModel.h | 12 ------ moses/FF/PhraseBoundaryFeature.h | 17 -------- moses/FF/PhraseOrientationFeature.h | 12 ------ moses/FF/SkeletonStatefulFF.cpp | 14 ++++++- moses/FF/SkeletonStatefulFF.h | 40 +++++++++++++------ moses/FF/StatefulFeatureFunction.h | 17 ++++++++ moses/FF/TargetBigramFeature.h | 16 -------- moses/FF/TargetNgramFeature.h | 18 --------- moses/FF/TreeStructureFeature.h | 16 -------- moses/LM/Base.cpp | 9 +++-- moses/LM/Base.h | 12 ------ moses/LM/BilingualLM.cpp | 16 -------- moses/LM/BilingualLM.h | 17 -------- moses/LM/RDLM.h | 13 +----- 22 files changed, 65 insertions(+), 290 deletions(-) diff --git a/defer/ExternalFeature.h b/defer/ExternalFeature.h index 6c0fb829e..3a284852d 100644 --- a/defer/ExternalFeature.h +++ b/defer/ExternalFeature.h @@ -51,27 +51,8 @@ public: void SetParameter(const std::string& key, const std::string& value); - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const { - } - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - - FFState* EvaluateWhenApplied( - const Hypothesis& cur_hypo, - const FFState* prev_state, - ScoreComponentCollection* accumulator) const; + FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, + ScoreComponentCollection* accumulator) const; FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index 266d06b68..753d92cf9 100644 --- a/moses/FF/BleuScoreFeature.h +++ b/moses/FF/BleuScoreFeature.h @@ -123,22 +123,6 @@ public: FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } bool Enabled() const { return m_enabled; diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index 3f28c43e1..769edd80f 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -42,24 +42,6 @@ public: return true; } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } - - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index 04f7d441b..034b1a790 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -58,23 +58,6 @@ public: return true; } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index d4606a8ec..5e4ada5b0 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -30,11 +30,6 @@ bool CoveredReferenceState::operator==(const FFState& other) const } ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const -{} void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index e6fc79ff7..aedfb3793 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -53,10 +53,6 @@ public: return new CoveredReferenceState(); } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const; void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase @@ -64,10 +60,6 @@ public: , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedScores = NULL) const; - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index 6e6bfbaeb..cfe0dc005 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -47,23 +47,6 @@ public: throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet"); } - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } }; } diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index c47e7f037..fa2747c82 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -64,28 +64,6 @@ public: UTIL_THROW2("LexicalReordering is not valid for chart decoder"); } - void - EvaluateWithSourceContext - (const InputType &input, - const InputPath &inputPath, - const TargetPhrase &targetPhrase, - const StackVec *stackVec, - ScoreComponentCollection& scoreBreakdown, - ScoreComponentCollection* estimatedScores = NULL) const - { } - - void - EvaluateTranslationOptionListWithSourceContext - (const InputType &input, const TranslationOptionList &transOptList) const - { } - - void - EvaluateInIsolation(const Phrase &source, - const TargetPhrase &targetPhrase, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection &estimatedScores) const - { } - bool GetHaveDefaultScores() { return m_haveDefaultScores; diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index b97450d6b..36a901974 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -37,18 +37,6 @@ public: int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index 916225b7d..9e84aaeef 100644 --- a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -54,23 +54,6 @@ public: throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet"); } - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } - void SetParameter(const std::string& key, const std::string& value); private: diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h index 82d3928d9..4d9dc20dc 100644 --- a/moses/FF/PhraseOrientationFeature.h +++ b/moses/FF/PhraseOrientationFeature.h @@ -296,18 +296,6 @@ public: , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const - {}; - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const - {} - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 4b077335e..0af74aabd 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -15,12 +15,19 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line) ReadParameters(); } + +// An empty implementation of this function is provided by StatefulFeatureFunction. +// Unless you are actually implementing this, please remove it from your +// implementation (and the declaration in the header file to reduce code clutter. void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const {} +// An empty implementation of this function is provided by StatefulFeatureFunction. +// Unless you are actually implementing this, please remove it from your +// implementation (and the declaration in the header file to reduce code clutter. void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase @@ -29,8 +36,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input , ScoreComponentCollection *estimatedScores) const {} -void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const +// An empty implementation of this function is provided by StatefulFeatureFunction. +// Unless you are actually implementing this, please remove it from your +// implementation (and the declaration in the header file to reduce code clutter. +void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext +(const InputType &input, const TranslationOptionList &translationOptionList) const {} FFState* SkeletonStatefulFF::EvaluateWhenApplied( diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h index bed54753c..196dcd27c 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -37,19 +37,35 @@ public: return new SkeletonState(0); } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const; + // An empty implementation of this function is provided by StatefulFeatureFunction. + // Unless you are actually implementing this, please remove this declaration here + // and the empty skeleton implementation from the corresponding .cpp + // file to reduce code clutter. + void + EvaluateInIsolation(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedScores) const; + + // An empty implementation of this function is provided by StatefulFeatureFunction. + // Unless you are actually implementing this, please remove this declaration here + // and the empty skeleton implementation from the corresponding .cpp + // file to reduce code clutter. + void + EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedScores = NULL) const; - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const; + // An empty implementation of this function is provided by StatefulFeatureFunction. + // Unless you are actually implementing this, please remove this declaration here + // and the empty skeleton implementation from the corresponding .cpp + // file to reduce code clutter. + void + EvaluateTranslationOptionListWithSourceContext + ( const InputType &input , const TranslationOptionList &translationOptionList) const; FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h index 814818f63..3f4e6f85e 100644 --- a/moses/FF/StatefulFeatureFunction.h +++ b/moses/FF/StatefulFeatureFunction.h @@ -66,6 +66,23 @@ public: return false; } + + virtual void + EvaluateInIsolation + (Phrase const& source, TargetPhrase const& targetPhrase, + ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection &estimatedScores) const {} + + virtual void + EvaluateWithSourceContext + (InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase, + StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection *estimatedFutureScore = NULL) const {} + + virtual void + EvaluateTranslationOptionListWithSourceContext + (const InputType &input, const TranslationOptionList &translationOptionList) const {} + }; diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index 200eca060..eacd27656 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -48,22 +48,6 @@ public: ScoreComponentCollection* ) const { throw std::logic_error("TargetBigramFeature not valid in chart decoder"); } - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 5bc2fc953..0a4b4aa25 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -215,24 +215,6 @@ public: virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const { - } - void SetParameter(const std::string& key, const std::string& value); private: diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index 361e8cc6a..353328466 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -63,22 +63,6 @@ public: void SetParameter(const std::string& key, const std::string& value); - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const {}; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const {}; - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp index 5fab4b547..9a05ef58e 100644 --- a/moses/LM/Base.cpp +++ b/moses/LM/Base.cpp @@ -69,10 +69,11 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c // out << "ReportHistoryOrder not implemented"; } -void LanguageModel::EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const +void +LanguageModel:: +EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase, + ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection &estimatedScores) const { // contains factors used by this LM float fullScore, nGramScore; diff --git a/moses/LM/Base.h b/moses/LM/Base.h index 7ea52e02e..c26c250ed 100644 --- a/moses/LM/Base.h +++ b/moses/LM/Base.h @@ -94,18 +94,6 @@ public: , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const { - } - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const { - } - }; } diff --git a/moses/LM/BilingualLM.cpp b/moses/LM/BilingualLM.cpp index a52ccc1f2..b8974bb5e 100644 --- a/moses/LM/BilingualLM.cpp +++ b/moses/LM/BilingualLM.cpp @@ -188,22 +188,6 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const return hashCode; } -void BilingualLM::EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const {} - -void BilingualLM::EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores) const -{ - -} - - FFState* BilingualLM::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/LM/BilingualLM.h b/moses/LM/BilingualLM.h index 5070c7cec..cb5075fd1 100644 --- a/moses/LM/BilingualLM.h +++ b/moses/LM/BilingualLM.h @@ -119,23 +119,6 @@ public: void Load(); - void EvaluateInIsolation( - const Phrase &source, - const TargetPhrase &targetPhrase, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection &estimatedScores) const; - - void EvaluateWithSourceContext( - const InputType &input, - const InputPath &inputPath, - const TargetPhrase &targetPhrase, - const StackVec *stackVec, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection *estimatedScores = NULL) const; - - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const {}; - FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, diff --git a/moses/LM/RDLM.h b/moses/LM/RDLM.h index 8e169310f..963c1e8d5 100644 --- a/moses/LM/RDLM.h +++ b/moses/LM/RDLM.h @@ -196,18 +196,7 @@ public: } void SetParameter(const std::string& key, const std::string& value); - void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const {}; - void EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const {}; - void EvaluateTranslationOptionListWithSourceContext(const InputType &input - , const TranslationOptionList &translationOptionList) const {}; + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, From c60d23ecb361bec5470d8286d2f789f94bdd601f Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 07:56:53 -0700 Subject: [PATCH 15/32] updated regression testing to work correctly with moses server tests --- regression-testing/run-single-test.perl | 68 ++++++++++++++++--------- regtest | 2 +- run-regtests.sh | 11 ++-- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index 0d5b85f09..1ff8ddb12 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -3,7 +3,6 @@ # $Id$ use Encode; -use XMLRPC::Lite; use utf8; use warnings; use strict; @@ -12,6 +11,7 @@ use MosesRegressionTesting; use Getopt::Long; use File::Temp qw ( tempfile ); use POSIX qw ( strftime ); +use POSIX ":sys_wait_h"; my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN ); my ($decoder, $test_name); @@ -21,17 +21,30 @@ my $BIN_TEST = $script_dir; my $results_dir; my $NBEST = 0; my $run_server_test = 0; -my $serverport = 16531; +my $serverport = int(rand(9999)) + 10001; my $url = "http://localhost:$serverport/RPC2"; - +my $startupTest = 0; GetOptions("decoder=s" => \$decoder, "test=s" => \$test_name, "data-dir=s"=> \$data_dir, "test-dir=s"=> \$test_dir, "results-dir=s"=> \$results_dir, "server"=> \$run_server_test, + "startuptest"=> \$startupTest ) or exit 1; +if($run_server_test) +{ + eval { + require XMLRPC::Lite; + import XMLRPC::Lite; + }; + if ($@) { + die "Error: XMLRPC::Lite not installed, moses server regression tests will not be run. $@"; + } + exit(1) if($startupTest); +} + die "Please specify a decoder with --decoder\n" unless $decoder; die "Please specify a test to run with --test\n" unless $test_name; @@ -160,35 +173,42 @@ sub exec_moses_server { if (not defined $pid) { warn "resources not avilable to fork Moses server\n"; $ec = 1; # to generate error -# $sig = 'SIGABRT'; } elsif ($pid == 0) { setpgrp(0, 0); - warn "Starting Moses server...\n"; + warn "Starting Moses server on port $serverport ...\n"; ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr "); exit; # this should not be reached unless the server fails to start } - else { - sleep 10; - my $proxy = XMLRPC::Lite->proxy($url); - open(TEXTIN, "$input") or die "Can not open the input file to translate with Moses server\n"; - binmode TEXTIN, ':utf8'; - open(TEXTOUT, ">$results/run.stdout"); - binmode TEXTOUT, ':utf8'; - while() - { - chop; - my $encoded = SOAP::Data->type(string => $_); - my %param = ("text" => $encoded); - my $result = $proxy->call("translate",\%param)->result; - print TEXTOUT $result->{'text'} . "\n"; - } - close(TEXTOUT); + while( 1==1 ) # wait until the server is listening for requests + { + sleep 5; + my $str = `grep "Listening on port $serverport" $results/run.stderr`; + last if($str =~ /Listening/); + } + my $proxy = XMLRPC::Lite->proxy($url); + warn "Opening file $input to write to $results\n"; + open(TEXTIN, "$input") or die "Can not open the input file to translate with Moses server\n"; + binmode TEXTIN, ':utf8'; + open(TEXTOUT, ">$results/run.stdout"); + binmode TEXTOUT, ':utf8'; + while() + { + chop; + my $encoded = SOAP::Data->type(string => $_); # NOTE: assuming properly encoded UTF-8 input: check tests before adding them! + my %param = ("text" => $encoded); + my $result = $proxy->call("translate",\%param)->result; + print TEXTOUT $result->{'text'} . "\n"; + } + close(TEXTIN); + close(TEXTOUT); + my $elapsed = time - $start_time; + print STDERR "Finished translating file $input\n"; + if(waitpid($pid, WNOHANG) <= 0) + { + warn "Killing process group $pid of the $decoder --server ... \n"; kill 9, -$pid; } - kill 9, -$pid; - waitpid $pid, 0; - my $elapsed = time - $start_time; return ($o, $elapsed, $ec, $sig); } diff --git a/regtest b/regtest index e07a00c97..f434f7e9b 160000 --- a/regtest +++ b/regtest @@ -1 +1 @@ -Subproject commit e07a00c9733e0fecb8433f1c9d5805d3f0b35c6f +Subproject commit f434f7e9b04057d82b4e5c55bd49962c8a2852be diff --git a/run-regtests.sh b/run-regtests.sh index 5b814e444..e4120ea26 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -7,11 +7,16 @@ set -e -o pipefail git submodule init git submodule update regtest -RECOMPILE=${RECOMPILE:-"-a"} +if [ "$RECOMPILE" == "NO" ] ; then + RECOMPILE= +else + RECOMPILE="-a" +fi # test compilation without xmlrpc-c # ./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest $RECOMPILE -q $@ || exit $? # test compilation with xmlrpc-c -./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest $RECOMPILE -q $@ - +if [ ./regression-testing/run-single-test.perl --server --startuptest ] ; then + ./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest $RECOMPILE -q $@ +fi From 6c7e69996f5ac1be846d0c04f7cee6444ebfdbb5 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 08:03:12 -0700 Subject: [PATCH 16/32] updated the regression testing wrapper script --- regression-testing/run-single-test.perl | 2 +- run-regtests.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index 1ff8ddb12..c99165247 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -42,7 +42,7 @@ if($run_server_test) if ($@) { die "Error: XMLRPC::Lite not installed, moses server regression tests will not be run. $@"; } - exit(1) if($startupTest); + exit(0) if($startupTest); } die "Please specify a decoder with --decoder\n" unless $decoder; diff --git a/run-regtests.sh b/run-regtests.sh index e4120ea26..2cb73aa2d 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -4,8 +4,8 @@ set -e -o pipefail -git submodule init -git submodule update regtest +# git submodule init +# git submodule update regtest if [ "$RECOMPILE" == "NO" ] ; then RECOMPILE= @@ -14,9 +14,9 @@ else fi # test compilation without xmlrpc-c -# ./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest $RECOMPILE -q $@ || exit $? +./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest $RECOMPILE -q $@ || exit $? # test compilation with xmlrpc-c -if [ ./regression-testing/run-single-test.perl --server --startuptest ] ; then +if ./regression-testing/run-single-test.perl --server --startuptest ; then ./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest $RECOMPILE -q $@ fi From 66be9acc4e6da7ddc3878851f9c71b13b499d8cc Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 08:12:31 -0700 Subject: [PATCH 17/32] restored submodule update in run-regtests.sh --- run-regtests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run-regtests.sh b/run-regtests.sh index 2cb73aa2d..a8a22d292 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -4,8 +4,8 @@ set -e -o pipefail -# git submodule init -# git submodule update regtest +git submodule init +git submodule update regtest if [ "$RECOMPILE" == "NO" ] ; then RECOMPILE= From 694c449b0d373c4d0ca591a984f72804d2acf305 Mon Sep 17 00:00:00 2001 From: Evgeny Matusov Date: Thu, 19 Nov 2015 08:56:20 -0700 Subject: [PATCH 18/32] final change to regression-test Jamfile to run server tests only when --with-xmlrpc-c option is present. --- regression-testing/Jamfile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 2a1b58364..ec6626475 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -25,13 +25,18 @@ if $(with-regtest) { actions reg_test_decode { $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } - actions reg_test_decode_server { + + if $(with-xmlrpc) { + actions reg_test_decode_server { $(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) + } + reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ; } + reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ; - reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ; + reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ; - + actions reg_test_score { $(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } @@ -60,10 +65,5 @@ if $(with-regtest) { reg_test misc : [ glob $(test-dir)/misc.* : $(test-dir)/misc.mml* ] : ..//prefix-bin ..//prefix-lib : @reg_test_misc ; reg_test misc-mml : [ glob $(test-dir)/misc.mml* ] : $(TOP)/scripts/ems/support/mml-filter.py $(TOP)/scripts/ems/support/defaultconfig.py : @reg_test_misc ; - if $(with-xmlrpc) { - alias all : phrase phrase-server chart mert score extract extractrules misc misc-mml ; - } - else { - alias all : phrase chart mert score extract extractrules misc misc-mml ; - } + alias all : phrase chart mert score extract extractrules misc misc-mml ; } From bf209a35a305f77722251efa2d83364344e5cda0 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Fri, 20 Nov 2015 00:00:42 +0000 Subject: [PATCH 19/32] daily automatic beautifier --- defer/ExternalFeature.h | 2 +- moses/FF/SkeletonStatefulFF.cpp | 6 ++--- moses/FF/SkeletonStatefulFF.h | 38 +++++++++++++++--------------- moses/FF/StatefulFeatureFunction.h | 16 ++++++------- moses/LM/Base.cpp | 6 ++--- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/defer/ExternalFeature.h b/defer/ExternalFeature.h index 3a284852d..3755bf4ff 100644 --- a/defer/ExternalFeature.h +++ b/defer/ExternalFeature.h @@ -52,7 +52,7 @@ public: void SetParameter(const std::string& key, const std::string& value); FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, - ScoreComponentCollection* accumulator) const; + ScoreComponentCollection* accumulator) const; FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 0af74aabd..2acaf2d2e 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -17,7 +17,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line) // An empty implementation of this function is provided by StatefulFeatureFunction. -// Unless you are actually implementing this, please remove it from your +// Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase @@ -26,7 +26,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source {} // An empty implementation of this function is provided by StatefulFeatureFunction. -// Unless you are actually implementing this, please remove it from your +// Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath @@ -37,7 +37,7 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input {} // An empty implementation of this function is provided by StatefulFeatureFunction. -// Unless you are actually implementing this, please remove it from your +// Unless you are actually implementing this, please remove it from your // implementation (and the declaration in the header file to reduce code clutter. void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext (const InputType &input, const TranslationOptionList &translationOptionList) const diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h index 196dcd27c..7544ddd30 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -38,32 +38,32 @@ public: } // An empty implementation of this function is provided by StatefulFeatureFunction. - // Unless you are actually implementing this, please remove this declaration here + // Unless you are actually implementing this, please remove this declaration here // and the empty skeleton implementation from the corresponding .cpp // file to reduce code clutter. - void + void EvaluateInIsolation(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedScores) const; - - // An empty implementation of this function is provided by StatefulFeatureFunction. - // Unless you are actually implementing this, please remove this declaration here - // and the empty skeleton implementation from the corresponding .cpp - // file to reduce code clutter. - void - EvaluateWithSourceContext(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , const StackVec *stackVec - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedScores = NULL) const; + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedScores) const; // An empty implementation of this function is provided by StatefulFeatureFunction. - // Unless you are actually implementing this, please remove this declaration here + // Unless you are actually implementing this, please remove this declaration here // and the empty skeleton implementation from the corresponding .cpp // file to reduce code clutter. - void + void + EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedScores = NULL) const; + + // An empty implementation of this function is provided by StatefulFeatureFunction. + // Unless you are actually implementing this, please remove this declaration here + // and the empty skeleton implementation from the corresponding .cpp + // file to reduce code clutter. + void EvaluateTranslationOptionListWithSourceContext ( const InputType &input , const TranslationOptionList &translationOptionList) const; diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h index 3f4e6f85e..ac9527108 100644 --- a/moses/FF/StatefulFeatureFunction.h +++ b/moses/FF/StatefulFeatureFunction.h @@ -66,20 +66,20 @@ public: return false; } - - virtual void + + virtual void EvaluateInIsolation - (Phrase const& source, TargetPhrase const& targetPhrase, - ScoreComponentCollection &scoreBreakdown, + (Phrase const& source, TargetPhrase const& targetPhrase, + ScoreComponentCollection &scoreBreakdown, ScoreComponentCollection &estimatedScores) const {} - virtual void + virtual void EvaluateWithSourceContext - (InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase, + (InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase, StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown, ScoreComponentCollection *estimatedFutureScore = NULL) const {} - - virtual void + + virtual void EvaluateTranslationOptionListWithSourceContext (const InputType &input, const TranslationOptionList &translationOptionList) const {} diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp index 9a05ef58e..4d4cd3c09 100644 --- a/moses/LM/Base.cpp +++ b/moses/LM/Base.cpp @@ -69,11 +69,11 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c // out << "ReportHistoryOrder not implemented"; } -void +void LanguageModel:: EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection &estimatedScores) const + ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection &estimatedScores) const { // contains factors used by this LM float fullScore, nGramScore; From 3ce2f6a55dc41bd56b73b53f66110de9b3ebde0b Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 13:54:18 +0000 Subject: [PATCH 20/32] Use absolute path for external dependencies. --- compile.sh | 3 ++- run-regtests.sh | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/compile.sh b/compile.sh index 043f47c30..01f86bc12 100755 --- a/compile.sh +++ b/compile.sh @@ -3,5 +3,6 @@ # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake set -e -o pipefail -./bjam --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@ +opt=$(pwd)/opt +./bjam --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@ diff --git a/run-regtests.sh b/run-regtests.sh index 8b64eac22..15eb134a5 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -2,13 +2,14 @@ # this script assumes that all 3rd-party dependencies are installed under ./opt # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake -set -e -o pipefail +set -e -o pipefail -x -git submodule init -git submodule update regtest +opt=$(pwd)/opt +# git submodule init +# git submodule update regtest # test compilation without xmlrpc-c -./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest -a -q $@ || exit $? +./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=./regtest -a -q $@ || exit $? # test compilation with xmlrpc-c -./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest -a -q $@ +./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=./regtest -a -q $@ From c6eca2ec87d9bb88e92bebf7957333ad9d369347 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 13:55:20 +0000 Subject: [PATCH 21/32] Only run regtests for DALM when compiled with --with-dalm=... . --- regression-testing/Jamfile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 3f07744c6..a6834b805 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -24,9 +24,13 @@ if $(with-regtest) { actions reg_test_decode { $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } - reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ; - reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ; - + reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; + reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; + if [ option.get "with-dalm" : : "yes" ] { + reg_test dalm : [ glob $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; + } else { + alias dalm ; + } actions reg_test_score { $(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } @@ -55,5 +59,5 @@ if $(with-regtest) { reg_test misc : [ glob $(test-dir)/misc.* : $(test-dir)/misc.mml* ] : ..//prefix-bin ..//prefix-lib : @reg_test_misc ; reg_test misc-mml : [ glob $(test-dir)/misc.mml* ] : $(TOP)/scripts/ems/support/mml-filter.py $(TOP)/scripts/ems/support/defaultconfig.py : @reg_test_misc ; - alias all : phrase chart mert score extract extractrules misc misc-mml ; + alias all : phrase chart mert score extract extractrules misc misc-mml dalm ; } From f72252944f92fc7864add8024b6de9daca512ac9 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 15:05:01 +0000 Subject: [PATCH 22/32] Bug fix in regression-testing/Jamfile. --- regression-testing/Jamfile | 1 + regtest | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 97e21927e..2452b27a5 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -31,6 +31,7 @@ if $(with-regtest) { $(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ; + } reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ; if [ option.get "with-dalm" : : "yes" ] { diff --git a/regtest b/regtest index f434f7e9b..37a595fd7 160000 --- a/regtest +++ b/regtest @@ -1 +1 @@ -Subproject commit f434f7e9b04057d82b4e5c55bd49962c8a2852be +Subproject commit 37a595fd7bf41226933c0fdb6fb792bdc877c3fd From 1fa81806c1354fa53316d69cf611edf9c3eb82da Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 17:07:17 +0000 Subject: [PATCH 23/32] Work in progress. --- moses/TranslationModel/UG/check-coverage3.cc | 64 +++++++++++++++++++- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/UG/check-coverage3.cc b/moses/TranslationModel/UG/check-coverage3.cc index d41e10ef3..c4a83877f 100644 --- a/moses/TranslationModel/UG/check-coverage3.cc +++ b/moses/TranslationModel/UG/check-coverage3.cc @@ -37,7 +37,7 @@ basename(string const path, string const suffix) size_t k = path.size() - suffix.size(); cout << path << " " << suffix << endl; cout << path.substr(0,p) << " " << path.substr(k) << endl; - return path.substr(p, suffix == &path[k] ? k-p : path.size() - p); + return path.substr(p+1, suffix == &path[k] ? k-p-1 : path.size() - p); } int main(int argc, char* argv[]) @@ -47,6 +47,7 @@ int main(int argc, char* argv[]) string line; string ifile = argv[4]; string docname = basename(ifile, string(".") + argv[2] + ".gz"); + id_type docid = B->docname2docid(docname); boost::iostreams::filtering_istream in; ugdiss::open_input_stream(ifile,in); while(getline(in,line)) @@ -57,13 +58,70 @@ int main(int argc, char* argv[]) for (size_t i = 0; i < snt.size(); ++i) { bitext_t::iter m(B->I1.get()); - for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k) + for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k); + for (size_t num_occurrences = m.ca(); m.size(); m.up()) { + if (size_t(m.ca()) == num_occurrences) continue; + num_occurrences = m.ca(); SPTR zilch; BitextSampler s(B.get(), m, zilch, 1000, 1000, sapt::random_sampling); s(); - cout << m.size() << " " << s.stats()->trg.size() << endl; + if (s.stats()->trg.size() == 0) continue; + // if (s.stats()->indoc[docname] > 10) continue; + sapt::pstats::indoc_map_t::const_iterator d + = s.stats()->indoc.find(docid); + size_t indoccnt = d != s.stats()->indoc.end() ? d->second : 0; + cout << m.size() << " : " << m.str(B->V1.get()) << " (" + << s.stats()->trg.size() << " entries; " + << indoccnt << "/" << s.stats()->good + << " samples in domain)" << endl; + vector > ppairs; + PhrasePair::SortDescendingByJointCount sorter; + expand(m,*B,*s.stats(),ppairs,NULL); + sort(ppairs.begin(),ppairs.end(),sorter); + boost::format fmt("%4d/%d/%d |%s| (%4.2f : %4.2f)"); + BOOST_FOREACH(PhrasePair& ppair, ppairs) + { + if (ppair.joint * 100 < ppair.good1) break; + ppair.good2 = ppair.raw2 * float(ppair.good1)/ppair.raw1; + ppair.good2 = max(ppair.good2, ppair.joint); + +#if 0 + cout << "\t" + << (fmt % ppair.joint % ppair.good1 % ppair.good2 + % B->T2->pid2str(B->V2.get(),ppair.p2) + % (float(ppair.joint)/ppair.good1) + % (float(ppair.joint)/ppair.good2) + ) << "\n"; + typedef std::map::const_iterator iter; + for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d) + { + // if (d != ppair.indoc.begin()) cout << "; "; + cout << (boost::format("\t\t%4d %s") % d->second + % B->docid2name(d->first)) + << endl; + } + cout << endl; +#else + cout << "\t" + << (fmt % ppair.joint % ppair.good1 % ppair.good2 + % B->T2->pid2str(B->V2.get(),ppair.p2) + % (float(ppair.joint)/ppair.good1) + % (float(ppair.joint)/ppair.good2) + ) << " ["; + typedef std::map::const_iterator iter; + for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d) + { + if (d != ppair.indoc.begin()) cout << "; "; + cout << (boost::format("%s: %d") % B->docid2name(d->first) + % d->second) ; + } + cout << "]" << endl; + +#endif + + } } } } From a76760880465c13893ed670f4ec43ba6208f24e0 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 17:09:18 +0000 Subject: [PATCH 24/32] Bug fix in keeping track of doc ids with uniform random sampling. --- moses/TranslationModel/UG/mm/ug_bitext_sampler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h index a36e0772f..3ecfaac3d 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h @@ -275,7 +275,7 @@ consider_sample(TokenPosition const& p) bitvector full_aln(100*100); PhraseExtractionRecord rec(p.sid, p.offset, p.offset + m_plen, !m_fwd, &aln, &full_aln); - int docid = m_bias ? m_bias->GetClass(p.sid) : -1; + int docid = m_bias ? m_bias->GetClass(p.sid) : m_bitext->sid2did(p.sid); if (!m_bitext->find_trg_phr_bounds(rec)) { // no good, probably because phrase is not coherent m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd); From 3f09aa40aa8daa00632c9ae96ecec42da15a39a9 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 20 Nov 2015 17:10:47 +0000 Subject: [PATCH 25/32] Additional functions for determining doc ids and document names from mapped bitexts. --- moses/TranslationModel/UG/mm/ug_bitext.h | 42 ++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h index aeb8703dc..4094dc9cc 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext.h +++ b/moses/TranslationModel/UG/mm/ug_bitext.h @@ -217,17 +217,42 @@ namespace sapt write_yawat_alignment ( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const; - std::string docname(id_type const sid) const; - + std::string sid2docname(id_type const sid) const; + std::string docid2name(id_type const sid) const; + int docname2docid(std::string const& name) const; + std::vector const* sid2did() const; + int sid2did(uint32_t sid) const; }; #include "ug_bitext_agenda.h" + template + int + Bitext:: + docname2docid(std::string const& name) const + { + std::map::const_iterator m; + m = m_docname2docid.find(name); + if (m != m_docname2docid.end()) return m->second; + return -1; + } + template std::string Bitext:: - docname(id_type const sid) const + docid2name(id_type const did) const + { + if (did < m_docname.size()) + return m_docname[did]; + else + return (boost::format("%d") % did).str(); + } + + template + std::string + Bitext:: + sid2docname(id_type const sid) const { if (sid < m_sid2docid->size() && (*m_sid2docid)[sid] < m_docname.size()) return m_docname[(*m_sid2docid)[sid]]; @@ -243,6 +268,17 @@ namespace sapt return m_sid2docid.get(); } + template + int + Bitext:: + sid2did(uint32_t sid) const + { + if (m_sid2docid) + return m_sid2docid->at(sid); + return -1; + } + + template SPTR Bitext:: From 87ec9f56c74d16e911dbaa3274f9957f244a61f2 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 23 Nov 2015 12:37:11 +0000 Subject: [PATCH 26/32] bugfix: options refactor broke (l)mbr --- moses/parameters/AllOptions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp index cfb925dfb..a7acdadfa 100644 --- a/moses/parameters/AllOptions.cpp +++ b/moses/parameters/AllOptions.cpp @@ -65,6 +65,7 @@ namespace Moses // set m_nbest_options.enabled = true if necessary: nbest.enabled = (nbest.enabled || mira || search.consensus || nbest.nbest_size > 0 + || mbr.enabled || lmbr.enabled || !output.SearchGraph.empty() || !output.SearchGraphExtended.empty() || !output.SearchGraphSLF.empty() From 9a7356bbed19062ff51b794c269e811328bb9feb Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 23 Nov 2015 16:57:35 +0000 Subject: [PATCH 27/32] Adaptation to renamed functions in Bitext class. --- moses/TranslationModel/UG/bitext-find.cc | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/moses/TranslationModel/UG/bitext-find.cc b/moses/TranslationModel/UG/bitext-find.cc index d02ce0710..4c6e13208 100644 --- a/moses/TranslationModel/UG/bitext-find.cc +++ b/moses/TranslationModel/UG/bitext-find.cc @@ -53,16 +53,16 @@ int main(int argc, char* argv[]) interpret_args(argc, argv); if (Q1.empty() && Q2.empty()) exit(0); - mmbitext B; string w; - B.open(bname, L1, L2); + boost::shared_ptr B(new mmbitext); string w; + B->open(bname, L1, L2); - Bitext::iter m1(B.I1.get(), *B.V1, Q1); + Bitext::iter m1(B->I1.get(), *B->V1, Q1); if (Q1.size() && m1.size() == 0) exit(0); - Bitext::iter m2(B.I2.get(), *B.V2, Q2); + Bitext::iter m2(B->I2.get(), *B->V2, Q2); if (Q2.size() && m2.size() == 0) exit(0); - bitvector check(B.T1->size()); + bitvector check(B->T1->size()); if (Q1.size() == 0 || Q2.size() == 0) check.set(); else (m2.markSentences(check)); @@ -87,23 +87,23 @@ int main(int argc, char* argv[]) size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1; std::vector caln; - // cout << sid << " " << B.docname(sid) << std::endl; - if (!B.find_trg_phr_bounds(sid, off, off+m.size(), + // cout << sid << " " << B->docname(sid) << std::endl; + if (!B->find_trg_phr_bounds(sid, off, off+m.size(), s1,s2,e1,e2,po_fwd,po_bwd, &caln, NULL, &m == &m2)) { // cout << "alignment failure" << std::endl; } - std::cout << sid << " " << B.docname(sid) + std::cout << sid << " " << B->sid2docname(sid) << " dfwd=" << po_fwd << " dbwd=" << po_bwd << "\n"; - write_sentence(*B.T1, sid, *B.V1, std::cout); std::cout << "\n"; - write_sentence(*B.T2, sid, *B.V2, std::cout); std::cout << "\n"; - B.write_yawat_alignment(sid, - m1.size() ? &m1 : NULL, - m2.size() ? &m2 : NULL, std::cout); + write_sentence(*B->T1, sid, *B->V1, std::cout); std::cout << "\n"; + write_sentence(*B->T2, sid, *B->V2, std::cout); std::cout << "\n"; + B->write_yawat_alignment(sid, + m1.size() ? &m1 : NULL, + m2.size() ? &m2 : NULL, std::cout); std::cout << std::endl; } From 6f1e39d64e60b86477ea91b9f2b8dd7163f1a860 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 23 Nov 2015 16:57:44 +0000 Subject: [PATCH 28/32] Adaptation to renamed functions in Bitext class. --- moses/TranslationModel/UG/check-coverage.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/moses/TranslationModel/UG/check-coverage.cc b/moses/TranslationModel/UG/check-coverage.cc index 28d3e8968..3cf954912 100644 --- a/moses/TranslationModel/UG/check-coverage.cc +++ b/moses/TranslationModel/UG/check-coverage.cc @@ -41,8 +41,8 @@ basename(string const path, string const suffix) int main(int argc, char* argv[]) { - bitext_t B; - B.open(argv[1],argv[2],argv[3]); + boost::shared_ptr B(new bitext_t); + B->open(argv[1],argv[2],argv[3]); string line; string ifile = argv[4]; string docname = basename(ifile, string(".") + argv[2] + ".gz"); @@ -52,10 +52,10 @@ int main(int argc, char* argv[]) { cout << line << " [" << docname << "]" << endl; vector snt; - B.V1->fillIdSeq(line,snt); + B->V1->fillIdSeq(line,snt); for (size_t i = 0; i < snt.size(); ++i) { - bitext_t::iter m(B.I1.get()); + bitext_t::iter m(B->I1.get()); for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k) { if (m.ca() > 500) continue; @@ -65,9 +65,10 @@ int main(int argc, char* argv[]) while (I.next != stop) { m.root->readEntry(I.next,I); - ++cnt[B.docname(I.sid)]; + ++cnt[B->sid2docname(I.sid)]; } - cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl; + cout << setw(8) << int(m.ca()) << " " + << B->V1->toString(&snt[i],&snt[k+1]) << endl; typedef pair entry; vector ranked; ranked.reserve(cnt.size()); BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e); From a074efc7cc64205c7752fe8edd59c6e0fd2bb203 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 23 Nov 2015 16:59:39 +0000 Subject: [PATCH 29/32] Change parameters only when specified during parameter updates in server mode. --- moses/parameters/NBestOptions.cpp | 2 +- moses/parameters/OptionsBaseClass.cpp | 8 ++++++-- moses/parameters/OptionsBaseClass.h | 2 +- moses/parameters/ReportingOptions.cpp | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp index e916c3437..c65f9e852 100644 --- a/moses/parameters/NBestOptions.cpp +++ b/moses/parameters/NBestOptions.cpp @@ -43,7 +43,7 @@ update(std::mapconst& param) params_t::const_iterator si = param.find("nbest"); if (si != param.end()) nbest_size = xmlrpc_c::value_int(si->second); - only_distinct = check(param, "nbest-distinct"); + only_distinct = check(param, "nbest-distinct", only_distinct); enabled = (nbest_size > 0); return true; } diff --git a/moses/parameters/OptionsBaseClass.cpp b/moses/parameters/OptionsBaseClass.cpp index 148aa5d24..a19aaf7a6 100644 --- a/moses/parameters/OptionsBaseClass.cpp +++ b/moses/parameters/OptionsBaseClass.cpp @@ -1,5 +1,7 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- #include "OptionsBaseClass.h" +#include "moses/Util.h" + namespace Moses { #ifdef HAVE_XMLRPC_C @@ -15,10 +17,12 @@ namespace Moses { bool OptionsBaseClass:: check(std::map const& param, - std::string const key) + std::string const key, bool dfltval) { std::map::const_iterator m; - return (param.find(key) != param.end()); + m = param.find(key); + if (m == param.end()) return dfltval; + return Scan(xmlrpc_c::value_string(m->second)); } #endif } diff --git a/moses/parameters/OptionsBaseClass.h b/moses/parameters/OptionsBaseClass.h index 71f9fa77a..ddec48192 100644 --- a/moses/parameters/OptionsBaseClass.h +++ b/moses/parameters/OptionsBaseClass.h @@ -13,6 +13,6 @@ namespace Moses #endif bool check(std::map const& param, - std::string const key); + std::string const key, bool dfltval); }; } diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index 640b6a177..10a0cfcb4 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -75,7 +75,7 @@ namespace Moses { ReportingOptions:: update(std::mapconst& param) { - ReportAllFactors = check(param, "report-all-factors"); + ReportAllFactors = check(param, "report-all-factors", ReportAllFactors); return true; } #endif From 94cd1f7433d07cd1bf1768faf48664692fff2a4a Mon Sep 17 00:00:00 2001 From: Philipp Koehn Date: Mon, 23 Nov 2015 18:12:56 -0500 Subject: [PATCH 30/32] when building mmsapt phrase table, also use mmsapt reordering table --- scripts/ems/experiment.meta | 2 ++ scripts/ems/experiment.perl | 2 +- scripts/training/train-model.perl | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 29719d878..526683363 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -729,12 +729,14 @@ extract-phrases in: corpus-mml-postfilter=OR=word-alignment scored-corpus out: extracted-phrases rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm domain-features baseline-extract lexicalized-reordering + pass-if: mmsapt only-existence-matters: domain-features default-name: model/extract build-reordering in: extracted-phrases out: reordering-table ignore-unless: lexicalized-reordering + pass-if: mmsapt rerun-on-change: lexicalized-reordering reordering-factors default-name: model/reordering-table final-model: yes diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index d2dd281e8..fe8a3c9ab 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2571,7 +2571,7 @@ sub get_config_tables { $cmd .= ":$numFF" if defined($numFF); $cmd .= " "; - $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table) if $reordering_table; + $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table) if $reordering_table && !defined($mmsapt); $cmd .= &get_table_name_settings("generation-factors","generation-table",$generation_table) if $generation_table; $cmd .= "-config $config "; diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index d87099e7e..3a71ea43d 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -2141,7 +2141,8 @@ sub create_ini { # sum up... $feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count path=$file input-factor=$input_factor output-factor=$output_factor"; - $feature_spec .= " L1=$___F L2=$___E " if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table + $feature_spec .= " L1=$___F L2=$___E" if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table + $feature_spec .= " lr-func=LexicalReordering0" if defined($_MMSAPT) && $i==0 && $REORDERING_LEXICAL; $feature_spec .= "\n"; unless ($phrase_table_impl==11) { # suffix array provides its weights at first iteration $weight_spec .= "TranslationModel$i="; @@ -2222,7 +2223,7 @@ sub create_ini { $table_file .= "."; $table_file .= $model->{"filename"}; $table_file .= ".gz"; - $feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor path=$table_file".(defined($_LEXICAL_REORDERING_DEFAULT_SCORES)?" default-scores=$_LEXICAL_REORDERING_DEFAULT_SCORES":"")."\n"; + $feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor".((defined($_MMSAPT)&&$i==0)?"":" path=$table_file").(defined($_LEXICAL_REORDERING_DEFAULT_SCORES)?" default-scores=$_LEXICAL_REORDERING_DEFAULT_SCORES":"")."\n"; $weight_spec .= "LexicalReordering$i="; for(my $j=0;$j<$model->{"numfeatures"};$j++) { $weight_spec .= " 0.3"; } $weight_spec .= "\n"; From 710915c088ae8277cfffbbf76e2ecef2e520e93f Mon Sep 17 00:00:00 2001 From: Jeroen Vermeulen Date: Tue, 24 Nov 2015 14:37:18 +0100 Subject: [PATCH 31/32] Python implementation of parallel scoring. Re-implementation of score-parallel.perl. Not a drop-in replacement; the command line is similar but different and uses the standard Python command-line parser. Written without much knowledge of the original script, so documentation in particular may seem nonsensical to experts. If you see something wrong, please help! --- scripts/generic/score_parallel.py | 775 ++++++++++++++++++++++++++++++ 1 file changed, 775 insertions(+) create mode 100755 scripts/generic/score_parallel.py diff --git a/scripts/generic/score_parallel.py b/scripts/generic/score_parallel.py new file mode 100755 index 000000000..b4e4815e9 --- /dev/null +++ b/scripts/generic/score_parallel.py @@ -0,0 +1,775 @@ +#! /usr/bin/env python +# +# This file is part of moses. Its use is licensed under the GNU Lesser General +# Public License version 2.1 or, at your option, any later version. +# +# Script contributed by Precision Translation Tools. + +"""Run Moses `score` jobs in parallel. + +This script is a replacement for `score-parallel.perl`. The two are similar, +but there are differences in usage. In addition, this script can be called +directly from Python code without the need to run it as a separate process. +""" + +from __future__ import ( + absolute_import, + print_function, + unicode_literals, + ) + +__metaclass__ = type + +from argparse import ArgumentParser +from contextlib import contextmanager +from datetime import datetime +import errno +import gzip +from multiprocessing import Pool +import os +import os.path +import pipes +from shutil import rmtree +from subprocess import check_call +import sys +import tempfile + + +def get_unicode_type(): + """Return the Unicode string type appropriate to this Python version.""" + if sys.version_info.major <= 2: + # Unicode string type. In Python 2 this is the "unicode" type, + # while "str" is a binary string type. + return unicode + else: + # Unicode string type. In Python 3 this is the default "str" type. + # The binary string type is now called "bytes". + return str + + +UNICODE_TYPE = get_unicode_type() + + +class CommandLineError(Exception): + """Invalid command line.""" + + +class ProgramFailure(Exception): + """Failure, not a bug, which is reported neatly to the user.""" + + +def parse_args(): + """Parse command line arguments, return as `Namespace`.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument( + '--extract-file', '-e', metavar='PATH', required=True, + help=( + "Path to input file: extract file (e.g. 'extract.sorted.gz' or " + "'extract.inv.sorted.gz'). Required.")) + parser.add_argument( + '--lex-file', '-l', metavar='PATH', required=True, + help=( + "Path to input file: lex file (e.g. 'lex.f2e' or 'lex.e2f'). " + "Required.")) + parser.add_argument( + '--output', '-o', metavar='PATH', required=True, + help=( + "Write phrase table to file PATH (e.g. 'phrase-table.half.f2e' " + "or 'phrase-table.half.e2f'). Required.")) + parser.add_argument( + '--inverse', '-i', action='store_true', + help="Inverse scoring. Defaults to direct scoring.") + parser.add_argument( + '--labels-file', '-L', metavar='PATH', + help="Also write source labels to file PATH.") + parser.add_argument( + '--parts-of-speech', '-p', metavar='PATH', + help="Also write parts-of-speech file to PATH.") + parser.add_argument( + '--flexibility-score', '-F', metavar='PATH', + help="Path to the 'flexibility_score.py' script. Defaults to none.") + parser.add_argument( + '--hierarchical', '-H', action='store_true', + help="Process hierarchical rules.") + parser.add_argument( + '--args', '-a', metavar='ARGUMENTS', + help="Additional arguments for `score` and `flexibility_score`.") + parser.add_argument( + '--sort', '-s', action='store_true', + help="Sort output file.") + parser.add_argument( + '--jobs', '-j', metavar='N', type=int, default=1, + help="Run up to N jobs in parallel. Defaults to %(default)s.") + parser.add_argument( + '--score-exe', '-x', metavar='PROGRAM', + help="Name of, or path to, the 'score' executable.") + parser.add_argument( + '--sort-command', '-S', metavar='COMMAND-LINE', + help=( + "Command line for sorting text files to standard output. " + "Must support operation as a pipe, as well as input files named " + "as command-line arguments.")) + parser.add_argument( + '--gzip-command', '-z', metavar='PROGRAM', + help="Path to a gzip or pigz executable.") + parser.add_argument( + '--verbose', '-v', action='store_true', + help="Print what's going on.") + parser.add_argument( + '--debug', '-d', action='store_true', + help="Don't delete temporary directories when done.") + return parser.parse_args() + + +def normalize_path(optional_path=None): + """Return a cleaned-up version of a given filesystem path, or None. + + Converts the path to the operating system's native conventions, and + removes redundancies like `.`. + + The return value will be `None`, an absolute path, or a relative path, + same as the argument. But it will have redundant path separators, + unnecessary detours through parent directories, and use of the current + directory "." removed. + """ + if optional_path is None: + return None + else: + path = os.path.normpath(optional_path) + path = path.replace('/', os.path.sep) + path = path.replace('\\', os.path.sep) + return path + + +def quote(path): + """Quote and escape a filename for use in a shell command. + + The Windows implementation is very limited and will break on anything + more advanced than a space. + """ + if os.name == 'posix': + return pipes.quote(path) + else: + # TODO: Improve escaping for Windows. + return '"%s"' % path + + +def sanitize_args(args): + """Check `args` for sanity, clean up, and set nontrivial defaults.""" + if args.jobs < 1: + raise CommandLineError("Number of parallel jobs must be 1 or more.") + if args.sort_command is None: + args.sort_command = find_first_executable( + ['neandersort', 'gsort', 'sort']) + if args.sort_command is None: + raise CommandLineError( + "No 'sort' command is available. " + "Choose one using the --sort-command option.") + if args.gzip_command is None: + args.gzip_command = find_first_executable(['pigz', 'gzip']) + if args.gzip_command is None: + raise CommandLineError( + "No 'gzip' or 'pigz' command is available. " + "Choose one using the --gzip-command option.") + if args.score_exe is None: + # Look for "score" executable. It may be in the current project + # directory somewhere, or in the PATH. + moses_dir = os.path.dirname(os.path.dirname( + os.path.abspath(__file__))) + args.score_exe = find_first_executable( + ['score'], + [ + moses_dir, + os.path.join(moses_dir, 'phrase-extract'), + os.path.join(moses_dir, 'binaries'), + ]) + args.extract_file = normalize_path(args.extract_file) + args.lex_file = normalize_path(args.lex_file) + args.output = normalize_path(args.output) + args.labels_file = normalize_path(args.labels_file) + args.parts_of_speech = normalize_path(args.parts_of_speech) + args.flexibility_score = normalize_path(args.flexibility_score) + args.score_exe = normalize_path(args.score_exe) + + +def add_exe_suffix(program): + """Return the full filename for an executable. + + On Windows, this adds a `.exe` suffix to the name. On other + systems, it returns the original name unchanged. + """ + if os.name == 'nt': + # Windows. + return program + '.exe' + else: + # Assume POSIX or similar. + return program + + +def find_executable(exe, extra_path=None): + """Return full path to an executable of the given name, or `None`. + + If the given name is a qualified path to an executable, it will be returned + unchanged. A qualified path where no executable is found results in a + `CommandLineError`. + """ + if extra_path is None: + extra_path = [] + + if os.path.sep in exe: + # The executable name includes a path. Only one place it can be. + if not os.path.isfile(exe) or not os.access(exe, os.X_OK): + raise CommandLineError("Not an executable: '%s'." % exe) + return exe + + for path in extra_path + os.getenv('PATH').split(os.pathsep): + full_path = os.path.join(path, exe) + if os.access(full_path, os.X_OK): + return full_path + return None + + +def find_first_executable(candidates, extra_path=None): + """Find the first available of the given candidate programs. + + :raise ProgramFailure: If none of `candidates` was found. + """ + for program in candidates: + executable = find_executable(add_exe_suffix(program), extra_path) + if executable is not None: + return executable + raise ProgramFailure( + "Could not find any of these executables in path: %s." + % ', '.join(candidates)) + + +def execute_shell(command, verbose=False): + """Run `command` string through the shell. + + Inherits environment, but sets `LC_ALL` to `C` for predictable results, + especially from sort commands. + + This uses a full-featured shell, including pipes, substitution, etc. So + remember to quote/escape arguments where appropriate! + """ + assert isinstance(command, UNICODE_TYPE), ( + "Wrong argument for execute_shell.") + if verbose: + print("Executing: %s" % command) + env = os.environ.copy() + if os.name == 'posix': + env['LC_ALL'] = 'C' + check_call(command, shell=True, env=env) + + +@contextmanager +def tempdir(keep=False): + """Context manager: temporary directory.""" + directory = tempfile.mkdtemp() + yield directory + if not keep: + rmtree(directory) + + +def make_dirs(path): + """Equivalent to `mkdir -p -- path`.""" + try: + os.makedirs(path) + except OSError as error: + if error.errno != errno.EEXIST: + raise + + +def open_file(path, mode='r'): + """Open a file, which may be gzip-compressed.""" + if path.endswith('.gz'): + return gzip.open(path, mode) + else: + return open(path, mode) + + +def count_lines(filename): + """Count the number of lines in `filename` (may be gzip-compressed).""" + count = 0 + with open_file(filename) as stream: + for _ in stream: + count += 1 + return count + + +def set_temp_dir(): + """Set temporary directory to `$MOSES_TEMP_DIR`, if set. + + Create the directory if necessary. + """ + temp_dir = os.getenv('MOSES_TEMP_DIR') + if temp_dir is not None: + make_dirs(temp_dir) + tempfile.tempdir = temp_dir + + +def strip_newline(line): + """Remove trailing carriage return and/or line feed, if present.""" + if line.endswith('\n'): + line = line[:-1] + if line.endswith('\r'): + line = line[:-1] + return line + + +def open_chunk_file(split_dir, chunk_number): + """Open a file to write one chunk of the extract file.""" + return open_file( + os.path.join(split_dir, 'extract.%d.gz' % chunk_number), 'w') + + +def name_context_chunk_file(split_dir, chunk_number): + """Compose file name for one chunk of the extract context file.""" + return os.path.join( + split_dir, 'extract.context.%d.gz' % chunk_number) + + +def extract_source_phrase(line): + """Extract the source phrase from an extract-file line.""" + return line.split(b'|||', 1)[0] + + +def cut_context_file(last_source_phrase, chunk_file, last_line, + context_stream): + """Write one chunk of extract context file into its own file. + + :param last_source_phrase: Last source phrase that should be in the + chunk. Stop processing after this source phrase. + :param chunk_file: Path to the extract context file for this chunk. + :param last_line: Previously read line that may still need writing. + :param context_stream: Extract context file, opened for reading. + :return: Last line read from `context_stream`. This line will still + need processing. + """ + # TODO: Use open_file. + with gzip.open(chunk_file, 'w') as chunk: + if last_line is not None: + chunk.write('%s\n' % last_line) + + # Are we processing our last source phrase yet? + on_last_source_phrase = False + + # Write all lines in context file until we meet last source phrase + # in extract file. + for line in context_stream: + # Reading from a gzip file returns lines *including the newline*. + # Either way, we want to ignore carriage returns as well. + line = strip_newline(line) + source_phrase = extract_source_phrase(line) + if on_last_source_phrase and source_phrase != last_source_phrase: + # First new source phrase after our last one. We're done. + return line + else: + # Still adding lines to our chunk. + chunk.write('%s\n' % line) + if source_phrase == last_source_phrase: + # We're on our last source phrase now. + on_last_source_phrase = True + + +def split_extract_files(split_dir, extract_file, extract_context_file=None, + jobs=1): + """Split extract file into chunks, so we can process them in parallel. + + :param split_dir: A temporary directory where this function can write + temporary files. The caller must ensure that this directory will be + cleaned up after it's done with the files. + :return: An iterable of tuples. Each tuple hols a partial extract file, + and the corresponding context file. The files may be in `split_dir`, + or there may just be the original extract file. + """ + if jobs == 1: + # No splitting needed. Read the original file(s). + return [(extract_file, extract_context_file)] + + # Otherwise: split files. + files = [] + num_lines = count_lines(extract_file) + chunk_size = (num_lines + jobs - 1) / jobs + assert isinstance(chunk_size, int) + + line_count = 0 + chunk_number = 0 + prev_source_phrase = None + last_line_context = None + extract_stream = open_file(extract_file) + chunk_file = open_chunk_file(split_dir, chunk_number) + if extract_context_file is None: + chunk_context_file = None + if extract_context_file is not None: + context_stream = open_file(extract_context_file) + + for line in extract_stream: + line_count += 1 + line = line.decode('utf-8') + line = strip_newline(line) + if line_count >= chunk_size: + # At or over chunk size. Cut off at next source phrase change. + source_phrase = extract_source_phrase(line) + if prev_source_phrase is None: + # Start looking for a different source phrase. + prev_source_phrase = source_phrase + elif source_phrase == prev_source_phrase: + # Can't cut yet. Still working on the same source phrase. + pass + else: + # Hit first new source phrase after chunk limit. Cut new + # file(s). + chunk_file.close() + if extract_context_file is not None: + chunk_context_file = name_context_chunk_file( + split_dir, chunk_number) + last_line_context = cut_context_file( + prev_source_phrase, chunk_context_file, + last_line_context, context_stream) + files.append((chunk_file.name, chunk_context_file)) + + # Start on new chunk. + prev_source_phrase = None + line_count = 0 + chunk_number += 1 + chunk_file = open_chunk_file(split_dir, chunk_number) + chunk_file.write(('%s\n' % line).encode('utf-8')) + + chunk_file.close() + if extract_context_file is not None: + chunk_context_file = name_context_chunk_file(split_dir, chunk_number) + last_line_context = cut_context_file( + prev_source_phrase, chunk_number, last_line_context, + context_stream) + files.append((chunk_file.name, chunk_context_file)) + return files + + +def compose_score_command(extract_file, context_file, half_file, + flex_half_file, args): + """Compose command line text to run one instance of `score`. + + :param extract_file: One chunk of extract file. + :param context_file: If doing flexibility scoring, one chunk of + extract context file. Otherwise, None. + :param half_file: ??? + :param flex_half_file: ??? + :param args: Arguments namespace. + """ + command = [ + args.score_exe, + extract_file, + args.lex_file, + half_file, + ] + if args.args not in (None, ''): + command.append(args.args) + other_args = build_score_args(args) + if other_args != '': + command.append(other_args) + if context_file is not None: + command += [ + '&&', + find_first_executable(['bzcat']), + '|', + quote(args.flexibility_score), + quote(context_file), + ] + if args.inverse: + command.append('--Inverse') + if args.hierarchical: + command.append('--Hierarchical') + command += [ + '|', + quote(args.gzip_command), + '-c', + '>%s' % quote(flex_half_file), + ] + return ' '.join(command) + + +def score_parallel(split_dir, file_pairs, args): + """Run the `score` command in parallel. + + :param split_dir: Temporary directory where we can create split files. + :param file_pairs: Sequence of tuples for the input files, one tuple + per chunk of the work. Each tuple consists of a partial extract + file, and optionally a partial extract context file. + :param args: Arguments namespace. + :return: A list of tuples. Each tuple contains two file paths. The first + is for a partial half-phrase-table file. The second is for the + corresponding partial flex file, if a context file is given; or + `None` otherwise. + """ + partial_files = [] + # Pool of worker processes for executing the partial "score" invocations + # concurrently. + pool = Pool(args.jobs) + try: + for chunk_num, file_pair in enumerate(file_pairs): + half_file = os.path.join( + split_dir, 'phrase-table.half.%06d.gz' % chunk_num) + extract_file, context_file = file_pair + if context_file is None: + flex_half_file = None + else: + flex_half_file = os.path.join( + split_dir, 'phrase-table.half.%06d.flex.gz' % chunk_num) + # Pickling of arguments for the pool is awkward on Windows, so + # keep them simple. Compose the command line in the parent + # process, then hand them to worker processes which execute them. + command_line = compose_score_command( + extract_file, context_file, half_file, flex_half_file, args) + pool.apply_async( + execute_shell, (command_line, ), {'verbose': args.verbose}) + partial_files.append((half_file, flex_half_file)) + pool.close() + except BaseException: + pool.terminate() + raise + finally: + pool.join() + return partial_files + + +def merge_and_sort(files, output, sort_command=None, gzip_exe=None, + verbose=False): + """Merge partial files. + + :param files: List of partial half-phrase-table files. + :param output: Path for resulting combined phrase-table file. + """ +# TODO: The Perl code mentioned "sort" and "flexibility_score" here. +# What do we do with those? + + # Sort whether we're asked to or not, as a way of combining the input + # files. + if sort_command == 'neandersort': + # Neandersort transparently decompresses input and compresses output. + check_call([ + 'neandersort', + '-o', output, + ] + files) + else: + command = ( + "%(gzip)s -c -d %(files)s | " + "%(sort)s | " + "%(gzip)s -c >>%(output)s" + % { + 'gzip': quote(gzip_exe), + 'sort': sort_command, + 'files': ' '.join(map(quote, files)), + 'output': quote(output), + }) + execute_shell(command, verbose=verbose) + + +def build_score_args(args): + """Compose command line for the `score` program.""" + command_line = [] + if args.labels_file: + command_line += [ + '--SourceLabels', + '--SourceLabelCountsLHS', + '--SourceLabelSet', + ] + if args.parts_of_speech: + command_line.append('--PartsOfSpeech') + if args.inverse: + command_line.append('--Inverse') + if args.args is not None: + command_line.append(args.args) + return ' '.join(command_line) + + +def list_existing(paths): + """Return, in the same order, those of the given files which exist.""" + return filter(os.path.exists, paths) + + +def compose_coc_path_for(path): + """Compose COC-file path for the given file.""" + return '%s.coc' % path + + +def read_cocs(path): + """Read COC file at `path`, return contents as tuple of ints.""" + with open(path) as lines: + return tuple( + int(line.rstrip('\r\n')) + for line in lines + ) + + +def add_cocs(original, additional): + """Add two tuples of COCs. Extend as needed.""" + assert not (original is None and additional is None), "No COCs to add!" + if original is None: + return additional + elif additional is None: + return original + else: + common = tuple(lhs + rhs for lhs, rhs in zip(original, additional)) + return ( + common + + tuple(original[len(common):]) + + tuple(additional[len(common):])) + + +def merge_coc(files, output): + """Merge COC files for the given partial files. + + Each COC file is a series of integers, one per line. This reads them, and + adds them up line-wise into one file of the same format: the sum of the + numbers the respective files have at line 1, the sum of the numbers the + respective files have at line 2, and so on. + """ + assert len(files) > 0, "No partial files - no work to do." + extract_files = [extract_file for extract_file, _ in files] + if not os.path.exists(compose_coc_path_for(extract_files[0])): + # Nothing to merge. + return + totals = None +# TODO: Shouldn't we just fail if any of these files is missing? + for coc_path in list_existing(map(compose_coc_path_for, extract_files)): + totals = add_cocs(totals, read_cocs(coc_path)) + + # Write to output file. + with open(output, 'w') as output_stream: + for entry in totals: + output_stream.write('%d\n' % entry) + + +def suffix_line_numbers(infile, outfile): + """Rewrite `infile` to `outfile`; suffix line number to each line. + + The line number is zero-based, and separated from the rest of the line + by a single space. + """ + temp_file = '%s.numbering' % outfile + with open(infile, 'r') as instream, open(outfile, 'w') as outstream: + line_no = 0 + for line in instream: + outstream.write(line) + outstream.write(' %d\n' % line_no) + line_no += 1 + os.rename(temp_file, outfile) + + +def compose_source_labels_path_for(path): + """Return source labels file path for given file.""" + return '%s.syntaxLabels.src' % path + + +def merge_numbered_files(inputs, output, header_lines, sort_command, + verbose=False): + """Sort and merge files `inputs`, add header and line numbers. + + :param inputs: Iterable of input files. + :param output: Output file. + :header_lines: Iterable of header lines. + :sort_command: Command line for sorting input files. + """ + sort_temp = '%s.sorting' % output + with open(sort_temp, 'w') as stream: + for line in header_lines: + stream.write(line) + stream.write('\n') + execute_shell( + "%s %s >>%s" % ( + sort_command, + ' '.join(map(quote, inputs)), + quote(sort_temp)), + verbose=verbose) + suffix_line_numbers(sort_temp, output) + + +def merge_source_labels(files, output, sort_command, verbose=False): + """Merge source labels files.""" +# TODO: Shouldn't we just fail if any of these files is missing? + labels_files = list_existing(map(compose_source_labels_path_for, files)) + header = [ + 'GlueTop', + 'GlueX', + 'SSTART', + 'SEND', + ] + merge_numbered_files( + labels_files, output, header, sort_command, verbose=verbose) + + +def compose_parts_of_speech_path_for(path): + """Return parts-of-speech file path for given file.""" + return '%s.partsOfSpeech' % path + + +def merge_parts_of_speech(files, output, sort_command, verbose=False): + """Merge parts-of-speech files into output.""" +# TODO: Shouldn't we just fail if any of these files is missing? + parts_files = list_existing(map(compose_parts_of_speech_path_for, files)) + header = [ + 'SSTART', + 'SEND', + ] + merge_numbered_files( + parts_files, output, header, sort_command, verbose=verbose) + + +def main(): + """Command-line entry point. Marshals and forwards to `score_parallel`.""" + args = parse_args() + sanitize_args(args) + set_temp_dir() + + if args.flexibility_score is None: + extract_context_file = None + else: + extract_context_file = args.extract_file.replace( + 'extract.', 'extract.context.') + + if args.verbose: + print("Started %s." % datetime.now()) + print("Using '%s' for gzip." % args.gzip_command) + + with tempdir(args.debug) as split_dir: + extract_files = split_extract_files( + split_dir, args.extract_file, + extract_context_file=extract_context_file, jobs=args.jobs) + + scored_files = score_parallel(split_dir, extract_files, args) + + if args.verbose: + sys.stderr.write("Finished score %s.\n" % datetime.now()) + +# TODO: Pass on "sort" and "flexibility-score" arguments? + merge_and_sort( + [phrase_chunk for phrase_chunk, _ in scored_files], args.output, + sort_command=args.sort_command, gzip_exe=args.gzip_command, + verbose=args.verbose) + merge_coc(extract_files, compose_coc_path_for(args.output)) + + if not args.inverse and args.labels_file is not None: + if args.verbose: + print("Merging source labels files.") + merge_source_labels( + extract_files, args.labels_file, + sort_command=args.sort_command, verbose=args.verbose) + + if not args.inverse and args.parts_of_speech is not None: + if args.verbose: + print("Merging parts-of-speech files.") + merge_parts_of_speech( + extract_files, args.parts_of_speech, + sort_command=args.sort_command, verbose=args.verbose) + + +if __name__ == '__main__': + try: + main() + except ProgramFailure as error: + sys.stderr.write('%s\n' % error) + sys.exit(1) + except CommandLineError as error: + sys.stderr.write("Command line error: %s\n" % error) + sys.exit(2) From 8678e0361fdab8cd9f932e594e55b946b38b6521 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 24 Nov 2015 15:10:07 +0000 Subject: [PATCH 32/32] eclipse --- contrib/other-builds/moses/.project | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 009acf0ba..6c9b33129 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -2430,6 +2430,16 @@ 1 PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h + + parameters/OOVHandlingOptions.cpp + 1 + PARENT-3-PROJECT_LOC/moses/parameters/OOVHandlingOptions.cpp + + + parameters/OOVHandlingOptions.h + 1 + PARENT-3-PROJECT_LOC/moses/parameters/OOVHandlingOptions.h + parameters/OptionsBaseClass.cpp 1