From 1a26cb84140bde842b0b60c6888e7f169536e849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fulajt=C3=A1r?= Date: Thu, 27 Aug 2015 15:15:32 +0200 Subject: [PATCH 01/50] Added a simple support for the factored systems. --- scripts/analysis/oov.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/analysis/oov.pl b/scripts/analysis/oov.pl index 9756887c9..5228f0f45 100755 --- a/scripts/analysis/oov.pl +++ b/scripts/analysis/oov.pl @@ -176,6 +176,13 @@ sub ngrams { return { md5(encode_utf8($sent)) => 1 }; } else { my @words = split /\s+/, $sent; + + #factors + if ( $sent =~ m/[|]/) { + my $use_index = 0; # default factor is the first one + @words = map { ( split /[|]/, $_ ) [$use_index] } @words; + } + my $out; if ($n == 1) { foreach my $w (@words) { From dd9eb54ec4f1a59bf73a88755f2c7343d9d8dd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fulajt=C3=A1r?= Date: Mon, 12 Oct 2015 18:47:45 +0200 Subject: [PATCH 02/50] Named group added for the safer 'protected patterns' recognition regexp. In the original code, numbered references are used, which might actually collide if any group is used inside the $protected_pattern string. For example, take the protected_pattern (loaded from a file): (http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+. If we use the numbered reference, $2 will refer to (http[s]?|ftp) instead of to the (.*) inside: while ($t =~ /($protected_pattern)(.*)$/) { Naming the groups resolves this issue. 
--- scripts/tokenizer/tokenizer.perl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl index a1eb01c0f..3aaad28a0 100755 --- a/scripts/tokenizer/tokenizer.perl +++ b/scripts/tokenizer/tokenizer.perl @@ -243,9 +243,9 @@ sub tokenize my @protected = (); foreach my $protected_pattern (@protected_patterns) { my $t = $text; - while ($t =~ /($protected_pattern)(.*)$/) { - push @protected, $1; - $t = $2; + while ($t =~ /(?<PATTERN>$protected_pattern)(?<TAIL>.*)$/) { + push @protected, $+{PATTERN}; + $t = $+{TAIL}; } } From 28c8ff5ad01f9824405955102807616a376866b1 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Sun, 13 Nov 2016 00:00:33 +0000 Subject: [PATCH 03/50] daily automatic beautifier --- .../PhraseDictionaryMemoryPerSentenceOnDemand.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp index db570968c..4675d06c8 100644 --- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp @@ -22,7 +22,8 @@ void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts } -TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const { +TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const +{ Coll &coll = GetColl(); From a0b8c570704ccaccfb5f4fab0e709b27c6a908ed Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 14 Nov 2016 23:43:19 +0000 Subject: [PATCH 04/50] error message --- moses/LM/Implementation.cpp | 3 ++- moses/LM/SkeletonLM.cpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index 
eb67100ca..c8fc5df32 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -41,7 +41,8 @@ using namespace std; namespace Moses { LanguageModelImplementation::LanguageModelImplementation(const std::string &line) - :LanguageModel(line) +:LanguageModel(line) +,m_nGramOrder(NOT_FOUND) { } diff --git a/moses/LM/SkeletonLM.cpp b/moses/LM/SkeletonLM.cpp index 23958e688..f944de23a 100644 --- a/moses/LM/SkeletonLM.cpp +++ b/moses/LM/SkeletonLM.cpp @@ -11,6 +11,9 @@ SkeletonLM::SkeletonLM(const std::string &line) { ReadParameters(); + UTIL_THROW_IF2(m_nGramOrder == NOT_FOUND, "Must set order"); + UTIL_THROW_IF2(m_nGramOrder <= 1, "Ngram order must be more than 1"); + FactorCollection &factorCollection = FactorCollection::Instance(); // needed by parent language model classes. Why didn't they set these themselves? From 9519dca56f77ee25f8b253324440d6ac5f36b9b8 Mon Sep 17 00:00:00 2001 From: MosesAdmin Date: Tue, 15 Nov 2016 00:00:37 +0000 Subject: [PATCH 05/50] daily automatic beautifier --- moses/LM/Implementation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index c8fc5df32..3208c3a7b 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -41,8 +41,8 @@ using namespace std; namespace Moses { LanguageModelImplementation::LanguageModelImplementation(const std::string &line) -:LanguageModel(line) -,m_nGramOrder(NOT_FOUND) + :LanguageModel(line) + ,m_nGramOrder(NOT_FOUND) { } From 7a150bb060e57a1409a5a26643ce87789a08a582 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sun, 20 Nov 2016 20:51:06 +0000 Subject: [PATCH 06/50] eclipse --- contrib/moses2/.cproject | 8 ++++---- contrib/other-builds/OnDiskPt/.cproject | 8 ++++---- contrib/other-builds/moses/.cproject | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/contrib/moses2/.cproject b/contrib/moses2/.cproject index a8e149f99..82b82d591 100644 --- a/contrib/moses2/.cproject 
+++ b/contrib/moses2/.cproject @@ -11,12 +11,12 @@ + + - - @@ -106,13 +106,13 @@ + + - - diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject index e32a5baea..f551380fd 100644 --- a/contrib/other-builds/OnDiskPt/.cproject +++ b/contrib/other-builds/OnDiskPt/.cproject @@ -11,12 +11,12 @@ + + - - @@ -72,13 +72,13 @@ + + - - diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 6945d7ecf..81da1d22b 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,11 +11,11 @@ + - @@ -86,12 +86,12 @@ + - From 50bbf7cb2671a71db2cf7686ebabee8a9a2bb9f8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 10:08:12 +0000 Subject: [PATCH 07/50] eclipse --- .../other-builds/CreateProbingPT2/.cproject | 162 ------------------ .../other-builds/CreateProbingPT2/.project | 38 ---- 2 files changed, 200 deletions(-) delete mode 100644 contrib/other-builds/CreateProbingPT2/.cproject delete mode 100644 contrib/other-builds/CreateProbingPT2/.project diff --git a/contrib/other-builds/CreateProbingPT2/.cproject b/contrib/other-builds/CreateProbingPT2/.cproject deleted file mode 100644 index 51aaee6e6..000000000 --- a/contrib/other-builds/CreateProbingPT2/.cproject +++ /dev/null @@ -1,162 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/CreateProbingPT2/.project b/contrib/other-builds/CreateProbingPT2/.project deleted file mode 100644 index 1f8c4bd3e..000000000 --- a/contrib/other-builds/CreateProbingPT2/.project +++ /dev/null @@ -1,38 +0,0 @@ - - - CreateProbingPT2 - - - lm - moses - moses2 - util - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - 
org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - CreateProbingPT2.cpp - 1 - PARENT-2-PROJECT_LOC/moses2/CreateProbingPT2.cpp - - - From 1fe261b5d49d69e6467df275ac0d88213745af61 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 10:35:30 +0000 Subject: [PATCH 08/50] eclipse --- contrib/other-builds/CreateOnDiskPt/.cproject | 9 +++++---- .../other-builds/CreateProbingPT/.cproject | 9 +++++---- contrib/other-builds/consolidate/.cproject | 1 + contrib/other-builds/extractor/.cproject | 5 +++-- contrib/other-builds/moses/.project | 20 +++++++++++++++++++ contrib/other-builds/server/.cproject | 5 +++-- 6 files changed, 37 insertions(+), 12 deletions(-) diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject index f52490c1a..95c0a6a01 100644 --- a/contrib/other-builds/CreateOnDiskPt/.cproject +++ b/contrib/other-builds/CreateOnDiskPt/.cproject @@ -5,16 +5,16 @@ + - - + @@ -78,6 +78,7 @@ + @@ -97,16 +98,16 @@ + - - + diff --git a/contrib/other-builds/CreateProbingPT/.cproject b/contrib/other-builds/CreateProbingPT/.cproject index f925736af..ef52fa87a 100644 --- a/contrib/other-builds/CreateProbingPT/.cproject +++ b/contrib/other-builds/CreateProbingPT/.cproject @@ -5,16 +5,16 @@ + - - + @@ -40,6 +40,7 @@ + diff --git a/contrib/other-builds/extractor/.cproject b/contrib/other-builds/extractor/.cproject index 613c41d5c..79805f176 100644 --- a/contrib/other-builds/extractor/.cproject +++ b/contrib/other-builds/extractor/.cproject @@ -14,7 +14,7 @@ - + @@ -40,6 +40,7 @@ + - + diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index c6b7de6f7..0f397df33 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -565,6 
+565,16 @@ 1 PARENT-3-PROJECT_LOC/moses/OutputCollector.h + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/moses/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/moses/OutputFileStream.h + PCNTools.cpp 1 @@ -2175,6 +2185,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h + + TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp + + + TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h + TranslationModel/PhraseDictionaryMultiModel.cpp 1 diff --git a/contrib/other-builds/server/.cproject b/contrib/other-builds/server/.cproject index 281403516..9789dbfb7 100644 --- a/contrib/other-builds/server/.cproject +++ b/contrib/other-builds/server/.cproject @@ -15,7 +15,7 @@ - + @@ -52,6 +52,7 @@ + - + From 99da2f1b7a818f45ac1f64c56a9fffb13c67146d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 22 Nov 2016 11:40:45 +0000 Subject: [PATCH 09/50] missing header file --- contrib/moses2/legacy/OutputCollector.h | 1 + moses/OutputCollector.h | 1 + 2 files changed, 2 insertions(+) diff --git a/contrib/moses2/legacy/OutputCollector.h b/contrib/moses2/legacy/OutputCollector.h index 7529cd352..5504d9add 100644 --- a/contrib/moses2/legacy/OutputCollector.h +++ b/contrib/moses2/legacy/OutputCollector.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "util/exception.hh" diff --git a/moses/OutputCollector.h b/moses/OutputCollector.h index 0d6f37472..797cc85cf 100644 --- a/moses/OutputCollector.h +++ b/moses/OutputCollector.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "Util.h" #include "util/exception.hh" From 87dbd677da21458cd5e616ae857c09385d5d4819 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 23 Nov 2016 13:11:53 +0000 Subject: [PATCH 10/50] missing header 
file --- contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h index 5706fca09..b91ef8f6c 100644 --- a/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h +++ b/contrib/moses2/TranslationModel/CompactPT/BlockHashIndex.h @@ -29,6 +29,7 @@ #include #include #include +#include #include "MurmurHash3.h" #include "StringVector.h" From 288af6e42572007283e02f225d919cfc5255d867 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 28 Nov 2016 10:51:56 +0000 Subject: [PATCH 11/50] move function to cpp file --- contrib/moses2/PhraseBased/PhraseImpl.cpp | 12 ++++++++++++ contrib/moses2/PhraseBased/PhraseImpl.h | 12 +----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/contrib/moses2/PhraseBased/PhraseImpl.cpp b/contrib/moses2/PhraseBased/PhraseImpl.cpp index 004792147..00f55a35b 100644 --- a/contrib/moses2/PhraseBased/PhraseImpl.cpp +++ b/contrib/moses2/PhraseBased/PhraseImpl.cpp @@ -10,6 +10,18 @@ using namespace std; namespace Moses2 { +PhraseImpl *PhraseImpl::CreateFromString(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str) +{ + std::vector toks = Moses2::Tokenize(str); + size_t size = toks.size(); + PhraseImpl *ret; + + ret = new (pool.Allocate()) PhraseImpl(pool, size); + + ret->PhraseImplTemplate::CreateFromString(vocab, system, toks); + return ret; +} } diff --git a/contrib/moses2/PhraseBased/PhraseImpl.h b/contrib/moses2/PhraseBased/PhraseImpl.h index a7db9a9e5..787cdf58d 100644 --- a/contrib/moses2/PhraseBased/PhraseImpl.h +++ b/contrib/moses2/PhraseBased/PhraseImpl.h @@ -9,17 +9,7 @@ class PhraseImpl: public PhraseImplTemplate { public: static PhraseImpl *CreateFromString(MemPool &pool, FactorCollection &vocab, - const System &system, const std::string &str) - { - std::vector toks = Moses2::Tokenize(str); - size_t size = 
toks.size(); - PhraseImpl *ret; - - ret = new (pool.Allocate()) PhraseImpl(pool, size); - - ret->PhraseImplTemplate::CreateFromString(vocab, system, toks); - return ret; - } + const System &system, const std::string &str); PhraseImpl(MemPool &pool, size_t size) : PhraseImplTemplate(pool, size) From 0f12557e1928637934b629ab4dd133bee4ff9f32 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 12:55:20 +0000 Subject: [PATCH 12/50] port beam threshold from Moses --- contrib/moses2/HypothesisColl.cpp | 19 +++++++++++++++++++ contrib/moses2/HypothesisColl.h | 3 +++ contrib/moses2/legacy/Parameter.cpp | 4 ++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 2af5465e3..a47c7a5de 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -22,6 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) : m_coll(MemPoolAllocator(mgr.GetPool())), m_sortedHypos( NULL) { + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = -std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -50,6 +52,23 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { + SCORE futureScore = hypo->GetFutureScore(); + if (futureScore < m_worstScore) { + // beam threshold + hypoRecycle.Recycle(hypo); + return; + } + + if (futureScore > m_bestScore) { + m_bestScore = hypo->GetFutureScore(); + + // this may also affect the worst score + SCORE beamWidth = system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + StackAdd added = Add(hypo); size_t nbestSize = system.options.nbest.nbest_size; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index cea3bee1b..fa99ccc08 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,6 +60,9 @@ protected: _HCType m_coll; 
mutable Hypotheses *m_sortedHypos; + SCORE m_bestScore; + SCORE m_worstScore; + StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index ea1b962a8..666eb0e98 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -79,8 +79,8 @@ Parameter::Parameter() desc += "8=tree-to-string (SCFG-based)\n"; desc += "9=forest-to-string"; AddParam(search_opts, "search-algorithm", desc); - //AddParam(search_opts, "beam-threshold", "b", - // "threshold for threshold pruning"); + AddParam(search_opts, "beam-threshold", "b", + "threshold for threshold pruning"); //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", From ef105a1a9b6a9ae731e243c80c760215b765636d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 14:41:30 +0000 Subject: [PATCH 13/50] reset beam variables when clearing ministack. 
They are reused --- contrib/moses2/HypothesisColl.cpp | 21 +++++++++++++++---- .../CubePruningMiniStack/Search.cpp | 3 +-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index a47c7a5de..a42c755aa 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -18,9 +18,9 @@ using namespace std; namespace Moses2 { -HypothesisColl::HypothesisColl(const ManagerBase &mgr) : - m_coll(MemPoolAllocator(mgr.GetPool())), m_sortedHypos( - NULL) +HypothesisColl::HypothesisColl(const ManagerBase &mgr) +:m_coll(MemPoolAllocator(mgr.GetPool())) +,m_sortedHypos(NULL) { m_bestScore = -std::numeric_limits::infinity(); m_worstScore = -std::numeric_limits::infinity(); @@ -53,17 +53,28 @@ void HypothesisColl::Add( ArcLists &arcLists) { SCORE futureScore = hypo->GetFutureScore(); + /* + cerr << "scores:" + << futureScore << " " + << m_bestScore << " " + << m_worstScore << " " + << GetSize() << " " + << endl; + */ if (futureScore < m_worstScore) { // beam threshold + //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; } + //cerr << "OK:" << hypo->Debug(system) << endl; if (futureScore > m_bestScore) { m_bestScore = hypo->GetFutureScore(); // this may also affect the worst score SCORE beamWidth = system.options.search.beam_width; + //cerr << "beamWidth=" << beamWidth << endl; if ( m_bestScore + beamWidth > m_worstScore ) { m_worstScore = m_bestScore + beamWidth; } @@ -112,7 +123,7 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) } } - assert(false); + //assert(false); } const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( @@ -192,6 +203,8 @@ void HypothesisColl::Clear() { m_sortedHypos = NULL; m_coll.clear(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = -std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git 
a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index 8598b3494..d74cb7d99 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -73,10 +73,9 @@ void Search::Decode() //cerr << "stackInd=" << stackInd << endl; m_stack.Clear(); Decode(stackInd); - PostDecode(stackInd); + PostDecode(stackInd); //m_stack.DebugCounts(); - //cerr << m_stacks << endl; } } From a269d9ab71c310423b9f1eb02cafcd4d8a554f14 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 15:31:22 +0000 Subject: [PATCH 14/50] rename m_worseScore -> m_minBeamScore --- contrib/moses2/HypothesisColl.cpp | 12 ++++++------ contrib/moses2/HypothesisColl.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index a42c755aa..98673369f 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -23,7 +23,7 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) ,m_sortedHypos(NULL) { m_bestScore = -std::numeric_limits::infinity(); - m_worstScore = -std::numeric_limits::infinity(); + m_minBeamScore = -std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -57,11 +57,11 @@ void HypothesisColl::Add( cerr << "scores:" << futureScore << " " << m_bestScore << " " - << m_worstScore << " " + << m_minBeamScore << " " << GetSize() << " " << endl; */ - if (futureScore < m_worstScore) { + if (futureScore < m_minBeamScore) { // beam threshold //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); @@ -75,8 +75,8 @@ void HypothesisColl::Add( // this may also affect the worst score SCORE beamWidth = system.options.search.beam_width; //cerr << "beamWidth=" << beamWidth << endl; - if ( m_bestScore + beamWidth > m_worstScore ) { - m_worstScore = m_bestScore + beamWidth; + if ( 
m_bestScore + beamWidth > m_minBeamScore ) { + m_minBeamScore = m_bestScore + beamWidth; } } @@ -204,7 +204,7 @@ void HypothesisColl::Clear() m_sortedHypos = NULL; m_coll.clear(); m_bestScore = -std::numeric_limits::infinity(); - m_worstScore = -std::numeric_limits::infinity(); + m_minBeamScore = -std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index fa99ccc08..46d403544 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -61,7 +61,7 @@ protected: mutable Hypotheses *m_sortedHypos; SCORE m_bestScore; - SCORE m_worstScore; + SCORE m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From 525d7272e1ae1e173c59090c8c912a302b6fd49a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 1 Dec 2016 16:43:06 +0000 Subject: [PATCH 15/50] separate m_bestScore and m_minBeamScore. 
Conflated with m_worseScore --- contrib/moses2/HypothesisColl.cpp | 13 +++++++++++-- contrib/moses2/HypothesisColl.h | 3 +-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 98673369f..13863ad7d 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -24,6 +24,7 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) { m_bestScore = -std::numeric_limits::infinity(); m_minBeamScore = -std::numeric_limits::infinity(); + m_worseScore = std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -52,6 +53,7 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { + size_t stackSize = system.options.search.stack_size; SCORE futureScore = hypo->GetFutureScore(); /* cerr << "scores:" @@ -61,8 +63,10 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (futureScore < m_minBeamScore) { - // beam threshold + if (futureScore < m_minBeamScore + || (GetSize() >= stackSize) && futureScore < m_worseScore ) { + // beam threshold or really bad hypo that won't make the pruning cut + // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -104,6 +108,10 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists + if (hypo->GetFutureScore() < m_worseScore) { + m_worseScore = hypo->GetFutureScore(); + } + return StackAdd(true, NULL); } else { @@ -205,6 +213,7 @@ void HypothesisColl::Clear() m_coll.clear(); m_bestScore = -std::numeric_limits::infinity(); m_minBeamScore = -std::numeric_limits::infinity(); + m_worseScore = std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h 
b/contrib/moses2/HypothesisColl.h index 46d403544..352504cf8 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,8 +60,7 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - SCORE m_bestScore; - SCORE m_minBeamScore; + SCORE m_bestScore, m_worseScore, m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From b572841b09904a2a225224eb4597f5e73e4af5e8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:11:50 +0000 Subject: [PATCH 16/50] delete beam threshold variable, keep worse score discarding --- contrib/moses2/HypothesisColl.cpp | 25 +++++++++++++++++-------- contrib/moses2/HypothesisColl.h | 4 +++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 13863ad7d..701ff318d 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -22,8 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) :m_coll(MemPoolAllocator(mgr.GetPool())) ,m_sortedHypos(NULL) { - m_bestScore = -std::numeric_limits::infinity(); - m_minBeamScore = -std::numeric_limits::infinity(); + //m_bestScore = -std::numeric_limits::infinity(); + //m_minBeamScore = -std::numeric_limits::infinity(); m_worseScore = std::numeric_limits::infinity(); } @@ -63,15 +63,21 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (futureScore < m_minBeamScore - || (GetSize() >= stackSize) && futureScore < m_worseScore ) { + if (GetSize() >= stackSize && futureScore < m_worseScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard:" << hypo->Debug(system) << endl; + //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; } - //cerr << "OK:" << 
hypo->Debug(system) << endl; + /* + if (futureScore < m_minBeamScore) { + // beam threshold or really bad hypo that won't make the pruning cut + // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; + hypoRecycle.Recycle(hypo); + return; + } if (futureScore > m_bestScore) { m_bestScore = hypo->GetFutureScore(); @@ -83,6 +89,8 @@ void HypothesisColl::Add( m_minBeamScore = m_bestScore + beamWidth; } } + //cerr << "OK:" << hypo->Debug(system) << endl; + */ StackAdd added = Add(hypo); @@ -211,8 +219,9 @@ void HypothesisColl::Clear() { m_sortedHypos = NULL; m_coll.clear(); - m_bestScore = -std::numeric_limits::infinity(); - m_minBeamScore = -std::numeric_limits::infinity(); + + //m_bestScore = -std::numeric_limits::infinity(); + //m_minBeamScore = -std::numeric_limits::infinity(); m_worseScore = std::numeric_limits::infinity(); } diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 352504cf8..d89f28c7c 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -60,7 +60,9 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - SCORE m_bestScore, m_worseScore, m_minBeamScore; + //SCORE m_bestScore; + SCORE m_worseScore; + //SCORE m_minBeamScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From a95a461ec75c2d342b07e723eccb1e9e32c6dfc6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:17:55 +0000 Subject: [PATCH 17/50] delete unused param to get rid of annoying but irrelevant warning in clang --- contrib/moses2/SCFG/Manager.cpp | 3 +-- contrib/moses2/SCFG/nbest/NBest.cpp | 7 +++---- contrib/moses2/SCFG/nbest/NBest.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/SCFG/Manager.cpp b/contrib/moses2/SCFG/Manager.cpp index 5f1b731c0..5db4e2a89 100644 --- 
a/contrib/moses2/SCFG/Manager.cpp +++ b/contrib/moses2/SCFG/Manager.cpp @@ -90,9 +90,8 @@ void Manager::Decode() } } - const Stack *stack; - /* + const Stack *stack; stack = &m_stacks.GetStack(0, 5); cerr << "stack 0,12:" << stack->Debug(system) << endl; */ diff --git a/contrib/moses2/SCFG/nbest/NBest.cpp b/contrib/moses2/SCFG/nbest/NBest.cpp index e8515d49b..99c005ee3 100644 --- a/contrib/moses2/SCFG/nbest/NBest.cpp +++ b/contrib/moses2/SCFG/nbest/NBest.cpp @@ -50,7 +50,7 @@ NBest::NBest( } stringstream strm; - OutputToStream(mgr, strm, nbestColl); + OutputToStream(mgr, strm); m_str = strm.str(); } @@ -83,7 +83,7 @@ NBest::NBest(const SCFG::Manager &mgr, m_scores->PlusEquals(mgr.system, newScores); stringstream strm; - OutputToStream(mgr, strm, nbestColl); + OutputToStream(mgr, strm); m_str = strm.str(); } @@ -141,8 +141,7 @@ void NBest::CreateDeviants( void NBest::OutputToStream( const SCFG::Manager &mgr, - std::stringstream &strm, - const NBestColl &nbestColl) const + std::stringstream &strm) const { const SCFG::Hypothesis &hypo = GetHypo(); //strm << &hypo << " "; diff --git a/contrib/moses2/SCFG/nbest/NBest.h b/contrib/moses2/SCFG/nbest/NBest.h index 3e4c6112a..fa21866bb 100644 --- a/contrib/moses2/SCFG/nbest/NBest.h +++ b/contrib/moses2/SCFG/nbest/NBest.h @@ -81,8 +81,7 @@ protected: void OutputToStream( const SCFG::Manager &mgr, - std::stringstream &strm, - const NBestColl &nbestColl) const; + std::stringstream &strm) const; }; ///////////////////////////////////////////////////////////// From 00d5dfca4d788b02c98b94f8abcb2ccbe89bd018 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:25:00 +0000 Subject: [PATCH 18/50] Word -> WORD for template class --- contrib/moses2/TargetPhrase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/TargetPhrase.h b/contrib/moses2/TargetPhrase.h index 41cbb78c5..50f66326a 100644 --- a/contrib/moses2/TargetPhrase.h +++ b/contrib/moses2/TargetPhrase.h @@ -86,7 +86,7 @@ public: } - 
void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const + void OutputToStream(const System &system, const Phrase &inputPhrase, std::ostream &out) const { // get placeholders FactorType placeholderFactor = system.options.input.placeholder_factor; From 3275f1be13ec997c10b6dfa562e969aea1c3a3eb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 13:29:52 +0000 Subject: [PATCH 19/50] warning --- contrib/moses2/SCFG/Stack.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/SCFG/Stack.cpp b/contrib/moses2/SCFG/Stack.cpp index 03347dea0..2ec6ca543 100644 --- a/contrib/moses2/SCFG/Stack.cpp +++ b/contrib/moses2/SCFG/Stack.cpp @@ -76,7 +76,7 @@ Moses2::HypothesisColl &Stack::GetColl(const SCFG::Word &nt) const Hypothesis *Stack::GetBestHypo() const { SCORE bestScore = -std::numeric_limits::infinity(); - const HypothesisBase *bestHypo; + const HypothesisBase *bestHypo = NULL; BOOST_FOREACH(const Coll::value_type &val, m_coll){ const Moses2::HypothesisColl &hypos = *val.second; const Moses2::HypothesisBase *hypo = hypos.GetBestHypo(); From bb36df3a5ef8ef952106da75be30f179b22f2e1b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 14:34:24 +0000 Subject: [PATCH 20/50] refine worseScore discarding --- contrib/moses2/HypothesisColl.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 701ff318d..520557f68 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -53,7 +53,9 @@ void HypothesisColl::Add( Recycler &hypoRecycle, ArcLists &arcLists) { - size_t stackSize = system.options.search.stack_size; + size_t maxStackSize = system.options.search.stack_size; + //cerr << "stackSize=" << stackSize << endl; + SCORE futureScore = hypo->GetFutureScore(); /* cerr << "scores:" @@ -63,7 +65,7 @@ void HypothesisColl::Add( << GetSize() << " " << endl; 
*/ - if (GetSize() >= stackSize && futureScore < m_worseScore) { + if (GetSize() >= maxStackSize && futureScore < m_worseScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; @@ -102,8 +104,14 @@ void HypothesisColl::Add( if (!added.added) { hypoRecycle.Recycle(hypo); } - else if (added.other) { - hypoRecycle.Recycle(added.other); + else { + if (added.other) { + hypoRecycle.Recycle(added.other); + } + + if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worseScore) { + m_worseScore = futureScore; + } } } @@ -116,10 +124,6 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists - if (hypo->GetFutureScore() < m_worseScore) { - m_worseScore = hypo->GetFutureScore(); - } - return StackAdd(true, NULL); } else { From b188c3b649f1fbaffa75f2ee70d2f7a87c0db488 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 15:31:02 +0000 Subject: [PATCH 21/50] check that all files exists --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 16 ++++++++++++++++ .../TranslationModel/ProbingPT/ProbingPT.h | 3 +++ .../TranslationModel/ProbingPT/querying.cpp | 7 ++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index 2c9a5f31a..a83fc6e0d 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -69,6 +69,7 @@ std::pair ProbingPT::ActiveChartEntryProbing::GetKey(const SCFG: //////////////////////////////////////////////////////////////////////////// ProbingPT::ProbingPT(size_t startInd, const std::string &line) :PhraseTable(startInd, line) +,load_method(util::POPULATE_OR_READ) { ReadParameters(); } 
@@ -149,6 +150,21 @@ void ProbingPT::Load(System &system) CreateCache(system); } +void ProbingPT::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "load") { + if (value == "lazy") { + load_method = util::LAZY; + } + else if (value == "populate") { + load_method = util::POPULATE_OR_READ; + } + else { + UTIL_THROW2("load method not supported" << value); + } + } +} + void ProbingPT::CreateAlignmentMap(System &system, const std::string path) { const std::vector< std::vector > &probingAlignColl = m_engine->getAlignments(); diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h index 6e5c7430c..9b8905843 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.h @@ -15,6 +15,7 @@ #include "../../Vector.h" #include "../../Phrase.h" #include "../../SCFG/ActiveChart.h" +#include "util/mmap.hh" namespace Moses2 { @@ -69,6 +70,7 @@ public: virtual ~ProbingPT(); void Load(System &system); + virtual void SetParameter(const std::string& key, const std::string& value); void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; uint64_t GetUnk() const @@ -91,6 +93,7 @@ protected: std::vector m_sourceVocab; // factor id -> pt id std::vector< std::pair > m_targetVocab; // pt id -> factor* std::vector m_aligns; + util::LoadMethod load_method; uint64_t m_unkId; QueryEngine *m_engine; diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index fb8ccef9a..b1f19eb6e 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -17,10 +17,15 @@ QueryEngine::QueryEngine(const char * filepath) std::string path_to_source_vocabid = basepath + "/source_vocabids"; std::string alignPath = basepath + "/Alignments.dat"; - if (!FileExists(path_to_config)) { + if 
(!FileExists(path_to_config) || !FileExists(path_to_hashtable) || + !FileExists(path_to_source_vocabid) || !FileExists(alignPath) || + !FileExists(basepath + "TargetColl.dat") || !FileExists(basepath + "TargetVocab.dat") || + !FileExists(basepath + "cache")) { UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); } + + ///Source phrase vocabids read_map(source_vocabids, path_to_source_vocabid.c_str()); From 114702fcd8a19c64d9bbd4f13de35da73a12b5b1 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 15:54:43 +0000 Subject: [PATCH 22/50] hack kenlm's populate load by catting files into memory. Also check that all files exists --- .../TranslationModel/ProbingPT/ProbingPT.cpp | 5 +- .../TranslationModel/ProbingPT/querying.cpp | 54 ++++++++++++++++--- .../TranslationModel/ProbingPT/querying.hh | 4 +- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp index a83fc6e0d..26f570641 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/ProbingPT.cpp @@ -81,7 +81,7 @@ ProbingPT::~ProbingPT() void ProbingPT::Load(System &system) { - m_engine = new QueryEngine(m_path.c_str()); + m_engine = new QueryEngine(m_path.c_str(), load_method); m_unkId = 456456546456; @@ -163,6 +163,9 @@ void ProbingPT::SetParameter(const std::string& key, const std::string& value) UTIL_THROW2("load method not supported" << value); } } + else { + PhraseTable::SetParameter(key, value); + } } void ProbingPT::CreateAlignmentMap(System &system, const std::string path) diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp index b1f19eb6e..e47a6d015 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.cpp +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.cpp @@ -7,7 +7,7 @@ using namespace 
std; namespace Moses2 { -QueryEngine::QueryEngine(const char * filepath) +QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method) { //Create filepaths @@ -17,15 +17,12 @@ QueryEngine::QueryEngine(const char * filepath) std::string path_to_source_vocabid = basepath + "/source_vocabids"; std::string alignPath = basepath + "/Alignments.dat"; - if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || - !FileExists(path_to_source_vocabid) || !FileExists(alignPath) || - !FileExists(basepath + "TargetColl.dat") || !FileExists(basepath + "TargetVocab.dat") || - !FileExists(basepath + "cache")) { - UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config); + file_exits(basepath); + + if (load_method == util::POPULATE_OR_READ) { + cat_files(basepath); } - - ///Source phrase vocabids read_map(source_vocabids, path_to_source_vocabid.c_str()); @@ -144,5 +141,46 @@ void QueryEngine::read_alignments(const std::string &alignPath) } } +void QueryEngine::file_exits(const std::string &basePath) +{ + if (!FileExists(basePath + "/Alignments.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/Alignments.dat"); + } + if (!FileExists(basePath + "/TargetColl.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetColl.dat"); + } + if (!FileExists(basePath + "/TargetVocab.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/TargetVocab.dat"); + } + if (!FileExists(basePath + "/cache")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/cache"); + } + if (!FileExists(basePath + "/config")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/config"); + } + if (!FileExists(basePath + "/probing_hash.dat")) { + UTIL_THROW2("Require file does not exist in: " << basePath << "/probing_hash.dat"); + } + if (!FileExists(basePath + "/source_vocabids")) { + UTIL_THROW2("Require file does not exist in: " << basePath << 
"/source_vocabids"); + } + + /* + + if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) || + !FileExists(path_to_source_vocabid) || !FileExists(basepath + alignPath) || + !FileExists(basepath + "/TargetColl.dat") || !FileExists(basepath + "/TargetVocab.dat") || + !FileExists(basepath + "/cache")) { + UTIL_THROW2("A required table doesn't exist in: " << basepath); + } + */ +} + +void QueryEngine::cat_files(const std::string &basePath) +{ + system((string("cat ") + basePath + "/TargetColl.dat > /dev/null").c_str()); + system((string("cat ") + basePath + "/probing_hash.dat > /dev/null").c_str()); +} + } diff --git a/contrib/moses2/TranslationModel/ProbingPT/querying.hh b/contrib/moses2/TranslationModel/ProbingPT/querying.hh index aae4b4f09..03d7667a9 100644 --- a/contrib/moses2/TranslationModel/ProbingPT/querying.hh +++ b/contrib/moses2/TranslationModel/ProbingPT/querying.hh @@ -27,13 +27,15 @@ class QueryEngine bool is_reordering; void read_alignments(const std::string &alignPath); + void file_exits(const std::string &basePath); + void cat_files(const std::string &basePath); public: int num_scores; int num_lex_scores; bool logProb; - QueryEngine(const char *); + QueryEngine(const char *, util::LoadMethod load_method); ~QueryEngine(); std::pair query(uint64_t key); From 62e2c852201ebe28559e2a5e707ec90a24c90c51 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:05:13 +0000 Subject: [PATCH 23/50] delete batch algorithm --- contrib/moses2/Jamfile | 4 - contrib/moses2/PhraseBased/Batch/Search.cpp | 171 -------------------- contrib/moses2/PhraseBased/Batch/Search.h | 53 ------ contrib/moses2/PhraseBased/Batch/Stack.cpp | 35 ---- contrib/moses2/PhraseBased/Batch/Stack.h | 32 ---- contrib/moses2/PhraseBased/Batch/Stacks.cpp | 67 -------- contrib/moses2/PhraseBased/Batch/Stacks.h | 62 ------- contrib/moses2/PhraseBased/Manager.cpp | 7 +- 8 files changed, 3 insertions(+), 428 deletions(-) delete mode 100644 
contrib/moses2/PhraseBased/Batch/Search.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Search.h delete mode 100644 contrib/moses2/PhraseBased/Batch/Stack.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Stack.h delete mode 100644 contrib/moses2/PhraseBased/Batch/Stacks.cpp delete mode 100644 contrib/moses2/PhraseBased/Batch/Stacks.h diff --git a/contrib/moses2/Jamfile b/contrib/moses2/Jamfile index 850dbcd1f..98e1c1e30 100644 --- a/contrib/moses2/Jamfile +++ b/contrib/moses2/Jamfile @@ -114,10 +114,6 @@ alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose PhraseBased/CubePruningMiniStack/Search.cpp PhraseBased/CubePruningMiniStack/Stack.cpp - PhraseBased/Batch/Search.cpp - PhraseBased/Batch/Stack.cpp - PhraseBased/Batch/Stacks.cpp - # PhraseBased/CubePruningCardinalStack/Misc.cpp # PhraseBased/CubePruningCardinalStack/Search.cpp # PhraseBased/CubePruningCardinalStack/Stack.cpp diff --git a/contrib/moses2/PhraseBased/Batch/Search.cpp b/contrib/moses2/PhraseBased/Batch/Search.cpp deleted file mode 100644 index 376232199..000000000 --- a/contrib/moses2/PhraseBased/Batch/Search.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * SearchNormal.cpp - * - * Created on: 25 Oct 2015 - * Author: hieu - */ - -#include "Search.h" -#include -#include -#include "Stack.h" -#include "../Manager.h" -#include "../TrellisPath.h" -#include "../Sentence.h" -#include "../../TrellisPaths.h" -#include "../../InputPathsBase.h" -#include "../../Phrase.h" -#include "../../System.h" -#include "../../PhraseBased/TargetPhrases.h" - -using namespace std; - -namespace Moses2 -{ -namespace NSBatch -{ - -Search::Search(Manager &mgr) -:Moses2::Search(mgr) -, m_stacks(mgr) -, m_batch(mgr.system.GetBatch(mgr.GetSystemPool())) -{ - // TODO Auto-generated constructor stub - -} - -Search::~Search() -{ - // TODO Auto-generated destructor stub -} - -void Search::Decode() -{ - // init stacks - const Sentence &sentence = static_cast(mgr.GetInput()); - m_stacks.Init(mgr, 
sentence.GetSize() + 1); - - const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); - Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), - initBitmap); - initHypo->EmptyHypothesisState(mgr.GetInput()); - - m_stacks.Add(initHypo, mgr.GetHypoRecycle(), mgr.arcLists); - - for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { - Decode(stackInd); - //cerr << m_stacks << endl; - - // delete stack to save mem - if (stackInd < m_stacks.GetSize() - 1) { - m_stacks.Delete(stackInd); - } - //cerr << m_stacks << endl; - } -} - -void Search::Decode(size_t stackInd) -{ - Stack &stack = m_stacks[stackInd]; - if (&stack == &m_stacks.Back()) { - // last stack. don't do anythin - return; - } - - const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists); - - const InputPaths &paths = mgr.GetInputPaths(); - - BOOST_FOREACH(const InputPathBase *path, paths){ - BOOST_FOREACH(const HypothesisBase *hypo, hypos) { - Extend(*static_cast(hypo), *static_cast(path)); - } - } - - // process batch - mgr.system.featureFunctions.EvaluateWhenAppliedBatch(m_batch); - - for (size_t i = 0; i < m_batch.size(); ++i) { - Hypothesis *hypo = m_batch[i]; - m_stacks.Add(hypo, mgr.GetHypoRecycle(), mgr.arcLists); - } - m_batch.clear(); -} - -void Search::Extend(const Hypothesis &hypo, const InputPath &path) -{ - const Bitmap &hypoBitmap = hypo.GetBitmap(); - const Range &hypoRange = hypo.GetInputPath().range; - const Range &pathRange = path.range; - - if (!CanExtend(hypoBitmap, hypoRange.GetEndPos(), pathRange)) { - return; - } - - const ReorderingConstraint &reorderingConstraint = mgr.GetInput().GetReorderingConstraint(); - if (!reorderingConstraint.Check(hypoBitmap, pathRange.GetStartPos(), pathRange.GetEndPos())) { - return; - } - //cerr << " YES" << endl; - - // extend this hypo - const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); - //SCORE 
estimatedScore = mgr.GetEstimatedScores().CalcFutureScore2(bitmap, pathRange.GetStartPos(), pathRange.GetEndPos()); - SCORE estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); - - size_t numPt = mgr.system.mappings.size(); - const TargetPhrases **tpsAllPt = path.targetPhrases; - for (size_t i = 0; i < numPt; ++i) { - const TargetPhrases *tps = tpsAllPt[i]; - if (tps) { - Extend(hypo, *tps, path, newBitmap, estimatedScore); - } - } -} - -void Search::Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) -{ - BOOST_FOREACH(const TargetPhraseImpl *tp, tps){ - Extend(hypo, *tp, path, newBitmap, estimatedScore); - } -} - -void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore) -{ - Hypothesis *newHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); - newHypo->Init(mgr, hypo, path, tp, newBitmap, estimatedScore); - - m_batch.push_back(newHypo); - //newHypo->EvaluateWhenApplied(); - - //m_stacks.Add(newHypo, mgr.GetHypoRecycle(), mgr.arcLists); - - //m_arcLists.AddArc(stackAdded.added, newHypo, stackAdded.other); - //stack.Prune(mgr.GetHypoRecycle(), mgr.system.stackSize, mgr.system.stackSize * 2); - -} - -const Hypothesis *Search::GetBestHypo() const -{ - const Stack &lastStack = m_stacks.Back(); - return lastStack.GetBestHypo(); -} - -void Search::AddInitialTrellisPaths(TrellisPaths &paths) const -{ - const Stack &lastStack = m_stacks.Back(); - const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists); - - BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){ - const Hypothesis *hypo = static_cast(hypoBase); - TrellisPath *path = new TrellisPath(hypo, mgr.arcLists); - paths.Add(path); - } -} - -} // namespace -} - diff --git a/contrib/moses2/PhraseBased/Batch/Search.h b/contrib/moses2/PhraseBased/Batch/Search.h deleted file mode 100644 index 4f4b35d2e..000000000 --- 
a/contrib/moses2/PhraseBased/Batch/Search.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SearchNormal.h - * - * Created on: 25 Oct 2015 - * Author: hieu - */ -#pragma once - -#include -#include "../../legacy/Range.h" -#include "../../legacy/Bitmap.h" -#include "../../TypeDef.h" -#include "../Search.h" -#include "Stacks.h" - -namespace Moses2 -{ -class Hypothesis; -class InputPath; -class TargetPhrases; -class TargetPhraseImpl; - -namespace NSBatch -{ -class Stacks; - -class Search: public Moses2::Search -{ -public: - Search(Manager &mgr); - virtual ~Search(); - - virtual void Decode(); - const Hypothesis *GetBestHypo() const; - - void AddInitialTrellisPaths(TrellisPaths &paths) const; - -protected: - Stacks m_stacks; - - Batch &m_batch; - - void Decode(size_t stackInd); - void Extend(const Hypothesis &hypo, const InputPath &path); - void Extend(const Hypothesis &hypo, const TargetPhrases &tps, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); - void Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, - const InputPath &path, const Bitmap &newBitmap, SCORE estimatedScore); - -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stack.cpp b/contrib/moses2/PhraseBased/Batch/Stack.cpp deleted file mode 100644 index ebe6832da..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stack.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Stack.cpp - * - * Created on: 24 Oct 2015 - * Author: hieu - */ -#include -#include "Stack.h" -#include "../Hypothesis.h" -#include "../Manager.h" -#include "../../Scores.h" -#include "../../HypothesisColl.h" - -using namespace std; - -namespace Moses2 -{ - -namespace NSBatch -{ - -Stack::Stack(const Manager &mgr) : - HypothesisColl(mgr) -{ - // TODO Auto-generated constructor stub - -} - -Stack::~Stack() -{ - // TODO Auto-generated destructor stub -} - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stack.h b/contrib/moses2/PhraseBased/Batch/Stack.h deleted file mode 100644 index ad8c3c649..000000000 --- 
a/contrib/moses2/PhraseBased/Batch/Stack.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Stack.h - * - * Created on: 24 Oct 2015 - * Author: hieu - */ -#pragma once - -#include -#include -#include "../Hypothesis.h" -#include "../../TypeDef.h" -#include "../../HypothesisColl.h" -#include "../../legacy/Util2.h" - -namespace Moses2 -{ - -namespace NSBatch -{ -class Stack: public HypothesisColl -{ -public: - Stack(const Manager &mgr); - virtual ~Stack(); - -protected: - -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stacks.cpp b/contrib/moses2/PhraseBased/Batch/Stacks.cpp deleted file mode 100644 index 2f46aef6f..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stacks.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Stacks.cpp - * - * Created on: 6 Nov 2015 - * Author: hieu - */ - -#include "Stacks.h" -#include "../Manager.h" -#include "../../System.h" - -using namespace std; - -namespace Moses2 -{ - -namespace NSBatch -{ - -Stacks::Stacks(const Manager &mgr) : - m_mgr(mgr) -{ - // TODO Auto-generated constructor stub - -} - -Stacks::~Stacks() -{ - for (size_t i = 0; i < m_stacks.size(); ++i) { - delete m_stacks[i]; - } -} - -void Stacks::Init(const Manager &mgr, size_t numStacks) -{ - m_stacks.resize(numStacks); - for (size_t i = 0; i < m_stacks.size(); ++i) { - m_stacks[i] = new Stack(mgr); - } -} - -std::string Stacks::Debug(const System &system) const -{ - stringstream out; - for (size_t i = 0; i < GetSize(); ++i) { - const Stack *stack = m_stacks[i]; - if (stack) { - out << stack->GetSize() << " "; - } - else { - out << "N "; - } - } - return out.str(); -} - -void Stacks::Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists) -{ - size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); - //cerr << "numWordsCovered=" << numWordsCovered << endl; - Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists); -} - -} -} diff --git a/contrib/moses2/PhraseBased/Batch/Stacks.h 
b/contrib/moses2/PhraseBased/Batch/Stacks.h deleted file mode 100644 index 8cd2d857f..000000000 --- a/contrib/moses2/PhraseBased/Batch/Stacks.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Stacks.h - * - * Created on: 6 Nov 2015 - * Author: hieu - */ - -#pragma once - -#include -#include "Stack.h" -#include "../../Recycler.h" - -namespace Moses2 -{ -class Manager; -class ArcLists; - -namespace NSBatch -{ - -class Stacks -{ -public: - Stacks(const Manager &mgr); - virtual ~Stacks(); - - void Init(const Manager &mgr, size_t numStacks); - - size_t GetSize() const - { - return m_stacks.size(); - } - - const Stack &Back() const - { - return *m_stacks.back(); - } - - Stack &operator[](size_t ind) - { - return *m_stacks[ind]; - } - - void Delete(size_t ind) - { - delete m_stacks[ind]; - m_stacks[ind] = NULL; - } - - void Add(Hypothesis *hypo, Recycler &hypoRecycle, - ArcLists &arcLists); - - std::string Debug(const System &system) const; - -protected: - const Manager &m_mgr; - std::vector m_stacks; -}; - -} -} diff --git a/contrib/moses2/PhraseBased/Manager.cpp b/contrib/moses2/PhraseBased/Manager.cpp index 6fd1b0db3..b89897070 100644 --- a/contrib/moses2/PhraseBased/Manager.cpp +++ b/contrib/moses2/PhraseBased/Manager.cpp @@ -16,7 +16,6 @@ #include "Normal/Search.h" #include "CubePruningMiniStack/Search.h" -#include "Batch/Search.h" /* #include "CubePruningPerMiniStack/Search.h" @@ -95,7 +94,8 @@ void Manager::Init() m_search = new NSNormal::Search(*this); break; case NormalBatch: - m_search = new NSBatch::Search(*this); + //m_search = new NSBatch::Search(*this); + UTIL_THROW2("Not implemented"); break; case CubePruning: case CubePruningMiniStack: @@ -116,8 +116,7 @@ void Manager::Init() break; */ default: - cerr << "Unknown search algorithm" << endl; - abort(); + UTIL_THROW2("Unknown search algorithm"); } } From b373a37d00169cc606c03ed0f1f3abb4ca88882e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:26:58 +0000 Subject: [PATCH 24/50] get ready to change 
GetSortedAndPruneHypos() to non-const --- contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index d74cb7d99..4ea61e0ba 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -172,6 +172,9 @@ void Search::PostDecode(size_t stackInd) const Bitmap &hypoBitmap = *val.first.first; size_t firstGap = hypoBitmap.GetFirstGapPos(); size_t hypoEndPos = val.first.second; + + Moses2::HypothesisColl &hypos = *val.second; + //cerr << "key=" << hypoBitmap << " " << firstGap << " " << inputSize << endl; // create edges to next hypos from existing hypos @@ -203,7 +206,7 @@ void Search::PostDecode(size_t stackInd) CubeEdges &edges = *m_cubeEdges[numWords]; // sort hypo for a particular bitmap and hypoEndPos - const Hypotheses &sortedHypos = val.second->GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); size_t numPt = mgr.system.mappings.size(); for (size_t i = 0; i < numPt; ++i) { @@ -228,7 +231,7 @@ void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { const Stack::Coll &coll = m_stack.GetColl(); BOOST_FOREACH(const Stack::Coll::value_type &val, coll){ - const Moses2::HypothesisColl &hypos = *val.second; + Moses2::HypothesisColl &hypos = *val.second; const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { From f7cf9a84edc8a3a17e1eabe32f0bc8f3097ba97c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 17:29:52 +0000 Subject: [PATCH 25/50] use GetBestHypo() --- contrib/moses2/PhraseBased/Normal/Search.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp 
b/contrib/moses2/PhraseBased/Normal/Search.cpp index cb528d17f..5c89eecc6 100644 --- a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -140,13 +140,7 @@ void Search::Extend(const Hypothesis &hypo, const TargetPhraseImpl &tp, const Hypothesis *Search::GetBestHypo() const { const Stack &lastStack = m_stacks.Back(); - const Hypotheses &sortedHypos = lastStack.GetSortedAndPruneHypos(mgr, - mgr.arcLists); - - const Hypothesis *best = NULL; - if (sortedHypos.size()) { - best = static_cast(sortedHypos[0]); - } + const Hypothesis *best = lastStack.GetBestHypo(); return best; } From fc4fa0f19cda10183a7514e700eff576ed85ec2c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 18:04:26 +0000 Subject: [PATCH 26/50] add inter-stack pruning --- contrib/moses2/HypothesisColl.cpp | 66 +++++++++++++++++-- contrib/moses2/HypothesisColl.h | 4 +- .../CubePruningMiniStack/Stack.cpp | 2 +- contrib/moses2/PhraseBased/Normal/Stacks.cpp | 2 +- contrib/moses2/SCFG/Stack.cpp | 2 +- 5 files changed, 68 insertions(+), 8 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 520557f68..3770bf982 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -48,13 +48,17 @@ const HypothesisBase *HypothesisColl::GetBestHypo() const } void HypothesisColl::Add( - const System &system, + const ManagerBase &mgr, HypothesisBase *hypo, Recycler &hypoRecycle, ArcLists &arcLists) { - size_t maxStackSize = system.options.search.stack_size; - //cerr << "stackSize=" << stackSize << endl; + size_t maxStackSize = mgr.system.options.search.stack_size; + //cerr << "maxStackSize=" << maxStackSize << endl; + + if (GetSize() * 2 > maxStackSize) { + PruneHypos(mgr, mgr.arcLists); + } SCORE futureScore = hypo->GetFutureScore(); /* @@ -96,7 +100,7 @@ void HypothesisColl::Add( StackAdd added = Add(hypo); - size_t nbestSize = system.options.nbest.nbest_size; + size_t nbestSize = 
mgr.system.options.nbest.nbest_size; if (nbestSize) { arcLists.AddArc(added.added, hypo, added.other); } @@ -219,6 +223,60 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const +{ + size_t stackSize = mgr.system.options.search.stack_size; + Recycler &recycler = mgr.GetHypoRecycle(); + + /* + cerr << "UNSORTED hypos: "; + BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { + cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " "; + } + cerr << endl; + */ + vector sortedHypos; + size_t ind = 0; + BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ + sortedHypos[ind] = hypo; + ++ind; + } + + vector::iterator iterMiddle; + iterMiddle = + (stackSize == 0 || sortedHypos.size() < stackSize) ? + sortedHypos.end() : sortedHypos.begin() + stackSize; + + std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), + HypothesisFutureScoreOrderer()); + + // prune + if (stackSize && sortedHypos.size() > stackSize) { + for (size_t i = stackSize; i < sortedHypos.size(); ++i) { + HypothesisBase *hypo = const_cast((sortedHypos)[i]); + recycler.Recycle(hypo); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + + // delete from collection + //Delete(hypo); + } + sortedHypos.resize(stackSize); + } + + /* + cerr << "sorted hypos: "; + for (size_t i = 0; i < sortedHypos.size(); ++i) { + const HypothesisBase *hypo = sortedHypos[i]; + cerr << hypo << " "; + } + cerr << endl; + */ +} + void HypothesisColl::Clear() { m_sortedHypos = NULL; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index d89f28c7c..0771fa449 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -25,7 +25,7 @@ class HypothesisColl public: HypothesisColl(const ManagerBase &mgr); - void Add(const System &system, + void Add(const ManagerBase &mgr, HypothesisBase *hypo, Recycler &hypoRecycle, ArcLists 
&arcLists); @@ -67,6 +67,8 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + }; } /* namespace Moses2 */ diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp index 1f456dd93..e2b81f0ba 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Stack.cpp @@ -47,7 +47,7 @@ void Stack::Add(Hypothesis *hypo, Recycler &hypoRecycle, { HypoCoverage key(&hypo->GetBitmap(), hypo->GetInputPath().range.GetEndPos()); Moses2::HypothesisColl &coll = GetMiniStack(key); - coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + coll.Add(m_mgr, hypo, hypoRecycle, arcLists); } const Hypothesis *Stack::GetBestHypo() const diff --git a/contrib/moses2/PhraseBased/Normal/Stacks.cpp b/contrib/moses2/PhraseBased/Normal/Stacks.cpp index 4aab42347..bb7239cf8 100644 --- a/contrib/moses2/PhraseBased/Normal/Stacks.cpp +++ b/contrib/moses2/PhraseBased/Normal/Stacks.cpp @@ -60,7 +60,7 @@ void Stacks::Add(Hypothesis *hypo, Recycler &hypoRecycle, size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); //cerr << "numWordsCovered=" << numWordsCovered << endl; Stack &stack = *m_stacks[numWordsCovered]; - stack.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + stack.Add(m_mgr, hypo, hypoRecycle, arcLists); } } diff --git a/contrib/moses2/SCFG/Stack.cpp b/contrib/moses2/SCFG/Stack.cpp index 2ec6ca543..163761a49 100644 --- a/contrib/moses2/SCFG/Stack.cpp +++ b/contrib/moses2/SCFG/Stack.cpp @@ -33,7 +33,7 @@ void Stack::Add(SCFG::Hypothesis *hypo, Recycler &hypoRecycle, //cerr << "lhs=" << lhs << endl; HypothesisColl &coll = GetColl(lhs); - coll.Add(m_mgr.system, hypo, hypoRecycle, arcLists); + coll.Add(m_mgr, hypo, hypoRecycle, arcLists); } size_t Stack::GetSize() const From 
9a272ba519757d9d6fcaa4a4ca7fff90b2db9b68 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 5 Dec 2016 22:47:37 +0000 Subject: [PATCH 27/50] add inter-stack pruning --- contrib/moses2/HypothesisColl.cpp | 35 ++++++++++++++++++++----------- contrib/moses2/HypothesisColl.h | 4 +++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 3770bf982..06be07991 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -54,9 +54,9 @@ void HypothesisColl::Add( ArcLists &arcLists) { size_t maxStackSize = mgr.system.options.search.stack_size; - //cerr << "maxStackSize=" << maxStackSize << endl; - if (GetSize() * 2 > maxStackSize) { + if (GetSize() > maxStackSize * 2) { + //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; PruneHypos(mgr, mgr.arcLists); } @@ -223,9 +223,9 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { - size_t stackSize = mgr.system.options.search.stack_size; + size_t maxStackSize = mgr.system.options.search.stack_size; Recycler &recycler = mgr.GetHypoRecycle(); /* @@ -235,7 +235,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons } cerr << endl; */ - vector sortedHypos; + vector sortedHypos(GetSize()); size_t ind = 0; BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ sortedHypos[ind] = hypo; @@ -244,17 +244,16 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons vector::iterator iterMiddle; iterMiddle = - (stackSize == 0 || sortedHypos.size() < stackSize) ? - sortedHypos.end() : sortedHypos.begin() + stackSize; + (maxStackSize == 0 || sortedHypos.size() < maxStackSize) ? 
+ sortedHypos.end() : sortedHypos.begin() + maxStackSize; std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), HypothesisFutureScoreOrderer()); // prune - if (stackSize && sortedHypos.size() > stackSize) { - for (size_t i = stackSize; i < sortedHypos.size(); ++i) { + if (maxStackSize && sortedHypos.size() > maxStackSize) { + for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { HypothesisBase *hypo = const_cast((sortedHypos)[i]); - recycler.Recycle(hypo); // delete from arclist if (mgr.system.options.nbest.nbest_size) { @@ -262,9 +261,11 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons } // delete from collection - //Delete(hypo); + Delete(hypo); + + //recycler.Recycle(hypo); } - sortedHypos.resize(stackSize); + } /* @@ -277,6 +278,16 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) cons */ } +void HypothesisColl::Delete(const HypothesisBase *hypo) +{ + cerr << "hypo=" << hypo << " " << m_coll.size() << endl; + + _HCType::const_iterator iter = m_coll.find(hypo); + UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); + + m_coll.erase(iter); +} + void HypothesisColl::Clear() { m_sortedHypos = NULL; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 0771fa449..4e41509de 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -50,6 +50,8 @@ public: return hypo ? 
&hypo->Cast() : NULL; } + void Delete(const HypothesisBase *hypo); + std::string Debug(const System &system) const; protected: @@ -67,7 +69,7 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); }; From 6356ccae0315e50898926f01cfdb7d34537ce9e6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 00:20:41 +0000 Subject: [PATCH 28/50] debug --- contrib/moses2/HypothesisColl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 06be07991..b7037992f 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -263,7 +263,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) // delete from collection Delete(hypo); - //recycler.Recycle(hypo); + recycler.Recycle(hypo); } } @@ -280,7 +280,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) void HypothesisColl::Delete(const HypothesisBase *hypo) { - cerr << "hypo=" << hypo << " " << m_coll.size() << endl; + //cerr << "hypo=" << hypo << " " << m_coll.size() << endl; _HCType::const_iterator iter = m_coll.find(hypo); UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); From d68211cba912e96a0491ded7ec71b910b8d7a9f9 Mon Sep 17 00:00:00 2001 From: lonevvolf Date: Tue, 6 Dec 2016 09:41:32 +0100 Subject: [PATCH 29/50] Fix for number at the end of a string --- scripts/generic/ph_numbers.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generic/ph_numbers.perl b/scripts/generic/ph_numbers.perl index d49bc98b8..618e6fe15 100755 --- a/scripts/generic/ph_numbers.perl +++ b/scripts/generic/ph_numbers.perl @@ -88,7 +88,7 @@ sub recognize { $isRecognized = 0; } - if ($end == length($input) -1 || substr($input, 
$end, 1) eq " ") { + if ($end == length($input) || substr($input, $end, 1) eq " ") { # last word, or next char is a space } else { From e8a6677bbbbf0ccdc3f8ae07211fe988020ef7dc Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 13:23:26 +0000 Subject: [PATCH 30/50] bug in state comparison. If 2 states are actually the same object, return true, not false --- .../FF/LexicalReordering/BidirectionalReorderingState.cpp | 2 +- moses/FF/LexicalReordering/BidirectionalReorderingState.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp index bc27f2a68..8c1b409c3 100644 --- a/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp +++ b/contrib/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp @@ -54,7 +54,7 @@ size_t BidirectionalReorderingState::hash() const bool BidirectionalReorderingState::operator==(const FFState& o) const { - if (&o == this) return 0; + if (&o == this) return true; BidirectionalReorderingState const &other = static_cast(o); diff --git a/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp b/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp index 22f550ba8..5d264e4c8 100644 --- a/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp +++ b/moses/FF/LexicalReordering/BidirectionalReorderingState.cpp @@ -15,7 +15,7 @@ size_t BidirectionalReorderingState::hash() const bool BidirectionalReorderingState::operator==(const FFState& o) const { - if (&o == this) return 0; + if (&o == this) return true; BidirectionalReorderingState const &other = static_cast(o); From 40a2588fd00a68cd7cfead5159bf4a46009ee85b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 13:29:43 +0000 Subject: [PATCH 31/50] erase object from set --- contrib/moses2/HypothesisColl.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index b7037992f..fb35c00e9 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -282,10 +282,8 @@ void HypothesisColl::Delete(const HypothesisBase *hypo) { //cerr << "hypo=" << hypo << " " << m_coll.size() << endl; - _HCType::const_iterator iter = m_coll.find(hypo); - UTIL_THROW_IF2(iter == m_coll.end(), "Can't find hypo"); - - m_coll.erase(iter); + size_t erased = m_coll.erase(hypo); + UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo); } void HypothesisColl::Clear() From a6d226c6b64805893b8c6303cbd2cf7c098e1f28 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 14:00:04 +0000 Subject: [PATCH 32/50] update worse score during pruning --- contrib/moses2/HypothesisColl.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index fb35c00e9..3427ebc47 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -250,10 +250,19 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), HypothesisFutureScoreOrderer()); + // update worse score + m_worseScore = std::numeric_limits::infinity(); + for (size_t i = 0; i < maxStackSize; ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); + if (hypo->GetFutureScore() < m_worseScore) { + m_worseScore = hypo->GetFutureScore(); + } + } + // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { - HypothesisBase *hypo = const_cast((sortedHypos)[i]); + HypothesisBase *hypo = const_cast(sortedHypos[i]); // delete from arclist if (mgr.system.options.nbest.nbest_size) { From ac6f234592675aaffe61205a0d966011c9e469f7 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 09:31:29 -0500 Subject: [PATCH 33/50] 
update worse score. Best hypos are already sorted using partial sort, don't need to go thru each of them --- contrib/moses2/HypothesisColl.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 3427ebc47..bb575382b 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -251,13 +251,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = std::numeric_limits::infinity(); - for (size_t i = 0; i < maxStackSize; ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); - if (hypo->GetFutureScore() < m_worseScore) { - m_worseScore = hypo->GetFutureScore(); - } - } + m_worseScore = sortedHypos[maxStackSize]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { From 8f012ba9c98c6787c67891334aa7812eb52c36d0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 09:39:59 -0500 Subject: [PATCH 34/50] tweak --- contrib/moses2/HypothesisColl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index bb575382b..841f45abf 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -251,7 +251,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = sortedHypos[maxStackSize]->GetFutureScore(); + m_worseScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { From 30aa185cf24eabe5bbc52fc01f00bcc9af26decb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 16:02:10 +0000 Subject: [PATCH 35/50] beam pruning --- contrib/moses2/HypothesisColl.cpp | 30 ++++++++++++++++++------------ contrib/moses2/HypothesisColl.h | 5 ++--- 2 files 
changed, 20 insertions(+), 15 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 841f45abf..487b9c36b 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -22,9 +22,8 @@ HypothesisColl::HypothesisColl(const ManagerBase &mgr) :m_coll(MemPoolAllocator(mgr.GetPool())) ,m_sortedHypos(NULL) { - //m_bestScore = -std::numeric_limits::infinity(); - //m_minBeamScore = -std::numeric_limits::infinity(); - m_worseScore = std::numeric_limits::infinity(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = std::numeric_limits::infinity(); } const HypothesisBase *HypothesisColl::GetBestHypo() const @@ -69,9 +68,9 @@ void HypothesisColl::Add( << GetSize() << " " << endl; */ - if (GetSize() >= maxStackSize && futureScore < m_worseScore) { + if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -79,7 +78,7 @@ void HypothesisColl::Add( /* if (futureScore < m_minBeamScore) { // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worseScore stat gets out of date and isn't the optimum cut-off point + // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; hypoRecycle.Recycle(hypo); return; @@ -113,8 +112,16 @@ void HypothesisColl::Add( hypoRecycle.Recycle(added.other); } - if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worseScore) { - m_worseScore = futureScore; + // update beam variables + if (futureScore > m_bestScore) { + 
m_bestScore = futureScore; + float beamWidth = mgr.system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + else if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worstScore) { + m_worstScore = futureScore; } } } @@ -251,7 +258,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) HypothesisFutureScoreOrderer()); // update worse score - m_worseScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); + m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune if (maxStackSize && sortedHypos.size() > maxStackSize) { @@ -294,9 +301,8 @@ void HypothesisColl::Clear() m_sortedHypos = NULL; m_coll.clear(); - //m_bestScore = -std::numeric_limits::infinity(); - //m_minBeamScore = -std::numeric_limits::infinity(); - m_worseScore = std::numeric_limits::infinity(); + m_bestScore = -std::numeric_limits::infinity(); + m_worstScore = std::numeric_limits::infinity(); } std::string HypothesisColl::Debug(const System &system) const diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 4e41509de..b65fd8855 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -62,9 +62,8 @@ protected: _HCType m_coll; mutable Hypotheses *m_sortedHypos; - //SCORE m_bestScore; - SCORE m_worseScore; - //SCORE m_minBeamScore; + SCORE m_bestScore; + SCORE m_worstScore; StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; From 7e922f7a5da186a22c7754b1d201e18ab94baf61 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 16:08:16 +0000 Subject: [PATCH 36/50] beam pruning even with nbest --- contrib/moses2/HypothesisColl.cpp | 56 ++++++++++--------------------- 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 487b9c36b..d6bfd4a73 100644 --- 
a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -75,27 +75,6 @@ void HypothesisColl::Add( hypoRecycle.Recycle(hypo); return; } - /* - if (futureScore < m_minBeamScore) { - // beam threshold or really bad hypo that won't make the pruning cut - // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard, below beam:" << hypo->Debug(system) << endl; - hypoRecycle.Recycle(hypo); - return; - } - - if (futureScore > m_bestScore) { - m_bestScore = hypo->GetFutureScore(); - - // this may also affect the worst score - SCORE beamWidth = system.options.search.beam_width; - //cerr << "beamWidth=" << beamWidth << endl; - if ( m_bestScore + beamWidth > m_minBeamScore ) { - m_minBeamScore = m_bestScore + beamWidth; - } - } - //cerr << "OK:" << hypo->Debug(system) << endl; - */ StackAdd added = Add(hypo); @@ -104,28 +83,29 @@ void HypothesisColl::Add( arcLists.AddArc(added.added, hypo, added.other); } else { - if (!added.added) { - hypoRecycle.Recycle(hypo); + if (added.added) { + if (added.other) { + hypoRecycle.Recycle(added.other); + } } else { - if (added.other) { - hypoRecycle.Recycle(added.other); - } - - // update beam variables - if (futureScore > m_bestScore) { - m_bestScore = futureScore; - float beamWidth = mgr.system.options.search.beam_width; - if ( m_bestScore + beamWidth > m_worstScore ) { - m_worstScore = m_bestScore + beamWidth; - } - } - else if (GetSize() <= maxStackSize && hypo->GetFutureScore() < m_worstScore) { - m_worstScore = futureScore; - } + hypoRecycle.Recycle(hypo); } } + // update beam variables + if (added.added) { + if (futureScore > m_bestScore) { + m_bestScore = futureScore; + float beamWidth = mgr.system.options.search.beam_width; + if ( m_bestScore + beamWidth > m_worstScore ) { + m_worstScore = m_bestScore + beamWidth; + } + } + else if (GetSize() <= maxStackSize && futureScore < m_worstScore) { + m_worstScore = futureScore; + } + } } StackAdd 
HypothesisColl::Add(const HypothesisBase *hypo) From 7cdff3a148c4429f11507707880c4dbd7ae050bb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 18:10:56 +0000 Subject: [PATCH 37/50] use arrays, not vector --- contrib/moses2/HypothesisColl.cpp | 41 ++++++++++++++++--------------- contrib/moses2/HypothesisColl.h | 2 +- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index d6bfd4a73..48f5ef8d2 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -56,7 +56,8 @@ void HypothesisColl::Add( if (GetSize() > maxStackSize * 2) { //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; - PruneHypos(mgr, mgr.arcLists); + const HypothesisBase *sortedHypos[GetSize()]; + PruneHypos(mgr, mgr.arcLists, sortedHypos); } SCORE futureScore = hypo->GetFutureScore(); @@ -210,9 +211,13 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) { size_t maxStackSize = mgr.system.options.search.stack_size; + assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that + assert(GetSize() > maxStackSize); + //assert(sortedHypos.size() == GetSize()); + Recycler &recycler = mgr.GetHypoRecycle(); /* @@ -222,40 +227,36 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) } cerr << endl; */ - vector sortedHypos(GetSize()); size_t ind = 0; BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ sortedHypos[ind] = hypo; ++ind; } - vector::iterator iterMiddle; - iterMiddle = - (maxStackSize == 0 || sortedHypos.size() < maxStackSize) ? 
- sortedHypos.end() : sortedHypos.begin() + maxStackSize; + const HypothesisBase **iterMiddle = sortedHypos + maxStackSize; - std::partial_sort(sortedHypos.begin(), iterMiddle, sortedHypos.end(), + std::partial_sort( + sortedHypos, + iterMiddle, + sortedHypos + GetSize(), HypothesisFutureScoreOrderer()); // update worse score m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); // prune - if (maxStackSize && sortedHypos.size() > maxStackSize) { - for (size_t i = maxStackSize; i < sortedHypos.size(); ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); + for (size_t i = maxStackSize; i < GetSize(); ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - - // delete from collection - Delete(hypo); - - recycler.Recycle(hypo); + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); } + // delete from collection + Delete(hypo); + + recycler.Recycle(hypo); } /* diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index b65fd8855..ac9a32a2c 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -68,7 +68,7 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos); }; From 883f2e4f143447c86714adf8233e846b0e60a57e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 6 Dec 2016 18:36:44 +0000 Subject: [PATCH 38/50] get ready to merge similar sorting functions --- contrib/moses2/Array.h | 3 ++ contrib/moses2/HypothesisColl.cpp | 54 +++++++++++++++++------------ contrib/moses2/HypothesisColl.h | 3 +- contrib/moses2/legacy/Parameter.cpp | 2 +- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/contrib/moses2/Array.h 
b/contrib/moses2/Array.h index c8552f5e7..59b003135 100644 --- a/contrib/moses2/Array.h +++ b/contrib/moses2/Array.h @@ -56,6 +56,9 @@ public: return m_arr[ind]; } + T *GetArray() + { return m_arr; } + size_t hash() const { size_t seed = 0; diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 48f5ef8d2..185b8bdc8 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -56,8 +56,7 @@ void HypothesisColl::Add( if (GetSize() > maxStackSize * 2) { //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl; - const HypothesisBase *sortedHypos[GetSize()]; - PruneHypos(mgr, mgr.arcLists, sortedHypos); + PruneHypos(mgr, mgr.arcLists); } SCORE futureScore = hypo->GetFutureScore(); @@ -211,15 +210,42 @@ void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, */ } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) +{ + size_t maxStackSize = mgr.system.options.search.stack_size; + + Recycler &recycler = mgr.GetHypoRecycle(); + + const HypothesisBase *sortedHypos[GetSize()]; + PruneHypos(mgr, mgr.arcLists, sortedHypos); + + // update worse score + m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); + + // prune + for (size_t i = maxStackSize; i < GetSize(); ++i) { + HypothesisBase *hypo = const_cast(sortedHypos[i]); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + + // delete from collection + Delete(hypo); + + recycler.Recycle(hypo); + } + +} + +void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that assert(GetSize() > maxStackSize); //assert(sortedHypos.size() == GetSize()); - Recycler &recycler = mgr.GetHypoRecycle(); - /* cerr << "UNSORTED hypos: "; BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { @@ -241,24 +267,6 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, cons sortedHypos + GetSize(), HypothesisFutureScoreOrderer()); - // update worse score - m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); - - // prune - for (size_t i = maxStackSize; i < GetSize(); ++i) { - HypothesisBase *hypo = const_cast(sortedHypos[i]); - - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - - // delete from collection - Delete(hypo); - - recycler.Recycle(hypo); - } - /* cerr << "sorted hypos: "; for (size_t i = 0; i < sortedHypos.size(); ++i) { diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index ac9a32a2c..4b0008858 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -68,7 +68,8 @@ protected: StackAdd Add(const HypothesisBase *hypo); void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); + void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const; }; diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 666eb0e98..553f82f3b 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -84,7 +84,7 @@ Parameter::Parameter() //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", - "maximum stack size for histogram pruning. 0 = unlimited stack size"); + "maximum stack size for histogram pruning. 
CANNOT USE 0 = unlimited stack size"); //AddParam(search_opts, "stack-diversity", "sd", // "minimum number of hypothesis of each coverage in stack (default 0)"); From ab28a3fc8c092cfbdb98a2ae92ce708e938697c4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 00:16:38 +0000 Subject: [PATCH 39/50] can prune stack < max stack size --- contrib/moses2/HypothesisColl.cpp | 18 +++++++++++++++--- contrib/moses2/legacy/Parameter.cpp | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 185b8bdc8..327e3dcd6 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -242,8 +242,8 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; - assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that - assert(GetSize() > maxStackSize); + //assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that + //assert(GetSize() > maxStackSize); //assert(sortedHypos.size() == GetSize()); /* @@ -259,7 +259,19 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, cons ++ind; } - const HypothesisBase **iterMiddle = sortedHypos + maxStackSize; + size_t indMiddle; + if (maxStackSize == 0) { + indMiddle = GetSize(); + } + else if (GetSize() > maxStackSize) { + indMiddle = maxStackSize; + } + else { + // GetSize() <= maxStackSize + indMiddle = GetSize(); + } + + const HypothesisBase **iterMiddle = sortedHypos + indMiddle; std::partial_sort( sortedHypos, diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp index 553f82f3b..666eb0e98 100644 --- a/contrib/moses2/legacy/Parameter.cpp +++ b/contrib/moses2/legacy/Parameter.cpp @@ -84,7 +84,7 @@ Parameter::Parameter() //AddParam(search_opts, "early-discarding-threshold", "edt", // "threshold for constructing hypotheses based on estimate cost"); AddParam(search_opts, "stack", "s", - "maximum stack size for histogram pruning. CANNOT USE 0 = unlimited stack size"); + "maximum stack size for histogram pruning. 
0 = unlimited stack size"); //AddParam(search_opts, "stack-diversity", "sd", // "minimum number of hypothesis of each coverage in stack (default 0)"); From 8ef7db569a82604fc523aa19416e2dcabd0faa43 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 00:38:11 +0000 Subject: [PATCH 40/50] merge similar sorting functions --- contrib/moses2/HypothesisColl.cpp | 73 +++++++++---------------------- contrib/moses2/HypothesisColl.h | 3 +- 2 files changed, 21 insertions(+), 55 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 327e3dcd6..8bad53888 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -147,13 +147,25 @@ const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( m_sortedHypos = new (pool.Allocate()) Hypotheses(pool, m_coll.size()); - size_t ind = 0; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll){ - (*m_sortedHypos)[ind] = hypo; - ++ind; - } + SortHypos(mgr, m_sortedHypos->GetArray()); + + // prune + Recycler &recycler = mgr.GetHypoRecycle(); + + size_t maxStackSize = mgr.system.options.search.stack_size; + if (maxStackSize && m_sortedHypos->size() > maxStackSize) { + for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) { + HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); + recycler.Recycle(hypo); + + // delete from arclist + if (mgr.system.options.nbest.nbest_size) { + arcLists.Delete(hypo); + } + } + m_sortedHypos->resize(maxStackSize); + } - SortAndPruneHypos(mgr, arcLists); } return *m_sortedHypos; @@ -165,51 +177,6 @@ const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const return *m_sortedHypos; } -void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr, - ArcLists &arcLists) const -{ - size_t stackSize = mgr.system.options.search.stack_size; - Recycler &recycler = mgr.GetHypoRecycle(); - - /* - cerr << "UNSORTED hypos: "; - BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { - cerr << hypo << "(" << hypo->GetFutureScore() 
<< ")" << " "; - } - cerr << endl; - */ - Hypotheses::iterator iterMiddle; - iterMiddle = - (stackSize == 0 || m_sortedHypos->size() < stackSize) ? - m_sortedHypos->end() : m_sortedHypos->begin() + stackSize; - - std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(), - HypothesisFutureScoreOrderer()); - - // prune - if (stackSize && m_sortedHypos->size() > stackSize) { - for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) { - HypothesisBase *hypo = const_cast((*m_sortedHypos)[i]); - recycler.Recycle(hypo); - - // delete from arclist - if (mgr.system.options.nbest.nbest_size) { - arcLists.Delete(hypo); - } - } - m_sortedHypos->resize(stackSize); - } - - /* - cerr << "sorted hypos: "; - for (size_t i = 0; i < m_sortedHypos->size(); ++i) { - const HypothesisBase *hypo = (*m_sortedHypos)[i]; - cerr << hypo << " "; - } - cerr << endl; - */ -} - void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { size_t maxStackSize = mgr.system.options.search.stack_size; @@ -217,7 +184,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) Recycler &recycler = mgr.GetHypoRecycle(); const HypothesisBase *sortedHypos[GetSize()]; - PruneHypos(mgr, mgr.arcLists, sortedHypos); + SortHypos(mgr, sortedHypos); // update worse score m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); @@ -239,7 +206,7 @@ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) } -void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const +void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const { size_t maxStackSize = mgr.system.options.search.stack_size; //assert(maxStackSize); // can't do stack=0 - unlimited stack size. 
No-one ever uses that diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 4b0008858..8b8cb8f85 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -66,10 +66,9 @@ protected: SCORE m_worstScore; StackAdd Add(const HypothesisBase *hypo); - void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const; void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists); - void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists, const HypothesisBase **sortedHypos) const; + void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const; }; From bc6c1f41601159f8f7de7930b748aae22d085ffb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 7 Dec 2016 10:09:38 +0000 Subject: [PATCH 41/50] cleanup --- contrib/moses2/HypothesisColl.cpp | 8 +------- contrib/moses2/HypothesisColl.h | 4 +--- .../moses2/PhraseBased/CubePruningMiniStack/Search.cpp | 4 ++-- contrib/moses2/PhraseBased/Normal/Search.cpp | 4 ++-- contrib/moses2/SCFG/nbest/KBestExtractor.cpp | 2 +- contrib/moses2/TranslationModel/PhraseTable.cpp | 2 +- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 8bad53888..4566cc925 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -137,7 +137,7 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) //assert(false); } -const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( +const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos( const ManagerBase &mgr, ArcLists &arcLists) const { @@ -171,12 +171,6 @@ const Hypotheses &HypothesisColl::GetSortedAndPruneHypos( return *m_sortedHypos; } -const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const -{ - UTIL_THROW_IF2(m_sortedHypos == NULL, "m_sortedHypos must be sorted beforehand"); - return *m_sortedHypos; -} - void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) { size_t 
maxStackSize = mgr.system.options.search.stack_size; diff --git a/contrib/moses2/HypothesisColl.h b/contrib/moses2/HypothesisColl.h index 8b8cb8f85..81a3b25c3 100644 --- a/contrib/moses2/HypothesisColl.h +++ b/contrib/moses2/HypothesisColl.h @@ -35,12 +35,10 @@ public: void Clear(); - const Hypotheses &GetSortedAndPruneHypos( + const Hypotheses &GetSortedAndPrunedHypos( const ManagerBase &mgr, ArcLists &arcLists) const; - const Hypotheses &GetSortedAndPrunedHypos() const; - const HypothesisBase *GetBestHypo() const; template diff --git a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp index 4ea61e0ba..94baafeb9 100644 --- a/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp +++ b/contrib/moses2/PhraseBased/CubePruningMiniStack/Search.cpp @@ -206,7 +206,7 @@ void Search::PostDecode(size_t stackInd) CubeEdges &edges = *m_cubeEdges[numWords]; // sort hypo for a particular bitmap and hypoEndPos - const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); size_t numPt = mgr.system.mappings.size(); for (size_t i = 0; i < numPt; ++i) { @@ -232,7 +232,7 @@ void Search::AddInitialTrellisPaths(TrellisPaths &paths) const const Stack::Coll &coll = m_stack.GetColl(); BOOST_FOREACH(const Stack::Coll::value_type &val, coll){ Moses2::HypothesisColl &hypos = *val.second; - const Hypotheses &sortedHypos = hypos.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &sortedHypos = hypos.GetSortedAndPrunedHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, sortedHypos) { const Hypothesis *hypo = static_cast(hypoBase); diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp index 5c89eecc6..f7e26f907 100644 --- a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -73,7 +73,7 @@ 
void Search::Decode(size_t stackInd) return; } - const Hypotheses &hypos = stack.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &hypos = stack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); //cerr << "hypos=" << hypos.size() << endl; const InputPaths &paths = mgr.GetInputPaths(); @@ -147,7 +147,7 @@ const Hypothesis *Search::GetBestHypo() const void Search::AddInitialTrellisPaths(TrellisPaths &paths) const { const Stack &lastStack = m_stacks.Back(); - const Hypotheses &hypos = lastStack.GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Hypotheses &hypos = lastStack.GetSortedAndPrunedHypos(mgr, mgr.arcLists); BOOST_FOREACH(const HypothesisBase *hypoBase, hypos){ const Hypothesis *hypo = static_cast(hypoBase); diff --git a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp index 87b9c86d5..ae7ec8634 100644 --- a/contrib/moses2/SCFG/nbest/KBestExtractor.cpp +++ b/contrib/moses2/SCFG/nbest/KBestExtractor.cpp @@ -42,7 +42,7 @@ void KBestExtractor::OutputToStream(std::stringstream &strm) UTIL_THROW_IF2(lastStack.GetColl().size() != 1, "Only suppose to be 1 hypo coll in last stack"); UTIL_THROW_IF2(lastStack.GetColl().begin()->second == NULL, "NULL hypo collection"); - const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(); + const Hypotheses &hypos = lastStack.GetColl().begin()->second->GetSortedAndPrunedHypos(m_mgr, m_mgr.arcLists); UTIL_THROW_IF2(hypos.size() != 1, "Only suppose to be 1 hypo in collection"); const HypothesisBase *hypo = hypos[0]; diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/contrib/moses2/TranslationModel/PhraseTable.cpp index 6c2d6eaf2..ccc871b08 100644 --- a/contrib/moses2/TranslationModel/PhraseTable.cpp +++ b/contrib/moses2/TranslationModel/PhraseTable.cpp @@ -154,7 +154,7 @@ void PhraseTable::LookupNT( BOOST_FOREACH (const SCFG::Stack::Coll::value_type &valPair, stackColl) { const SCFG::Word &ntSought = valPair.first; const 
Moses2::HypothesisColl *hypos = valPair.second; - const Moses2::Hypotheses &sortedHypos = hypos->GetSortedAndPruneHypos(mgr, mgr.arcLists); + const Moses2::Hypotheses &sortedHypos = hypos->GetSortedAndPrunedHypos(mgr, mgr.arcLists); //cerr << "ntSought=" << ntSought << ntSought.isNonTerminal << endl; LookupGivenWord(pool, mgr, prevPath, ntSought, &sortedHypos, subPhraseRange, outPath); } From dd1532637b1b28dfdcfd115979ae9b513bc8f729 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Dec 2016 18:09:52 +0000 Subject: [PATCH 42/50] delete debugging message --- contrib/moses2/TranslationModel/PhraseTable.cpp | 2 +- contrib/other-builds/moses/.cproject | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/moses2/TranslationModel/PhraseTable.cpp b/contrib/moses2/TranslationModel/PhraseTable.cpp index ccc871b08..c790147bb 100644 --- a/contrib/moses2/TranslationModel/PhraseTable.cpp +++ b/contrib/moses2/TranslationModel/PhraseTable.cpp @@ -87,8 +87,8 @@ void PhraseTable::Lookup(const Manager &mgr, InputPathsBase &inputPaths) const if (SatisfyBackoff(mgr, *path)) { TargetPhrases *tpsPtr = Lookup(mgr, mgr.GetPool(), *path); - cerr << "tpsPtr=" << tpsPtr << " "; /* + cerr << "tpsPtr=" << tpsPtr << " "; if (tps.get()) { cerr << tps.get()->GetSize(); } diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 81da1d22b..e1a30c7af 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,7 +11,7 @@ - + @@ -86,7 +86,7 @@ - + From 304a6652f77a43f127206e58a95f7e517d32a0d4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Dec 2016 18:16:30 +0000 Subject: [PATCH 43/50] debugging message --- contrib/moses2/PhraseBased/Normal/Search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/moses2/PhraseBased/Normal/Search.cpp b/contrib/moses2/PhraseBased/Normal/Search.cpp index f7e26f907..7c5026a7c 100644 --- 
a/contrib/moses2/PhraseBased/Normal/Search.cpp +++ b/contrib/moses2/PhraseBased/Normal/Search.cpp @@ -60,7 +60,7 @@ void Search::Decode() if (stackInd < m_stacks.GetSize() - 1) { m_stacks.Delete(stackInd); } - //cerr << m_stacks << endl; + //cerr << m_stacks.Debug(mgr.system) << endl; } } From 25b87e14debb2bd711b3ec45a85c815518bb570b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 16 Dec 2016 15:58:50 +0000 Subject: [PATCH 44/50] unused method --- contrib/moses2/Weights.cpp | 10 ---------- contrib/moses2/Weights.h | 2 -- 2 files changed, 12 deletions(-) diff --git a/contrib/moses2/Weights.cpp b/contrib/moses2/Weights.cpp index d0d923e93..643847eee 100644 --- a/contrib/moses2/Weights.cpp +++ b/contrib/moses2/Weights.cpp @@ -36,16 +36,6 @@ void Weights::Init(const FeatureFunctions &ffs) m_weights.resize(totalNumScores, 1); } -std::ostream &Weights::Debug(std::ostream &out, const System &system) const -{ - const FeatureFunctions &ffs = system.featureFunctions; - size_t numScores = ffs.GetNumScores(); - for (size_t i = 0; i < numScores; ++i) { - out << m_weights[i] << " "; - } - -} - std::vector Weights::GetWeights(const FeatureFunction &ff) const { std::vector ret(m_weights.begin() + ff.GetStartInd(), m_weights.begin() + ff.GetStartInd() + ff.GetNumScores()); diff --git a/contrib/moses2/Weights.h b/contrib/moses2/Weights.h index d822ff923..c3c2cee62 100644 --- a/contrib/moses2/Weights.h +++ b/contrib/moses2/Weights.h @@ -27,8 +27,6 @@ public: return m_weights[ind]; } - std::ostream &Debug(std::ostream &out, const System &system) const; - std::vector GetWeights(const FeatureFunction &ff) const; void SetWeights(const FeatureFunctions &ffs, const std::string &ffName, const std::vector &weights); From e72bc47910ae58ab98a8104ca1161a020001dd19 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 21 Dec 2016 18:04:22 +0000 Subject: [PATCH 45/50] eclipse --- contrib/other-builds/moses/.cproject | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff 
--git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 81da1d22b..491caa587 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -11,7 +11,7 @@ - + @@ -26,15 +26,15 @@ - - - - @@ -86,7 +86,7 @@ - + From c6c3bc84b7673618f379482cbc6b708f55a9ecd3 Mon Sep 17 00:00:00 2001 From: alvations Date: Fri, 23 Dec 2016 14:21:20 +0800 Subject: [PATCH 46/50] Changed \p{Hyphen} to \p{LineBreak} Using Perl v5.18.2, it's reporting this warning: **Use of 'Hyphen' in \p{} or \P{} is deprecated because: Supplanted by Line_Break property values; see www.unicode.org/reports/tr14** --- scripts/generic/mteval-v13a.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generic/mteval-v13a.pl b/scripts/generic/mteval-v13a.pl index bdc2d9479..2e5d29ad5 100755 --- a/scripts/generic/mteval-v13a.pl +++ b/scripts/generic/mteval-v13a.pl @@ -19,6 +19,8 @@ binmode STDERR, ":utf8"; # version 13a # * modified the scoring functions to prevent division-by-zero errors when a system segment is empty # * affected methods: 'bleu_score' and 'bleu_score_smoothing' +# * use \p{Line_Break} instead of \p{Hyphen} when stripping end-of-line hyphenation and join lines +# * because \p{Hyphen} is deprecated since 2016-06-01, see http://www.unicode.org/reports/tr14/#Hyphen # # version 13 # * Uses a XML parser to read data (only when extension is .xml) @@ -948,7 +950,7 @@ sub tokenization_international my ($norm_text) = @_; $norm_text =~ s///g; # strip "skipped" tags - $norm_text =~ s/\p{Hyphen}\p{Zl}//g; # strip end-of-line hyphenation and join lines + $norm_text =~ s/\p{Line_Break}\p{Zl}//g; # strip end-of-line hyphenation and join lines $norm_text =~ s/\p{Zl}/ /g; # join lines # replace entities From c6eec7335faf2cf6b1231a9622fa99ec747c3244 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 27 Dec 2016 13:55:03 +0000 Subject: [PATCH 47/50] distortion-limit=-1 is unlimited distortion ---
contrib/moses2/FF/Distortion.cpp | 2 ++ contrib/moses2/HypothesisColl.cpp | 10 ++++++++-- contrib/moses2/LM/LanguageModel.cpp | 2 ++ contrib/moses2/PhraseBased/Search.cpp | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp index 4220b731f..343e1d21f 100644 --- a/contrib/moses2/FF/Distortion.cpp +++ b/contrib/moses2/FF/Distortion.cpp @@ -118,6 +118,8 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr, DistortionState_traditional &stateCast = static_cast(state); stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos()); + + //cerr << "hypo=" << hypo.Debug(mgr.system) << endl; } SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr, diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index 4566cc925..def18a16a 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -60,18 +60,18 @@ void HypothesisColl::Add( } SCORE futureScore = hypo->GetFutureScore(); + /* cerr << "scores:" << futureScore << " " << m_bestScore << " " - << m_minBeamScore << " " << GetSize() << " " << endl; */ if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - //cerr << "Discard, really bad score:" << hypo->Debug(system) << endl; + cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; hypoRecycle.Recycle(hypo); return; } @@ -111,14 +111,18 @@ void HypothesisColl::Add( StackAdd HypothesisColl::Add(const HypothesisBase *hypo) { std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo); + //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl; // CHECK RECOMBINATION if (addRet.second) { // equiv hypo doesn't exists + //cerr << "Added " << hypo << endl; return StackAdd(true, 
NULL); } else { HypothesisBase *hypoExisting = const_cast(*addRet.first); + //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl; + if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) { // incoming hypo is better than the one we have const HypothesisBase * const &hypoExisting1 = *addRet.first; @@ -126,10 +130,12 @@ StackAdd HypothesisColl::Add(const HypothesisBase *hypo) const_cast(hypoExisting1); hypoExisting2 = hypo; + //cerr << "Added " << hypo << " dicard existing " << hypoExisting2 << endl; return StackAdd(true, hypoExisting); } else { // already storing the best hypo. discard incoming hypo + //cerr << "Keep existing " << hypoExisting << " dicard new " << hypo << endl; return StackAdd(false, hypoExisting); } } diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp index 8a6fe3b39..d4eeb7b1e 100644 --- a/contrib/moses2/LM/LanguageModel.cpp +++ b/contrib/moses2/LM/LanguageModel.cpp @@ -146,6 +146,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { + cerr << "start LanguageModel::EvaluateInIsolation" << endl; if (targetPhrase.GetSize() == 0) { return; } @@ -174,6 +175,7 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore); estimatedScore += weightedScore; + cerr << "end LanguageModel::EvaluateInIsolation" << endl; } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, diff --git a/contrib/moses2/PhraseBased/Search.cpp b/contrib/moses2/PhraseBased/Search.cpp index 6c35aa151..1a85e15f5 100644 --- a/contrib/moses2/PhraseBased/Search.cpp +++ b/contrib/moses2/PhraseBased/Search.cpp @@ -39,6 +39,10 @@ bool Search::CanExtend(const Bitmap &hypoBitmap, size_t hypoRangeEndPos, return false; } + if 
(mgr.system.options.reordering.max_distortion == -1) { + return true; + } + if (mgr.system.options.reordering.max_distortion >= 0) { // distortion limit int distortion = ComputeDistortionDistance(hypoRangeEndPos, From e14a71c3ea7483a2f7d96007b9bb88ea576226af Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 28 Dec 2016 10:57:19 +0000 Subject: [PATCH 48/50] debugging messages --- contrib/moses2/HypothesisColl.cpp | 2 +- contrib/moses2/LM/LanguageModel.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/contrib/moses2/HypothesisColl.cpp b/contrib/moses2/HypothesisColl.cpp index def18a16a..a75113d58 100644 --- a/contrib/moses2/HypothesisColl.cpp +++ b/contrib/moses2/HypothesisColl.cpp @@ -71,7 +71,7 @@ void HypothesisColl::Add( if (GetSize() >= maxStackSize && futureScore < m_worstScore) { // beam threshold or really bad hypo that won't make the pruning cut // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point - cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; + //cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl; hypoRecycle.Recycle(hypo); return; } diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp index d4eeb7b1e..3e0c39d20 100644 --- a/contrib/moses2/LM/LanguageModel.cpp +++ b/contrib/moses2/LM/LanguageModel.cpp @@ -146,7 +146,6 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, const TargetPhraseImpl &targetPhrase, Scores &scores, SCORE &estimatedScore) const { - cerr << "start LanguageModel::EvaluateInIsolation" << endl; if (targetPhrase.GetSize() == 0) { return; } @@ -174,8 +173,6 @@ void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, scores.PlusEquals(system, *this, score); SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore); estimatedScore += weightedScore; - - cerr << "end 
LanguageModel::EvaluateInIsolation" << endl; } void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase &source, From c30b28f43b902e48e399ab5cf6c60f6f62c1fb50 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 29 Dec 2016 12:03:59 +0000 Subject: [PATCH 49/50] Edge case of wall at the end of sentence /Mike Ladwig --- contrib/moses2/PhraseBased/ReorderingConstraint.cpp | 1 + contrib/moses2/PhraseBased/Sentence.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp b/contrib/moses2/PhraseBased/ReorderingConstraint.cpp index d2211d817..cff09cc24 100644 --- a/contrib/moses2/PhraseBased/ReorderingConstraint.cpp +++ b/contrib/moses2/PhraseBased/ReorderingConstraint.cpp @@ -60,6 +60,7 @@ void ReorderingConstraint::FinalizeWalls() void ReorderingConstraint::SetWall( size_t pos, bool value ) { //cerr << "SETTING reordering wall at position " << pos << std::endl; + UTIL_THROW_IF2(pos >= m_size, "Wall over length of sentence: " << pos << " >= " << m_size); m_wall[pos] = value; m_active = true; } diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/contrib/moses2/PhraseBased/Sentence.cpp index d0c728530..dbedf878e 100644 --- a/contrib/moses2/PhraseBased/Sentence.cpp +++ b/contrib/moses2/PhraseBased/Sentence.cpp @@ -84,7 +84,7 @@ Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, for(size_t i=0; iGetNodeName(), "wall") == 0) { - UTIL_THROW_IF2(xmlOption->startPos >= ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please + UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the sentence"); // no buggy walls, please reorderingConstraint.SetWall(xmlOption->startPos - 1, true); } else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) { From 347980fd426dcbc2cc2126c187f22473f819a3d4 Mon Sep 17 00:00:00 2001 From: Doried Abd-Allah Date: Thu, 29 Dec 2016 15:16:07 +0200 Subject: [PATCH 50/50] fixing errors while 
building with oxlm: renamed iptr to boost_iptr because of a conflict with eigen library required by oxlm, added boost_serialization to oxlm requirements in jamroot --- Jamroot | 1 - moses/TranslationModel/UG/mm/ug_typedefs.h | 2 +- moses/TranslationModel/UG/test-ranked-phrase-lookup.cc | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Jamroot b/Jamroot index 7a7be5c93..1c4d68abd 100644 --- a/Jamroot +++ b/Jamroot @@ -183,7 +183,6 @@ requirements += [ option.get "with-mm" : : MAX_NUM_FACTORS=4 ] ; requirements += [ option.get "unlabelled-source" : : UNLABELLED_SOURCE ] ; if [ option.get "with-oxlm" ] { - external-lib boost_serialization ; external-lib gomp ; requirements += boost_serialization ; requirements += gomp ; diff --git a/moses/TranslationModel/UG/mm/ug_typedefs.h b/moses/TranslationModel/UG/mm/ug_typedefs.h index fc9d7faef..4815846a2 100644 --- a/moses/TranslationModel/UG/mm/ug_typedefs.h +++ b/moses/TranslationModel/UG/mm/ug_typedefs.h @@ -36,7 +36,7 @@ namespace sapt #ifndef SPTR #define SPTR boost::shared_ptr #endif -#define iptr boost::intrusive_ptr +#define boost_iptr boost::intrusive_ptr #define scoptr boost::scoped_ptr #define rcast reinterpret_cast #endif diff --git a/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc b/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc index 613e46360..ff9f1c722 100644 --- a/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc +++ b/moses/TranslationModel/UG/test-ranked-phrase-lookup.cc @@ -76,7 +76,7 @@ int main(int argc, char* argv[]) { typedef vector > pplist_t; interpret_args(argc, argv); - iptr Bptr(new mmbitext); + boost_iptr Bptr(new mmbitext); mmbitext& B = *Bptr;// static_cast(Bptr.get()); B.open(bname, L1, L2); B.V1->setDynamic(true);