From 2ac93eca5c1e1e762696691def389610cd607cd8 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sun, 15 Jun 2014 21:11:33 +0100 Subject: [PATCH 01/19] PScorePbwd can now scale backward marginals as specified by the user. --- moses/TranslationModel/UG/mm/custom-pt.cc | 2 +- moses/TranslationModel/UG/mmsapt.cpp | 38 ++++++++++++++----- moses/TranslationModel/UG/mmsapt.h | 1 + .../UG/mmsapt_phrase_scorers.h | 13 +++++-- 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc index 086ef42a6..1c1e0893c 100644 --- a/moses/TranslationModel/UG/mm/custom-pt.cc +++ b/moses/TranslationModel/UG/mm/custom-pt.cc @@ -131,7 +131,7 @@ int main(int argc, char* argv[]) size_t i; i = calc_pfwd.init(0,.05,'g'); - i = calc_pbwd.init(i,.05); + i = calc_pbwd.init(i,.05,'g'); i = calc_lex.init(i,base+L1+"-"+L2+".lex"); i = apply_wp.init(i); diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 730a9dc42..65a7a06ad 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -60,7 +60,10 @@ namespace Moses , m_lex_alpha(1.0) , withLogCountFeatures(false) , withCoherence(true) - , m_pfwd_features("g"), withPbwd(true), poolCounts(true) + , m_pfwd_features("g") + , m_pbwd_features("g") + , withPbwd(true) + , poolCounts(true) , ofactor(1,0) , m_tpc_ctr(0) { @@ -125,13 +128,16 @@ namespace Moses if ((m = param.find("pfwd")) != param.end()) m_pfwd_features = (m->second == "0" ? "" : m->second); - - if (m_pfwd_features == "1") + + if (m_pfwd_features == "1") // legacy; deprecated m_pfwd_features[0] = m_pfwd_denom; if ((m = param.find("pbwd")) != param.end()) - withPbwd = m->second != "0"; - + m_pbwd_features = (m->second == "0" ? 
"" : m->second); + + if (m_pbwd_features == "1") + m_pbwd_features = "r"; // legacy; deprecated + if ((m = param.find("lexalpha")) != param.end()) m_lex_alpha = atof(m->second.c_str()); @@ -208,14 +214,28 @@ namespace Moses ffvec.push_back(ff); } - if (withPbwd) - { + for (size_t i = 0; i < m_pbwd_features.size(); ++i) + { + UTIL_THROW_IF2(m_pbwd_features[i] != 'g' && + m_pbwd_features[i] != 'r' && + m_pbwd_features[i] != 's', + "Can't handle pbwd feature type '" + << m_pbwd_features[i] << "'."); sptr > ff(new PScorePbwd()); size_t k = num_feats; - num_feats = ff->init(num_feats,lbop); - for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); + num_feats = ff->init(num_feats,lbop,m_pbwd_features[i]); + for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); ffvec.push_back(ff); } + + // if (withPbwd) + // { + // sptr > ff(new PScorePbwd()); + // size_t k = num_feats; + // num_feats = ff->init(num_feats,lbop); + // for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); + // ffvec.push_back(ff); + // } if (withLogCountFeatures) { diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index d2c5d251b..b6be36131 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -79,6 +79,7 @@ namespace Moses bool withLogCountFeatures; // add logs of counts as features? 
bool withCoherence; string m_pfwd_features; // which pfwd functions to use + string m_pbwd_features; // which pbwd functions to use vector m_feature_names; // names of features activated vector > m_active_ff_fix; // activated feature functions (fix) vector > m_active_ff_dyn; // activated feature functions (dyn) diff --git a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h index 36c134da2..6e852b44b 100644 --- a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h +++ b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h @@ -101,6 +101,7 @@ namespace Moses { PScorePbwd : public PhraseScorer { float conf; + char denom; public: PScorePbwd() { @@ -108,12 +109,13 @@ namespace Moses { } int - init(int const i, float const c) + init(int const i, float const c, char d) { conf = c; + denom = d; this->m_index = i; ostringstream buf; - buf << format("pbwd%.3f") % c; + buf << format("pbwd-%c%.3f") % denom % c; this->m_feature_names.push_back(buf.str()); return i + this->m_num_feats; } @@ -123,7 +125,12 @@ namespace Moses { vector * dest = NULL) const { if (!dest) dest = &pp.fvals; - (*dest)[this->m_index] = log(lbop(max(pp.raw2,pp.joint),pp.joint,conf)); + // we use the denominator specification to scale the raw counts on the + // target side; the clean way would be to counter-sample + uint32_t r2 = pp.raw2; + if (denom == 'g') r2 = round(r2 * float(pp.good1) / pp.raw1); + else if (denom == 's') r2 = round(r2 * float(pp.sample1) / pp.raw1); + (*dest)[this->m_index] = log(lbop(max(r2, pp.joint),pp.joint,conf)); } }; From 15630258d038fc8a2b115179b3e81391bb03acfb Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 00:00:46 -0400 Subject: [PATCH 02/19] redo eclipse projects --- contrib/other-builds/extract-ghkm/.cproject | 137 ------------ contrib/other-builds/extract-ghkm/.project | 209 ------------------ .../other-builds/extract-ordering/.cproject | 134 ----------- .../other-builds/extract-ordering/.project | 74 
------- contrib/other-builds/extract-rules/.cproject | 136 ------------ contrib/other-builds/extract-rules/.gitignore | 1 - contrib/other-builds/extract-rules/.project | 134 ----------- contrib/other-builds/extract/.cproject | 134 ----------- contrib/other-builds/extract/.project | 74 ------- 9 files changed, 1033 deletions(-) delete mode 100644 contrib/other-builds/extract-ghkm/.cproject delete mode 100644 contrib/other-builds/extract-ghkm/.project delete mode 100644 contrib/other-builds/extract-ordering/.cproject delete mode 100644 contrib/other-builds/extract-ordering/.project delete mode 100644 contrib/other-builds/extract-rules/.cproject delete mode 100644 contrib/other-builds/extract-rules/.gitignore delete mode 100644 contrib/other-builds/extract-rules/.project delete mode 100644 contrib/other-builds/extract/.cproject delete mode 100644 contrib/other-builds/extract/.project diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject deleted file mode 100644 index 61ea19161..000000000 --- a/contrib/other-builds/extract-ghkm/.cproject +++ /dev/null @@ -1,137 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project deleted file mode 100644 index b7c40f069..000000000 --- a/contrib/other-builds/extract-ghkm/.project +++ /dev/null @@ -1,209 +0,0 @@ - - - extract-ghkm - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - Alignment.cpp - 
1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp - - - Alignment.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h - - - AlignmentGraph.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp - - - AlignmentGraph.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h - - - ComposedRule.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp - - - ComposedRule.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h - - - Exception.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h - - - ExtractGHKM.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp - - - ExtractGHKM.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h - - - InputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp - - - InputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h - - - Jamfile - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile - - - Main.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp - - - Node.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp - - - Node.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h - - - Options.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h - - - OutputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp - - - OutputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h - - - ParseTree.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp - - - ParseTree.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h - - - ScfgRule.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp - - - ScfgRule.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h - - - ScfgRuleWriter.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp - - - 
ScfgRuleWriter.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h - - - Span.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp - - - Span.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h - - - Subgraph.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp - - - Subgraph.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h - - - SyntaxTree.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp - - - SyntaxTree.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h - - - XmlTree.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp - - - XmlTree.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h - - - XmlTreeParser.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp - - - XmlTreeParser.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h - - - tables-core.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp - - - tables-core.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h - - - diff --git a/contrib/other-builds/extract-ordering/.cproject b/contrib/other-builds/extract-ordering/.cproject deleted file mode 100644 index 1d4522e27..000000000 --- a/contrib/other-builds/extract-ordering/.cproject +++ /dev/null @@ -1,134 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/extract-ordering/.project b/contrib/other-builds/extract-ordering/.project deleted file mode 100644 index f95b064b7..000000000 --- a/contrib/other-builds/extract-ordering/.project +++ /dev/null @@ -1,74 +0,0 @@ - - - extract-ordering - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - 
org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - InputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp - - - InputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h - - - OutputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp - - - OutputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h - - - SentenceAlignment.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp - - - SentenceAlignment.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h - - - extract-ordering-main.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp - - - tables-core.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp - - - tables-core.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h - - - diff --git a/contrib/other-builds/extract-rules/.cproject b/contrib/other-builds/extract-rules/.cproject deleted file mode 100644 index c1fa1a0cb..000000000 --- a/contrib/other-builds/extract-rules/.cproject +++ /dev/null @@ -1,136 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/extract-rules/.gitignore b/contrib/other-builds/extract-rules/.gitignore deleted file mode 100644 index 98bbc3165..000000000 --- a/contrib/other-builds/extract-rules/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/Debug diff --git a/contrib/other-builds/extract-rules/.project b/contrib/other-builds/extract-rules/.project deleted file mode 100644 index 29ffed2a9..000000000 --- a/contrib/other-builds/extract-rules/.project +++ /dev/null @@ -1,134 +0,0 @@ - - - extract-rules - - - - - - 
org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - ExtractedRule.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/ExtractedRule.h - - - Hole.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/Hole.h - - - HoleCollection.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp - - - HoleCollection.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h - - - InputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp - - - InputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h - - - OutputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp - - - OutputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h - - - RuleExtractionOptions.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h - - - SentenceAlignment.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp - - - SentenceAlignment.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h - - - SentenceAlignmentWithSyntax.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp - - - SentenceAlignmentWithSyntax.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h - - - SyntaxTree.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp - - - SyntaxTree.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h - - - XmlTree.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp - - - XmlTree.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h - - - extract-rules-main.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp - - - gzfilebuf.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/gzfilebuf.h - - - tables-core.cpp - 1 - 
PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp - - - tables-core.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h - - - diff --git a/contrib/other-builds/extract/.cproject b/contrib/other-builds/extract/.cproject deleted file mode 100644 index 83bc724c3..000000000 --- a/contrib/other-builds/extract/.cproject +++ /dev/null @@ -1,134 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/extract/.project b/contrib/other-builds/extract/.project deleted file mode 100644 index 93ede441b..000000000 --- a/contrib/other-builds/extract/.project +++ /dev/null @@ -1,74 +0,0 @@ - - - extract - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - InputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp - - - InputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h - - - OutputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp - - - OutputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h - - - SentenceAlignment.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp - - - SentenceAlignment.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h - - - extract-main.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp - - - tables-core.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp - - - tables-core.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h - - - From d949774f411ca54c72d3bb7e1843584cf9c090d9 
Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 00:02:40 -0400 Subject: [PATCH 03/19] redo eclipse projects --- contrib/other-builds/score/.cproject | 133 --------------------------- contrib/other-builds/score/.project | 116 ----------------------- 2 files changed, 249 deletions(-) delete mode 100644 contrib/other-builds/score/.cproject delete mode 100644 contrib/other-builds/score/.project diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject deleted file mode 100644 index f51f35ef5..000000000 --- a/contrib/other-builds/score/.cproject +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/score/.project b/contrib/other-builds/score/.project deleted file mode 100644 index 05564d0f9..000000000 --- a/contrib/other-builds/score/.project +++ /dev/null @@ -1,116 +0,0 @@ - - - score - - - moses - util - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - DomainFeature.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.cpp - - - DomainFeature.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.h - - - ExtractionPhrasePair.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.cpp - - - ExtractionPhrasePair.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.h - - - InputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp - - - InputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h - - - 
InternalStructFeature.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InternalStructFeature.cpp - - - InternalStructFeature.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/InternalStructFeature.h - - - OutputFileStream.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp - - - OutputFileStream.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h - - - ScoreFeature.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.cpp - - - ScoreFeature.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.h - - - exception.cc - 1 - PARENT-3-PROJECT_LOC/util/exception.cc - - - exception.hh - 1 - PARENT-3-PROJECT_LOC/util/exception.hh - - - score-main.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/score-main.cpp - - - tables-core.cpp - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp - - - tables-core.h - 1 - PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h - - - From 1b9267adf39f9b144de62b0b31b684918b4b54b4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 00:03:59 -0400 Subject: [PATCH 04/19] redo eclipse projects --- contrib/other-builds/mira/.cproject | 177 ---------------------------- contrib/other-builds/mira/.project | 81 ------------- 2 files changed, 258 deletions(-) delete mode 100644 contrib/other-builds/mira/.cproject delete mode 100644 contrib/other-builds/mira/.project diff --git a/contrib/other-builds/mira/.cproject b/contrib/other-builds/mira/.cproject deleted file mode 100644 index b80748286..000000000 --- a/contrib/other-builds/mira/.cproject +++ /dev/null @@ -1,177 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/mira/.project b/contrib/other-builds/mira/.project deleted file mode 100644 index 03838731f..000000000 --- a/contrib/other-builds/mira/.project +++ /dev/null @@ -1,81 +0,0 @@ - - - 
mira - - - mert_lib - moses - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - Decoder.cpp - 1 - PARENT-3-PROJECT_LOC/mira/Decoder.cpp - - - Decoder.h - 1 - PARENT-3-PROJECT_LOC/mira/Decoder.h - - - Hildreth.cpp - 1 - PARENT-3-PROJECT_LOC/mira/Hildreth.cpp - - - Hildreth.h - 1 - PARENT-3-PROJECT_LOC/mira/Hildreth.h - - - HypothesisQueue.cpp - 1 - PARENT-3-PROJECT_LOC/mira/HypothesisQueue.cpp - - - HypothesisQueue.h - 1 - PARENT-3-PROJECT_LOC/mira/HypothesisQueue.h - - - Main.cpp - 1 - PARENT-3-PROJECT_LOC/mira/Main.cpp - - - Main.h - 1 - PARENT-3-PROJECT_LOC/mira/Main.h - - - MiraOptimiser.cpp - 1 - PARENT-3-PROJECT_LOC/mira/MiraOptimiser.cpp - - - Perceptron.cpp - 1 - PARENT-3-PROJECT_LOC/mira/Perceptron.cpp - - - From d7cbef5cbe4ed3aa02d85d9c321cee372b0a6c5a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 07:04:11 -0400 Subject: [PATCH 05/19] minor format change in consolidate --- contrib/other-builds/score/.cproject | 141 +++++++++++++++++++++++++++ contrib/other-builds/score/.project | 36 +++++++ phrase-extract/consolidate-main.cpp | 3 +- 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 contrib/other-builds/score/.cproject create mode 100644 contrib/other-builds/score/.project diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject new file mode 100644 index 000000000..bb5179ba8 --- /dev/null +++ b/contrib/other-builds/score/.cproject @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff 
--git a/contrib/other-builds/score/.project b/contrib/other-builds/score/.project new file mode 100644 index 000000000..f4fdf4eca --- /dev/null +++ b/contrib/other-builds/score/.project @@ -0,0 +1,36 @@ + + + score + + + moses + util + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + score-main.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/score-main.cpp + + + diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp index 43d912b81..a2174805c 100644 --- a/phrase-extract/consolidate-main.cpp +++ b/phrase-extract/consolidate-main.cpp @@ -332,8 +332,9 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC } // arbitrary key-value pairs + fileConsolidated << " ||| "; if (itemDirect.size() >= 6) { - fileConsolidated << " ||| " << itemDirect[5]; + fileConsolidated << itemDirect[5]; } fileConsolidated << endl; From 19e4504a57f027cac1692df74f32d2533ba323ed Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 15:16:37 -0400 Subject: [PATCH 06/19] eclipse --- contrib/other-builds/score/.cproject | 36 ++++++++++++-- contrib/other-builds/score/.project | 70 ++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/contrib/other-builds/score/.cproject b/contrib/other-builds/score/.cproject index bb5179ba8..1d42fd585 100644 --- a/contrib/other-builds/score/.cproject +++ b/contrib/other-builds/score/.cproject @@ -37,19 +37,47 @@ diff --git a/contrib/other-builds/score/.project b/contrib/other-builds/score/.project index f4fdf4eca..10e713124 100644 --- a/contrib/other-builds/score/.project +++ b/contrib/other-builds/score/.project @@ -27,10 +27,80 @@ 
org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + DomainFeature.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.cpp + + + DomainFeature.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/DomainFeature.h + + + ExtractionPhrasePair.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.cpp + + + ExtractionPhrasePair.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/ExtractionPhrasePair.h + + + InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp + + + InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h + + + InternalStructFeature.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InternalStructFeature.cpp + + + InternalStructFeature.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InternalStructFeature.h + + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h + + + ScoreFeature.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.cpp + + + ScoreFeature.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/ScoreFeature.h + score-main.cpp 1 PARENT-3-PROJECT_LOC/phrase-extract/score-main.cpp + + tables-core.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp + + + tables-core.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h + From 5d4083a3cfa195523934356c0e87e50b88037f4d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 15:39:07 -0400 Subject: [PATCH 07/19] eclipse --- contrib/other-builds/extract/.cproject | 133 +++++++++++++++++++++++++ contrib/other-builds/extract/.project | 74 ++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 contrib/other-builds/extract/.cproject create mode 100644 contrib/other-builds/extract/.project diff --git a/contrib/other-builds/extract/.cproject b/contrib/other-builds/extract/.cproject new file mode 100644 index 000000000..54c91657b --- /dev/null +++ b/contrib/other-builds/extract/.cproject @@ -0,0 +1,133 @@ + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/other-builds/extract/.project b/contrib/other-builds/extract/.project new file mode 100644 index 000000000..93ede441b --- /dev/null +++ b/contrib/other-builds/extract/.project @@ -0,0 +1,74 @@ + + + extract + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp + + + InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h + + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h + + + SentenceAlignment.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp + + + SentenceAlignment.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h + + + extract-main.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp + + + tables-core.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp + + + tables-core.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h + + + From 52eb4ef57b719cbb5b71d7a2d49d4abacdc8f1fa Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 15:53:14 -0400 Subject: [PATCH 08/19] eclipse --- contrib/other-builds/extract-rules/.cproject | 125 +++++++++++++++++++ contrib/other-builds/extract-rules/.project | 104 +++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 contrib/other-builds/extract-rules/.cproject create 
mode 100644 contrib/other-builds/extract-rules/.project diff --git a/contrib/other-builds/extract-rules/.cproject b/contrib/other-builds/extract-rules/.cproject new file mode 100644 index 000000000..5591c45d4 --- /dev/null +++ b/contrib/other-builds/extract-rules/.cproject @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/other-builds/extract-rules/.project b/contrib/other-builds/extract-rules/.project new file mode 100644 index 000000000..d640499a8 --- /dev/null +++ b/contrib/other-builds/extract-rules/.project @@ -0,0 +1,104 @@ + + + extract-rules + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp + + + InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h + + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h + + + SentenceAlignment.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp + + + SentenceAlignment.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h + + + SentenceAlignmentWithSyntax.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp + + + SentenceAlignmentWithSyntax.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h + + + SyntaxTree.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp + + + SyntaxTree.h + 1 + 
PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h + + + XmlTree.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp + + + XmlTree.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h + + + extract-main.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp + + + tables-core.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp + + + tables-core.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h + + + From 629a201152871c7bd9c377d7dcb6c73035fcdb1c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 15:59:34 -0400 Subject: [PATCH 09/19] eclipse --- contrib/other-builds/extract-ghkm/.cproject | 125 ++++++++++++++++++++ contrib/other-builds/extract-ghkm/.project | 119 +++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 contrib/other-builds/extract-ghkm/.cproject create mode 100644 contrib/other-builds/extract-ghkm/.project diff --git a/contrib/other-builds/extract-ghkm/.cproject b/contrib/other-builds/extract-ghkm/.cproject new file mode 100644 index 000000000..0a829b697 --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.cproject @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/other-builds/extract-ghkm/.project b/contrib/other-builds/extract-ghkm/.project new file mode 100644 index 000000000..f9570120b --- /dev/null +++ b/contrib/other-builds/extract-ghkm/.project @@ -0,0 +1,119 @@ + + + extract-ghkm + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + Hole.h + 1 + 
PARENT-3-PROJECT_LOC/phrase-extract/Hole.h + + + HoleCollection.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp + + + HoleCollection.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h + + + InputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp + + + InputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h + + + OutputFileStream.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp + + + OutputFileStream.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h + + + SentenceAlignment.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp + + + SentenceAlignment.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h + + + SentenceAlignmentWithSyntax.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp + + + SentenceAlignmentWithSyntax.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h + + + SyntaxTree.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp + + + SyntaxTree.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h + + + XmlTree.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp + + + XmlTree.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h + + + extract-rules-main.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp + + + tables-core.cpp + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp + + + tables-core.h + 1 + PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h + + + From fee06b2d957bf8171cb12b741c5a35cdeffbb86e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 16:04:04 -0400 Subject: [PATCH 10/19] eclipse --- contrib/other-builds/consolidate/.cproject | 123 ++++++++++----------- 1 file changed, 58 insertions(+), 65 deletions(-) diff --git a/contrib/other-builds/consolidate/.cproject b/contrib/other-builds/consolidate/.cproject index 3c70ed365..c3ff9b60a 100644 --- a/contrib/other-builds/consolidate/.cproject +++ 
b/contrib/other-builds/consolidate/.cproject @@ -1,8 +1,8 @@ - - + + @@ -14,41 +14,41 @@ - - - - - - - - - - - + + @@ -69,31 +69,31 @@ - - - - - - - - + - - + + - - + + - - + + - - + + - - - - - - - - + From ee6e1407385c96425d9614c1e5f0ba7b8eecf943 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 25 Jun 2014 16:14:25 -0400 Subject: [PATCH 11/19] eclipse --- contrib/other-builds/manual-label/.cproject | 123 ++++++++++---------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/contrib/other-builds/manual-label/.cproject b/contrib/other-builds/manual-label/.cproject index 2efd96e70..8e0dcc8e2 100644 --- a/contrib/other-builds/manual-label/.cproject +++ b/contrib/other-builds/manual-label/.cproject @@ -1,54 +1,54 @@ - - + + - + - - - - - - - - - - - + + - + - - - - - - - - + - - + + - - + + - - + + - - + + + From dba4caa7c0de9d6b364653a05f23cef7c2ed0b7f Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 26 Jun 2014 09:34:16 -0400 Subject: [PATCH 12/19] register PhraseDictionaryFuzzyMatch --- moses/FF/Factory.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 56295805d..aa48b11bb 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -10,6 +10,7 @@ #include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h" #include "moses/TranslationModel/PhraseDictionaryScope3.h" #include "moses/TranslationModel/PhraseDictionaryTransliteration.h" +#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h" #include "moses/FF/LexicalReordering/LexicalReordering.h" @@ -154,6 +155,18 @@ FeatureRegistry::FeatureRegistry() #define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >()); // Feature with different name than class. 
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >()); + + MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor); + MOSES_FNAME(PhraseDictionaryOnDisk); + MOSES_FNAME(PhraseDictionaryMemory); + MOSES_FNAME(PhraseDictionaryScope3); + MOSES_FNAME(PhraseDictionaryMultiModel); + MOSES_FNAME(PhraseDictionaryMultiModelCounts); + MOSES_FNAME(PhraseDictionaryALSuffixArray); + MOSES_FNAME(PhraseDictionaryDynSuffixArray); + MOSES_FNAME(PhraseDictionaryTransliteration); + MOSES_FNAME(PhraseDictionaryFuzzyMatch); + MOSES_FNAME(GlobalLexicalModel); //MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original MOSES_FNAME(SourceWordDeletionFeature); @@ -170,15 +183,6 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME2("Distortion", DistortionScoreProducer); MOSES_FNAME2("WordPenalty", WordPenaltyProducer); MOSES_FNAME(InputFeature); - MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor); - MOSES_FNAME(PhraseDictionaryOnDisk); - MOSES_FNAME(PhraseDictionaryMemory); - MOSES_FNAME(PhraseDictionaryScope3); - MOSES_FNAME(PhraseDictionaryMultiModel); - MOSES_FNAME(PhraseDictionaryMultiModelCounts); - MOSES_FNAME(PhraseDictionaryALSuffixArray); - MOSES_FNAME(PhraseDictionaryDynSuffixArray); - MOSES_FNAME(PhraseDictionaryTransliteration); MOSES_FNAME(OpSequenceModel); MOSES_FNAME(PhrasePenalty); MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer); From e4c35b9896ee374c15a060b6ba2526ecb1fc28dc Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 27 Jun 2014 01:02:23 +0100 Subject: [PATCH 13/19] Replaced TRACE_ERR(..) with VERBOSE(1,...) in PDTAimp.h. 
--- moses/PDTAimp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h index 999fbb1e0..2a7943ce2 100644 --- a/moses/PDTAimp.h +++ b/moses/PDTAimp.h @@ -233,7 +233,7 @@ public: //InputFileStream in(filePath); //m_dict->Create(in,filePath); } - TRACE_ERR( "reading bin ttable\n"); + VERBOSE(1,"reading bin ttable\n"); // m_dict->Read(filePath); bool res=m_dict->Read(filePath); if (!res) { From 40ab68b3d26b390f241c3f5248297697c9d980d3 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 27 Jun 2014 01:06:00 +0100 Subject: [PATCH 14/19] Adapted lookup_mmsapt to binary phrase tables; renamed the utility to ptable-lookup --- moses/TranslationModel/UG/Jamfile | 4 +- moses/TranslationModel/UG/ptable-lookup.cc | 127 +++++++++++++++++++++ 2 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 moses/TranslationModel/UG/ptable-lookup.cc diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile index 547928423..ecd175a65 100644 --- a/moses/TranslationModel/UG/Jamfile +++ b/moses/TranslationModel/UG/Jamfile @@ -9,8 +9,8 @@ $(TOP)/moses/TranslationModel/UG//mmsapt $(TOP)/util//kenutil ; -exe lookup_mmsapt : -lookup_mmsapt.cc +exe ptable-lookup : +ptable-lookup.cc $(TOP)/moses//moses $(TOP)/moses/TranslationModel/UG/generic//generic $(TOP)//boost_iostreams diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc new file mode 100644 index 000000000..106505f05 --- /dev/null +++ b/moses/TranslationModel/UG/ptable-lookup.cc @@ -0,0 +1,127 @@ +#include "mmsapt.h" +#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +#include +#include +#include +#include +#include +#include + +using namespace Moses; +using namespace bitext; +using namespace std; +using namespace boost; + +vector fo(1,FactorType(0)); + +class SimplePhrase : public Moses::Phrase +{ + vector const m_fo; // factor order +public: + SimplePhrase(): m_fo(1,FactorType(0)) {} + + 
void init(string const& s) + { + istringstream buf(s); string w; + while (buf >> w) + { + Word wrd; + this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false); + } + } +}; + +class TargetPhraseIndexSorter +{ + TargetPhraseCollection const& my_tpc; + CompareTargetPhrase cmp; +public: + TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {} + bool operator()(size_t a, size_t b) const + { + return cmp(*my_tpc[a], *my_tpc[b]); + } +}; + +int main(int argc, char* argv[]) +{ + Parameter params; + if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0])) + exit(1); + + StaticData const& global = StaticData::Instance(); + global.SetVerboseLevel(0); + vector ifo = global.GetInputFactorOrder(); + + PhraseDictionary* PT = PhraseDictionary::GetColl()[0]; + Mmsapt* mmsapt = dynamic_cast(PT); + PhraseDictionaryTreeAdaptor* pdta = dynamic_cast(PT); + // vector const& ffs = FeatureFunction::GetFeatureFunctions(); + + if (!mmsapt && !pdta) + { + cerr << "Phrase table implementation not supported by this utility." << endl; + exit(1); + } + + string line; + while (true) + { + Sentence phrase; + if (!phrase.Read(cin,ifo)) break; + if (pdta) + { + pdta->InitializeForInput(phrase); + // do we also need to call CleanupAfterSentenceProcessing at the end? 
+ } + Phrase& p = phrase; + + cout << p << endl; + TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p); + if (!trg) continue; + vector order(trg->GetSize()); + for (size_t i = 0; i < order.size(); ++i) order[i] = i; + sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg)); + size_t k = 0; + // size_t precision = + cout.precision(2); + + vector fname; + if (mmsapt) + { + fname = mmsapt->GetFeatureNames(); + cout << " "; + BOOST_FOREACH(string const& fn, fname) + cout << " " << format("%10.10s") % fn; + cout << endl; + } + + BOOST_FOREACH(size_t i, order) + { + Phrase const& phr = static_cast(*(*trg)[i]); + cout << setw(3) << ++k << " " << phr << endl; + ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown(); + ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); + FVector const& scores = scc.GetScoresVector(); + cout << " "; + for (size_t k = idx.first; k < idx.second; ++k) + { + if (mmsapt && fname[k-idx.first].substr(0,3) == "log") + { + if(scores[k] < 0) + cout << " " << format("%10d") % round(exp(-scores[k])); + else + cout << " " << format("%10d") % round(exp(scores[k])); + } + else + cout << " " << format("%10.8f") % exp(scores[k]); + } + cout << endl; + } + PT->Release(trg); + } + exit(0); +} + + + From 9dfb3540411223a0563577d287e3d9ea341ca595 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 27 Jun 2014 01:07:23 +0100 Subject: [PATCH 15/19] Phrase probability smoothing can now be turned off in ug_bitext.cc by setting the confidence threshold to 0. 
--- moses/TranslationModel/UG/mm/ug_bitext.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc index c4f5175f3..8dbbdcb92 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext.cc +++ b/moses/TranslationModel/UG/mm/ug_bitext.cc @@ -255,9 +255,10 @@ namespace Moses float lbop(size_t const tries, size_t const succ, float const confidence) { - return - boost::math::binomial_distribution<>:: - find_lower_bound_on_p(tries, succ, confidence); + return (confidence == 0 + ? float(succ)/tries + : (boost::math::binomial_distribution<>:: + find_lower_bound_on_p(tries, succ, confidence))); } PhrasePair const& From 1e50ac587d306573b7d3cb5c57c99303970562d7 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 27 Jun 2014 01:09:50 +0100 Subject: [PATCH 16/19] Replaced TRACE_ERR(..) with VERBOSE(1,...) in PhraseDictionaryTree.cpp --- moses/TranslationModel/PhraseDictionaryTree.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/moses/TranslationModel/PhraseDictionaryTree.cpp b/moses/TranslationModel/PhraseDictionaryTree.cpp index 68dd5a59f..c8b7cb5d2 100644 --- a/moses/TranslationModel/PhraseDictionaryTree.cpp +++ b/moses/TranslationModel/PhraseDictionaryTree.cpp @@ -3,6 +3,7 @@ #include "moses/FeatureVector.h" #include "moses/TranslationModel/PhraseDictionaryTree.h" #include "util/exception.hh" +#include "moses/StaticData.h" #include #include @@ -233,7 +234,8 @@ public: typedef PhraseDictionaryTree::PrefixPtr PPtr; void GetTargetCandidates(PPtr p,TgtCands& tgtCands) { - UTIL_THROW_IF2(p == NULL, "Error"); + UTIL_THROW_IF2(p == 0L, "Error"); + // UTIL_THROW_IF2(p == NULL, "Error"); if(p.imp->isRoot()) return; OFF_T tCandOffset=p.imp->ptr()->getData(p.imp->idx); @@ -278,7 +280,8 @@ public: } PPtr Extend(PPtr p,const std::string& w) { - UTIL_THROW_IF2(p == NULL, "Error"); + UTIL_THROW_IF2(p == 0L, "Error"); + // UTIL_THROW_IF2(p == NULL, 
"Error"); if(w.empty() || w==EPSILON) return p; @@ -349,8 +352,8 @@ int PDTimp::Read(const std::string& fn) sv.Read(ifsv); tv.Read(iftv); - TRACE_ERR("binary phrasefile loaded, default OFF_T: "<Read(fn); } From 9eaf506ace04c599c73c838d13fcd902fcecffa1 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 27 Jun 2014 01:10:14 +0100 Subject: [PATCH 17/19] Renamed lookup_mmsapt.cc to ptable-lookup.cc. --- moses/TranslationModel/UG/lookup_mmsapt.cc | 104 --------------------- 1 file changed, 104 deletions(-) delete mode 100644 moses/TranslationModel/UG/lookup_mmsapt.cc diff --git a/moses/TranslationModel/UG/lookup_mmsapt.cc b/moses/TranslationModel/UG/lookup_mmsapt.cc deleted file mode 100644 index e295f1012..000000000 --- a/moses/TranslationModel/UG/lookup_mmsapt.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include "mmsapt.h" -#include -#include -#include -#include -#include -#include - -using namespace Moses; -using namespace bitext; -using namespace std; -using namespace boost; - -vector fo(1,FactorType(0)); - -class SimplePhrase : public Moses::Phrase -{ - vector const m_fo; // factor order -public: - SimplePhrase(): m_fo(1,FactorType(0)) {} - - void init(string const& s) - { - istringstream buf(s); string w; - while (buf >> w) - { - Word wrd; - this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false); - } - } -}; - -class TargetPhraseIndexSorter -{ - TargetPhraseCollection const& my_tpc; - CompareTargetPhrase cmp; -public: - TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {} - bool operator()(size_t a, size_t b) const - { - return cmp(*my_tpc[a], *my_tpc[b]); - } -}; - -int main(int argc, char* argv[]) -{ - Parameter params; - if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0])) - exit(1); - - Mmsapt* PT = NULL; - BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl()) - if ((PT = dynamic_cast(pd))) break; - vector const& fname = PT->GetFeatureNames(); - - // vector const& ffs = 
FeatureFunction::GetFeatureFunctions(); - - string line; - while (getline(cin,line)) - { - SimplePhrase p; p.init(line); - cout << p << endl; - TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p); - if (!trg) continue; - vector order(trg->GetSize()); - for (size_t i = 0; i < order.size(); ++i) order[i] = i; - sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg)); - size_t k = 0; - // size_t precision = - cout.precision(2); - - BOOST_FOREACH(size_t i, order) - { - Phrase const& phr = static_cast(*(*trg)[i]); - cout << setw(3) << ++k << " " << phr << endl; - ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown(); - ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); - FVector const& scores = scc.GetScoresVector(); - cout << " "; - for (size_t k = idx.first; k < idx.second; ++k) - cout << " " << format("%10.10s") % fname[k-idx.first]; - cout << endl; - cout << " "; - for (size_t k = idx.first; k < idx.second; ++k) - { - if (fname[k-idx.first].substr(0,3) == "log") - { - if(scores[k] < 0) - cout << " " << format("%10d") % round(exp(-scores[k])); - else - cout << " " << format("%10d") % round(exp(scores[k])); - } - else - cout << " " << format("%10.8f") % exp(scores[k]); - } - cout << endl; - } - PT->Release(trg); - } - exit(0); -} - - - From 73081786bcefdd0b8978df5909bf638e4bd019e3 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 28 Jun 2014 20:08:41 +0100 Subject: [PATCH 18/19] Name change: lookup_mmsapt -> ptable-lookup --- Jamroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jamroot b/Jamroot index 687d1de7a..283b4dd6f 100644 --- a/Jamroot +++ b/Jamroot @@ -152,7 +152,7 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses if [ option.get "with-mm" : : "yes" ] { alias mm : - moses/TranslationModel/UG//lookup_mmsapt + moses/TranslationModel/UG//ptable-lookup moses/TranslationModel/UG/mm//mtt-build moses/TranslationModel/UG/mm//mtt-dump 
moses/TranslationModel/UG/mm//symal2mam From 556e1123660eb551de74418e89245a008f1fe0f5 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 30 Jun 2014 00:32:11 +0100 Subject: [PATCH 19/19] Major bug fix in Mmsapt.combine_pstats. --- moses/TranslationModel/UG/mmsapt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 65a7a06ad..dc9945472 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -576,9 +576,9 @@ namespace Moses else pool.update(a->first,a->second); BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) (*ff)(btb,pool,&ppfix.fvals); + if (ppfix.p2) + tpcoll->Add(createTargetPhrase(src,bta,ppfix)); } - if (ppfix.p2) - tpcoll->Add(createTargetPhrase(src,bta,ppfix)); } return (statsa || statsb); }