From 125e9a8569037bb1d7a0613bc5dde7cd370bf2c8 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 5 Oct 2013 10:48:01 +0100 Subject: [PATCH 01/12] add debug argument --- phrase-extract/PhraseExtractionOptions.h | 5 ++++- phrase-extract/extract-main.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h index d70c2a117..574b9afc1 100644 --- a/phrase-extract/PhraseExtractionOptions.h +++ b/phrase-extract/PhraseExtractionOptions.h @@ -52,6 +52,7 @@ private: public: std::vector placeholders; + bool debug; PhraseExtractionOptions(const int initmaxPhraseLength): maxPhraseLength(initmaxPhraseLength), @@ -67,7 +68,9 @@ public: includeSentenceIdFlag(false), onlyOutputSpanInfo(false), gzOutput(false), - flexScoreFlag(false) {} + flexScoreFlag(false), + debug(false) +{} //functions for initialization of options void initAllModelsOutputFlag(const bool initallModelsOutputFlag) { diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp index 0b21eb19a..49e01a50c 100644 --- a/phrase-extract/extract-main.cpp +++ b/phrase-extract/extract-main.cpp @@ -170,6 +170,8 @@ int main(int argc, char* argv[]) exit(1); } options.initInstanceWeightsFile(argv[++i]); + } else if (strcmp(argv[i], "--Debug") == 0) { + options.debug = true; } else if(strcmp(argv[i],"--model") == 0) { if (i+1 >= argc) { cerr << "extract: syntax error, no model's information provided to the option --model " << endl; @@ -719,6 +721,12 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, return; } + if (m_options.debug) { + outextractstr << "sentenceID=" << sentence.sentenceID << " "; + outextractstrInv << "sentenceID=" << sentence.sentenceID << " "; + outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " "; + } + for(int fi=startF; fi<=endF; fi++) { if (m_options.isTranslationFlag()) outextractstr << sentence.source[fi] << " "; if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.source[fi] << " "; From 17887a27969e83f4100bd0f4af98986e33999fbe Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 10:55:38 +0000 Subject: [PATCH 02/12] replace nth_element() with macro that execute sort() instead for gcc 4.8.1 & 4.8.2 --- mert/Util.h | 9 +++++++++ mert/pro.cpp | 3 ++- moses/ChartHypothesis.cpp | 2 +- moses/ChartTranslationOptionList.cpp | 4 ++-- moses/Hypothesis.cpp | 2 +- moses/PDTAimp.h | 2 +- moses/PartialTranslOptColl.cpp | 2 +- moses/TargetPhraseCollection.cpp | 2 +- .../CompactPT/PhraseDictionaryCompact.cpp | 2 +- moses/TranslationOptionCollection.cpp | 2 +- moses/Util.h | 9 +++++++++ search/nbest.cc | 3 ++- 12 files changed, 31 insertions(+), 11 deletions(-) diff --git a/mert/Util.h b/mert/Util.h index 5c9c635ab..7e6926d19 100644 --- a/mert/Util.h +++ b/mert/Util.h @@ -31,6 +31,15 @@ namespace MosesTuning #define TRACE_ERR(str) { } #endif +#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2) +// gcc nth_element() bug +#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer) +#else +#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer) +#endif + const char kDefaultDelimiterSymbol[] = " "; int verboselevel(); diff --git a/mert/pro.cpp b/mert/pro.cpp index b8cf81ca3..7660fe7d0 100644 --- a/mert/pro.cpp +++ b/mert/pro.cpp @@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "FeatureDataIterator.h" #include "ScoreDataIterator.h" #include "BleuScorer.h" +#include "Util.h" using namespace std; using namespace MosesTuning; @@ -232,7 +233,7 @@ int main(int argc, char** argv) float sample_threshold = -1.0; if (samples.size() > n_samples) { - nth_element(scores.begin(), scores.begin() + (n_samples-1), scores.end()); + NTH_ELEMENT3(scores.begin(), scores.begin() + (n_samples-1), scores.end()); sample_threshold = 0.99999-scores[n_samples-1]; } diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index 1cff9046d..034905158 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -245,7 +245,7 @@ void ChartHypothesis::CleanupArcList() if (!distinctNBest && m_arcList->size() > nBestSize) { // prune arc list only if there too many arcs - nth_element(m_arcList->begin() + NTH_ELEMENT4(m_arcList->begin() , m_arcList->begin() + nBestSize - 1 , m_arcList->end() , CompareChartChartHypothesisTotalScore()); diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp index c12035f18..1297e5f37 100644 --- a/moses/ChartTranslationOptionList.cpp +++ b/moses/ChartTranslationOptionList.cpp @@ -102,7 +102,7 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc, // Prune if bursting if (m_size == m_ruleLimit * 2) { - std::nth_element(m_collection.begin(), + NTH_ELEMENT4(m_collection.begin(), m_collection.begin() + m_ruleLimit - 1, m_collection.begin() + m_size, ChartTranslationOptionOrderer()); @@ -128,7 +128,7 @@ void ChartTranslationOptionList::ApplyThreshold() assert(m_size < m_ruleLimit * 2); // Reduce the list to the best m_ruleLimit options. The remaining // options can be overwritten on subsequent calls to Add(). - std::nth_element(m_collection.begin(), + NTH_ELEMENT4(m_collection.begin(), m_collection.begin()+m_ruleLimit, m_collection.begin()+m_size, ChartTranslationOptionOrderer()); diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index ba7953e62..e3140948e 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -335,7 +335,7 @@ void Hypothesis::CleanupArcList() if (!distinctNBest && m_arcList->size() > nBestSize * 5) { // prune arc list only if there too many arcs - nth_element(m_arcList->begin() + NTH_ELEMENT4(m_arcList->begin() , m_arcList->begin() + nBestSize - 1 , m_arcList->end() , CompareHypothesisTotalScore()); diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h index bb077945f..cc4e96cd1 100644 --- a/moses/PDTAimp.h +++ b/moses/PDTAimp.h @@ -324,7 +324,7 @@ public: m_obj->m_tableLimit : costs.size()); // find the nth phrase according to future cost - std::nth_element(costs.begin(),nth ,costs.end()); + NTH_ELEMENT3(costs.begin(),nth ,costs.end()); // add n top phrases to the return list for(std::vector >::iterator diff --git a/moses/PartialTranslOptColl.cpp b/moses/PartialTranslOptColl.cpp index 7accf3a76..709075c66 100644 --- a/moses/PartialTranslOptColl.cpp +++ b/moses/PartialTranslOptColl.cpp @@ -82,7 +82,7 @@ void PartialTranslOptColl::Prune() // TRACE_ERR( "pruning partial translation options from size " << m_list.size() << std::endl); // find nth element - nth_element(m_list.begin(), + NTH_ELEMENT4(m_list.begin(), m_list.begin() + m_maxSize, m_list.end(), ComparePartialTranslationOption); diff --git a/moses/TargetPhraseCollection.cpp b/moses/TargetPhraseCollection.cpp index 31dc98be8..c899e241d 100644 --- a/moses/TargetPhraseCollection.cpp +++ b/moses/TargetPhraseCollection.cpp @@ -58,7 +58,7 @@ void TargetPhraseCollection::NthElement(size_t tableLimit) nth = (tableLimit && tableLimit <= m_collection.size() ? m_collection.begin() + tableLimit : m_collection.end()); - std::nth_element(m_collection.begin(), nth, m_collection.end(), CompareTargetPhrase()); + NTH_ELEMENT4(m_collection.begin(), nth, m_collection.end(), CompareTargetPhrase()); } void TargetPhraseCollection::Prune(bool adhereTableLimit, size_t tableLimit) diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 876e334eb..e3c931589 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -129,7 +129,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s TargetPhraseVector::iterator nth = (m_tableLimit == 0 || tpv->size() < m_tableLimit) ? tpv->end() : tpv->begin() + m_tableLimit; - std::nth_element(tpv->begin(), nth, tpv->end(), CompareTargetPhrase()); + NTH_ELEMENT4(tpv->begin(), nth, tpv->end(), CompareTargetPhrase()); for(TargetPhraseVector::iterator it = tpv->begin(); it != nth; it++) { TargetPhrase *tp = new TargetPhrase(*it); phraseColl->Add(tp); diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 80c0d12dd..e51f3f450 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -107,7 +107,7 @@ void TranslationOptionCollection::Prune() if (m_maxNoTransOptPerCoverage > 0 && fullList.size() > m_maxNoTransOptPerCoverage) { // sort in vector - nth_element(fullList.begin(), fullList.begin() + m_maxNoTransOptPerCoverage, fullList.end(), CompareTranslationOption); + NTH_ELEMENT4(fullList.begin(), fullList.begin() + m_maxNoTransOptPerCoverage, fullList.end(), CompareTranslationOption); totalPruned += fullList.size() - m_maxNoTransOptPerCoverage; // delete the rest diff --git a/moses/Util.h b/moses/Util.h index 5517598f1..422449d43 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -58,6 +58,15 @@ namespace Moses #define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } } #define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level) +#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2) +// gcc nth_element() bug +#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer) +#else +#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer) +#endif + //! delete white spaces at beginning and end of string const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r"); const std::string ToLower(const std::string& str); diff --git a/search/nbest.cc b/search/nbest.cc index ec3322c97..acfc08049 100644 --- a/search/nbest.cc +++ b/search/nbest.cc @@ -1,6 +1,7 @@ #include "search/nbest.hh" #include "util/pool.hh" +#include "moses/Util.h" #include #include @@ -16,7 +17,7 @@ NBestList::NBestList(std::vector &partials, util::Pool &entry_pool, std::vector::iterator end; if (partials.size() > keep) { end = partials.begin() + keep; - std::nth_element(partials.begin(), end, partials.end(), std::greater()); + NTH_ELEMENT4(partials.begin(), end, partials.end(), std::greater()); } else { end = partials.end(); } From 94bf2733fe5cd1c9acb1d162873f955c13a7f981 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 10:56:13 +0000 Subject: [PATCH 03/12] not ready for prime time --- contrib/basic-decoder/.cproject | 149 ------------ contrib/basic-decoder/.dep.inc | 5 - contrib/basic-decoder/.gitignore | 3 - contrib/basic-decoder/.project | 27 --- .../FF/DistortionScoreProducer.cpp | 82 ------- .../FF/DistortionScoreProducer.h | 35 --- contrib/basic-decoder/FF/FeatureFunction.cpp | 131 ----------- contrib/basic-decoder/FF/FeatureFunction.h | 74 ------ contrib/basic-decoder/FF/LM/InternalLM.cpp | 144 ------------ contrib/basic-decoder/FF/LM/InternalLM.h | 41 ---- contrib/basic-decoder/FF/LM/LM.cpp | 164 ------------- contrib/basic-decoder/FF/LM/LM.h | 43 ---- contrib/basic-decoder/FF/LM/SRILM.cpp | 139 ----------- contrib/basic-decoder/FF/LM/SRILM.h | 38 --- contrib/basic-decoder/FF/PhrasePenalty.cpp | 23 -- contrib/basic-decoder/FF/PhrasePenalty.h | 18 -- .../FF/StatefulFeatureFunction.cpp | 42 ---- .../FF/StatefulFeatureFunction.h | 36 --- .../FF/StatelessFeatureFunction.cpp | 16 -- .../FF/StatelessFeatureFunction.h | 20 -- .../FF/TranslationModel/Memory/Node.cpp | 43 ---- .../FF/TranslationModel/Memory/Node.h | 32 --- .../FF/TranslationModel/PhraseTable.cpp | 26 --- .../FF/TranslationModel/PhraseTable.h | 32 --- .../FF/TranslationModel/PhraseTableMemory.cpp | 110 --------- .../FF/TranslationModel/PhraseTableMemory.h | 37 --- .../TranslationModel/UnknownWordPenalty.cpp | 70 ------ .../FF/TranslationModel/UnknownWordPenalty.h | 32 --- .../basic-decoder/FF/WordPenaltyProducer.cpp | 17 -- .../basic-decoder/FF/WordPenaltyProducer.h | 20 -- contrib/basic-decoder/Global.cpp | 172 -------------- contrib/basic-decoder/Global.h | 59 ----- contrib/basic-decoder/InputFileStream.cpp | 61 ----- contrib/basic-decoder/InputFileStream.h | 46 ---- contrib/basic-decoder/InputPath.cpp | 19 -- contrib/basic-decoder/InputPath.h | 51 ---- contrib/basic-decoder/Main.cpp | 57 ----- contrib/basic-decoder/Makefile | 32 --- contrib/basic-decoder/MyVocab.cpp | 47 ---- contrib/basic-decoder/MyVocab.h | 31 --- contrib/basic-decoder/Phrase.cpp | 68 ------ contrib/basic-decoder/Phrase.h | 50 ---- contrib/basic-decoder/Scores.cpp | 119 ---------- contrib/basic-decoder/Scores.h | 32 --- contrib/basic-decoder/Search/Hypothesis.cpp | 121 ---------- contrib/basic-decoder/Search/Hypothesis.h | 107 --------- contrib/basic-decoder/Search/Manager.cpp | 106 --------- contrib/basic-decoder/Search/Manager.h | 36 --- contrib/basic-decoder/Search/Stack.cpp | 217 ------------------ contrib/basic-decoder/Search/Stack.h | 62 ----- contrib/basic-decoder/Search/Stacks.cpp | 16 -- contrib/basic-decoder/Search/Stacks.h | 35 --- contrib/basic-decoder/Sentence.cpp | 30 --- contrib/basic-decoder/Sentence.h | 14 -- contrib/basic-decoder/TargetPhrase.cpp | 49 ---- contrib/basic-decoder/TargetPhrase.h | 33 --- contrib/basic-decoder/TargetPhrases.cpp | 16 -- contrib/basic-decoder/TargetPhrases.h | 38 --- contrib/basic-decoder/Timer.cpp | 50 ---- contrib/basic-decoder/Timer.h | 49 ---- contrib/basic-decoder/TypeDef.cpp | 4 - contrib/basic-decoder/TypeDef.h | 11 - contrib/basic-decoder/Util.cpp | 9 - contrib/basic-decoder/Util.h | 132 ----------- contrib/basic-decoder/Weights.cpp | 38 --- contrib/basic-decoder/Weights.h | 29 --- contrib/basic-decoder/Word.cpp | 45 ---- contrib/basic-decoder/Word.h | 52 ----- contrib/basic-decoder/WordsBitmap.cpp | 109 --------- contrib/basic-decoder/WordsBitmap.h | 39 ---- contrib/basic-decoder/WordsRange.cpp | 46 ---- contrib/basic-decoder/WordsRange.h | 46 ---- contrib/basic-decoder/check.h | 21 -- contrib/basic-decoder/gzfilebuf.h | 88 ------- 74 files changed, 4141 deletions(-) delete mode 100644 contrib/basic-decoder/.cproject delete mode 100644 contrib/basic-decoder/.dep.inc delete mode 100644 contrib/basic-decoder/.gitignore delete mode 100644 contrib/basic-decoder/.project delete mode 100644 contrib/basic-decoder/FF/DistortionScoreProducer.cpp delete mode 100644 contrib/basic-decoder/FF/DistortionScoreProducer.h delete mode 100644 contrib/basic-decoder/FF/FeatureFunction.cpp delete mode 100644 contrib/basic-decoder/FF/FeatureFunction.h delete mode 100644 contrib/basic-decoder/FF/LM/InternalLM.cpp delete mode 100644 contrib/basic-decoder/FF/LM/InternalLM.h delete mode 100644 contrib/basic-decoder/FF/LM/LM.cpp delete mode 100644 contrib/basic-decoder/FF/LM/LM.h delete mode 100644 contrib/basic-decoder/FF/LM/SRILM.cpp delete mode 100644 contrib/basic-decoder/FF/LM/SRILM.h delete mode 100644 contrib/basic-decoder/FF/PhrasePenalty.cpp delete mode 100644 contrib/basic-decoder/FF/PhrasePenalty.h delete mode 100644 contrib/basic-decoder/FF/StatefulFeatureFunction.cpp delete mode 100644 contrib/basic-decoder/FF/StatefulFeatureFunction.h delete mode 100644 contrib/basic-decoder/FF/StatelessFeatureFunction.cpp delete mode 100644 contrib/basic-decoder/FF/StatelessFeatureFunction.h delete mode 100644 contrib/basic-decoder/FF/TranslationModel/Memory/Node.cpp delete mode 100644 contrib/basic-decoder/FF/TranslationModel/Memory/Node.h delete mode 100644 contrib/basic-decoder/FF/TranslationModel/PhraseTable.cpp delete mode 100644 contrib/basic-decoder/FF/TranslationModel/PhraseTable.h delete mode 100644 contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.cpp delete mode 100644 contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.h delete mode 100644 contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.cpp delete mode 100644 contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.h delete mode 100644 contrib/basic-decoder/FF/WordPenaltyProducer.cpp delete mode 100644 contrib/basic-decoder/FF/WordPenaltyProducer.h delete mode 100644 contrib/basic-decoder/Global.cpp delete mode 100644 contrib/basic-decoder/Global.h delete mode 100644 contrib/basic-decoder/InputFileStream.cpp delete mode 100644 contrib/basic-decoder/InputFileStream.h delete mode 100644 contrib/basic-decoder/InputPath.cpp delete mode 100644 contrib/basic-decoder/InputPath.h delete mode 100644 contrib/basic-decoder/Main.cpp delete mode 100644 contrib/basic-decoder/Makefile delete mode 100644 contrib/basic-decoder/MyVocab.cpp delete mode 100644 contrib/basic-decoder/MyVocab.h delete mode 100644 contrib/basic-decoder/Phrase.cpp delete mode 100644 contrib/basic-decoder/Phrase.h delete mode 100644 contrib/basic-decoder/Scores.cpp delete mode 100644 contrib/basic-decoder/Scores.h delete mode 100644 contrib/basic-decoder/Search/Hypothesis.cpp delete mode 100644 contrib/basic-decoder/Search/Hypothesis.h delete mode 100644 contrib/basic-decoder/Search/Manager.cpp delete mode 100644 contrib/basic-decoder/Search/Manager.h delete mode 100644 contrib/basic-decoder/Search/Stack.cpp delete mode 100644 contrib/basic-decoder/Search/Stack.h delete mode 100644 contrib/basic-decoder/Search/Stacks.cpp delete mode 100644 contrib/basic-decoder/Search/Stacks.h delete mode 100644 contrib/basic-decoder/Sentence.cpp delete mode 100644 contrib/basic-decoder/Sentence.h delete mode 100644 contrib/basic-decoder/TargetPhrase.cpp delete mode 100644 contrib/basic-decoder/TargetPhrase.h delete mode 100644 contrib/basic-decoder/TargetPhrases.cpp delete mode 100644 contrib/basic-decoder/TargetPhrases.h delete mode 100644 contrib/basic-decoder/Timer.cpp delete mode 100644 contrib/basic-decoder/Timer.h delete mode 100644 contrib/basic-decoder/TypeDef.cpp delete mode 100644 contrib/basic-decoder/TypeDef.h delete mode 100644 contrib/basic-decoder/Util.cpp delete mode 100644 contrib/basic-decoder/Util.h delete mode 100644 contrib/basic-decoder/Weights.cpp delete mode 100644 contrib/basic-decoder/Weights.h delete mode 100644 contrib/basic-decoder/Word.cpp delete mode 100644 contrib/basic-decoder/Word.h delete mode 100644 contrib/basic-decoder/WordsBitmap.cpp delete mode 100644 contrib/basic-decoder/WordsBitmap.h delete mode 100644 contrib/basic-decoder/WordsRange.cpp delete mode 100644 contrib/basic-decoder/WordsRange.h delete mode 100644 contrib/basic-decoder/check.h delete mode 100644 contrib/basic-decoder/gzfilebuf.h diff --git a/contrib/basic-decoder/.cproject b/contrib/basic-decoder/.cproject deleted file mode 100644 index 4f8302130..000000000 --- a/contrib/basic-decoder/.cproject +++ /dev/null @@ -1,149 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/basic-decoder/.dep.inc b/contrib/basic-decoder/.dep.inc deleted file mode 100644 index 4560e55af..000000000 --- a/contrib/basic-decoder/.dep.inc +++ /dev/null @@ -1,5 +0,0 @@ -# This code depends on make tool being used -DEPFILES=$(wildcard $(addsuffix .d, ${OBJECTFILES})) -ifneq (${DEPFILES},) -include ${DEPFILES} -endif diff --git a/contrib/basic-decoder/.gitignore b/contrib/basic-decoder/.gitignore deleted file mode 100644 index 711b91904..000000000 --- a/contrib/basic-decoder/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/Debug -/Debug -/Debug diff --git a/contrib/basic-decoder/.project b/contrib/basic-decoder/.project deleted file mode 100644 index f9dfaa15a..000000000 --- a/contrib/basic-decoder/.project +++ /dev/null @@ -1,27 +0,0 @@ - - - basic-decoder - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - diff --git a/contrib/basic-decoder/FF/DistortionScoreProducer.cpp b/contrib/basic-decoder/FF/DistortionScoreProducer.cpp deleted file mode 100644 index a07ce78aa..000000000 --- a/contrib/basic-decoder/FF/DistortionScoreProducer.cpp +++ /dev/null @@ -1,82 +0,0 @@ - -#include -#include "DistortionScoreProducer.h" -#include "TypeDef.h" -#include "Sentence.h" -#include "WordsBitmap.h" -#include "Search/Hypothesis.h" - -using namespace std; - - -///////////////////////////////////////////////////////////////////////////////////////////////////////// - -DistortionScoreProducer::DistortionScoreProducer(const std::string &line) - :StatefulFeatureFunction(line) -{ - ReadParameters(); -} - -size_t DistortionScoreProducer::Evaluate(const Hypothesis& hypo, - size_t prevState, - Scores &scores) const -{ - const WordsRange &range = hypo.GetRange(); - const WordsBitmap &coverage = hypo.GetCoverage(); - - const Hypothesis *prevHypo = hypo.GetPrevHypo(); - assert(prevHypo); - const WordsRange &prevRange = prevHypo->GetRange(); - - SCORE score = ComputeDistortionScore(prevRange, range); - scores.Add(*this, score); - - size_t firstGap = coverage.GetFirstGapPos(); - - size_t ret = range.GetHash(); - boost::hash_combine(ret, firstGap); - - return ret; -} - -SCORE DistortionScoreProducer::ComputeDistortionScore(const WordsRange &prev, const WordsRange &curr) const -{ - SCORE ret = (SCORE) prev.ComputeDistortionScore(curr); - return ret; -} - -SCORE DistortionScoreProducer::CalculateDistortionScore_MooreAndQuick(const Hypothesis& hypo, - const WordsRange &prev, const WordsRange &curr, int firstGap) -{ - /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007 - Definitions: - S : current source range - S' : last translated source phrase range - S'' : longest fully-translated initial segment - */ - - int prefixEndPos = (int)firstGap-1; - if((int)firstGap==-1) - prefixEndPos = -1; - - // case1: S is adjacent to S'' => return 0 - if ((int) curr.startPos == prefixEndPos+1) { - return 0; - } - - // case2: S is to the left of S' => return 2(length(S)) - if ((int) curr.endPos < (int) prev.endPos) { - return (SCORE) -2*(int)curr.GetNumWordsCovered(); - } - - // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S)) - if ((int) prev.endPos <= prefixEndPos) { - int z = (int)curr.startPos-prefixEndPos - 1; - return (SCORE) -2*(z + (int)curr.GetNumWordsCovered()); - } - - // case4: otherwise => return 2(nbWordBetween(S,S')+length(S)) - return (SCORE) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered()); - - -} diff --git a/contrib/basic-decoder/FF/DistortionScoreProducer.h b/contrib/basic-decoder/FF/DistortionScoreProducer.h deleted file mode 100644 index 38a856467..000000000 --- a/contrib/basic-decoder/FF/DistortionScoreProducer.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include -#include "StatefulFeatureFunction.h" -#include "WordsRange.h" - -/** Calculates Distortion scores - */ -class DistortionScoreProducer : public StatefulFeatureFunction -{ -public: - DistortionScoreProducer(const std::string &line); - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const { - } - - size_t Evaluate( - const Hypothesis& hypo, - size_t prevState, - Scores &scores) const; - -protected: - SCORE ComputeDistortionScore(const WordsRange &prev, const WordsRange &curr) const; - - SCORE CalculateDistortionScore_MooreAndQuick(const Hypothesis& hypo, - const WordsRange &prevRange, - const WordsRange &currRange, - int firstGap); - -}; - diff --git a/contrib/basic-decoder/FF/FeatureFunction.cpp b/contrib/basic-decoder/FF/FeatureFunction.cpp deleted file mode 100644 index 33a2e085e..000000000 --- a/contrib/basic-decoder/FF/FeatureFunction.cpp +++ /dev/null @@ -1,131 +0,0 @@ - -#include -#include "FeatureFunction.h" -#include "Util.h" -#include "TargetPhrase.h" -#include "check.h" - -using namespace std; - -std::vector FeatureFunction::s_staticColl; -size_t FeatureFunction::s_nextInd = 0; -std::map FeatureFunction::m_nameInd; - -FeatureFunction::FeatureFunction(const std::string line) - : m_numScores(1) -{ - s_staticColl.push_back(this); - - std::string featureName; - - ParseLine(line, featureName); - CreateName(featureName); - - Register(); - cerr << m_name << "=" << m_startInd << "-" << (m_startInd+m_numScores-1) << endl; -} - -FeatureFunction::~FeatureFunction() -{ - // TODO Auto-generated destructor stub -} - -void FeatureFunction::ReadParameters() -{ - while (!m_args.empty()) { - const vector &args = m_args[0]; - SetParameter(args[0], args[1]); - - m_args.erase(m_args.begin()); - } - -} - -void FeatureFunction::SetParameter(const std::string& key, const std::string& value) -{ - -} - -void FeatureFunction::ParseLine(const std::string &line, std::string &featureName) -{ - vector toks; - Tokenize(toks, line); - - featureName = toks[0]; - - for (size_t i = 1; i < toks.size(); ++i) { - vector args = TokenizeFirstOnly(toks[i], "="); - CHECK(args.size() == 2); - - if (args[0] == "num-features") { - m_numScores = Scan(args[1]); - } else if (args[0] == "name") { - m_name = args[1]; - } else { - m_args.push_back(args); - } - } -} - -void FeatureFunction::CreateName(const std::string &featureName) -{ - if (m_name.empty()) { - std::map::const_iterator iter; - iter = m_nameInd.find(featureName); - if (iter == m_nameInd.end()) { - m_nameInd[featureName] = 0; - m_name = featureName + SPrint(0); - } else { - size_t num = iter->second; - m_name = featureName + SPrint(num); - } - } -} - -void FeatureFunction::Register() -{ - m_startInd = s_nextInd; - s_nextInd += m_numScores; -} - - - -FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name) -{ - for (size_t i = 0; i < s_staticColl.size(); ++i) { - FeatureFunction &ff = *s_staticColl[i]; - if (ff.GetName() == name) { - return ff; - } - } - - throw "Unknown feature " + name; -} - -void FeatureFunction::Evaluate(const Phrase &source - , TargetPhrase &targetPhrase - , Scores &estimatedFutureScore) -{ - Scores &scores = targetPhrase.GetScores(); - for (size_t i = 0; i < s_staticColl.size(); ++i) { - FeatureFunction &ff = *s_staticColl[i]; - ff.Evaluate(source, targetPhrase, scores, estimatedFutureScore); - } - -} - -void FeatureFunction::Initialize(const Sentence &source) -{ - for (size_t i = 0; i < s_staticColl.size(); ++i) { - FeatureFunction &ff = *s_staticColl[i]; - ff.InitializeForInput(source); - } -} - -void FeatureFunction::CleanUp(const Sentence &source) -{ - for (size_t i = 0; i < s_staticColl.size(); ++i) { - FeatureFunction &ff = *s_staticColl[i]; - ff.CleanUpAfterSentenceProcessing(source); - } -} diff --git a/contrib/basic-decoder/FF/FeatureFunction.h b/contrib/basic-decoder/FF/FeatureFunction.h deleted file mode 100644 index 829b91baf..000000000 --- a/contrib/basic-decoder/FF/FeatureFunction.h +++ /dev/null @@ -1,74 +0,0 @@ - -#pragma once - -#include -#include -#include - -class Phrase; -class TargetPhrase; -class Scores; -class Sentence; - -class FeatureFunction -{ -public: - static const std::vector& GetColl() { - return s_staticColl; - } - static FeatureFunction &FindFeatureFunction(const std::string& name); - static void Evaluate(const Phrase &source - , TargetPhrase &targetPhrase - , Scores &estimatedFutureScore); - static void Initialize(const Sentence &source); - static void CleanUp(const Sentence &source); - - static size_t GetTotalNumScores() { - return s_nextInd; - } - - FeatureFunction(const std::string line); - virtual ~FeatureFunction(); - - virtual void Load() - {} - - virtual void InitializeForInput(const Sentence &source) - {} - - virtual void CleanUpAfterSentenceProcessing(const Sentence &source) - {} - - virtual void ReadParameters(); - virtual void SetParameter(const std::string& key, const std::string& value); - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const = 0; - - size_t GetStartInd() const { - return m_startInd; - } - size_t GetNumScores() const { - return m_numScores; - } - const std::string &GetName() const { - return m_name; - } - -protected: - static std::vector s_staticColl; - static size_t s_nextInd; - static std::map m_nameInd; - - std::vector > m_args; - size_t m_numScores, m_startInd; - std::string m_name; - - void ParseLine(const std::string &line, std::string &featureName); - void CreateName(const std::string &featureName); - void Register(); - -}; - diff --git a/contrib/basic-decoder/FF/LM/InternalLM.cpp b/contrib/basic-decoder/FF/LM/InternalLM.cpp deleted file mode 100644 index d054389a5..000000000 --- a/contrib/basic-decoder/FF/LM/InternalLM.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include -#include "InternalLM.h" -#include "InputFileStream.h" -#include "Util.h" -#include "MyVocab.h" -#include "Phrase.h" - -using namespace std; - -namespace FastMoses -{ - -//////////////////////////////////////////////////////////////////////////////// -InternalLMNode *InternalLMNode::GetOrCreateNode(VOCABID vocabId) -{ - Children::iterator iter; - iter = m_children.find(vocabId); - if (iter == m_children.end()) { - return &m_children[vocabId]; - } else { - InternalLMNode *node = &iter->second; - return node; - } -} - -const InternalLMNode *InternalLMNode::Get(VOCABID vocabId) const -{ - Children::const_iterator iter; - iter = m_children.find(vocabId); - if (iter == m_children.end()) { - return NULL; - } else { - const InternalLMNode *node = &iter->second; - return node; - } -} - - -////////////////////////////////////////////////////////////////////////////// -InternalLM::InternalLM(const std::string &line) - :LM(line) - ,m_lastNode(NULL) -{ - ReadParameters(); -} - -void InternalLM::Load() -{ - // 1st, set prob for root - m_node.score = 0; - m_node.logBackOff = 0; - - Moses::InputFileStream iniStrme(m_path); - - vector toks; - size_t lineNum = 0; - string line; - while (getline(iniStrme, line)) { - lineNum++; - if (lineNum % 1000000 == 0) { - cerr << lineNum << " " << flush; - } - - if (line.size() != 0 && line.substr(0,1) != "\\") { - toks.clear(); - Tokenize(toks, line, "\t"); - - if (toks.size() >= 2) { - // split unigram/bigram trigrams - vector wordVec; - Tokenize(wordVec, toks[1], " "); - - // create / traverse down tree - InternalLMNode *node = &m_node; - for (int pos = (int) wordVec.size() - 1 ; pos >= 0 ; pos--) { - const string &wordStr = wordVec[pos]; - VOCABID vocabId = MyVocab::Instance().GetOrCreateId(wordStr); - node = node->GetOrCreateNode(vocabId); - assert(node); - } - assert(node); - - SCORE score = TransformSRIScore(Scan(toks[0])); - node->score = score; - if (toks.size() == 3) { - SCORE logBackOff = TransformSRIScore(Scan(toks[2])); - node->logBackOff = logBackOff; - } else { - node->logBackOff = 0; - } - } - } - } -} - -size_t InternalLM::GetLastState() const -{ - assert(m_lastNode); - size_t ret = (size_t) m_lastNode; - return ret; -} - -SCORE InternalLM::GetValue(const PhraseVec &phraseVec) const -{ - m_lastNode = &GetNode(phraseVec); - assert(m_lastNode); - return m_lastNode->score; -} - -const InternalLMNode &InternalLM::GetNode(const PhraseVec &phraseVec) const -{ - size_t size = phraseVec.size(); - - const InternalLMNode *node = &m_node; - const InternalLMNode *prevNode = node; - for (int pos = (int) size - 1 ; pos >= 0 ; pos--) { - const Word &word = *phraseVec[pos]; - VOCABID vocabId = word.GetVocab(); - node = node->Get(vocabId); - - if (node) { - prevNode = node; - } else { - node = prevNode; - break; - } - } - - return *node; -} - -void InternalLM::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "path") { - m_path = value; - } else { - LM::SetParameter(key, value); - } -} - - -} diff --git a/contrib/basic-decoder/FF/LM/InternalLM.h b/contrib/basic-decoder/FF/LM/InternalLM.h deleted file mode 100644 index 4ff75489f..000000000 --- a/contrib/basic-decoder/FF/LM/InternalLM.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once -#include -#include -#include "LM.h" - -namespace FastMoses -{ - -class InternalLMNode -{ -public: - typedef boost::unordered_map Children; - - InternalLMNode *GetOrCreateNode(VOCABID vocabId); - const InternalLMNode *Get(VOCABID vocabId) const; - - SCORE score, logBackOff; -protected: - Children m_children; -}; - -class InternalLM : public LM -{ -public: - InternalLM(const std::string &line); - void Load(); - virtual size_t GetLastState() const; - - void SetParameter(const std::string& key, const std::string& value); - -protected: - InternalLMNode m_node; - std::string m_path; - const InternalLMNode &GetNode(const PhraseVec &phraseVec) const; - - mutable const InternalLMNode *m_lastNode; - - virtual SCORE GetValue(const PhraseVec &phraseVec) const; -}; - -} diff --git a/contrib/basic-decoder/FF/LM/LM.cpp b/contrib/basic-decoder/FF/LM/LM.cpp deleted file mode 100644 index 5fa62c79a..000000000 --- a/contrib/basic-decoder/FF/LM/LM.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include -#include -#include "LM.h" -#include "Util.h" -#include "TargetPhrase.h" -#include "MyVocab.h" -#include "Search/Hypothesis.h" - -using namespace std; - -namespace FastMoses -{ - -LM::LM(const std::string &line) - :StatefulFeatureFunction(line) -{ - m_bos.CreateFromString(""); - m_eos.CreateFromString(""); -} - -void LM::Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const -{ - SCORE all = 0, ngram = 0; - - PhraseVec phraseVec; - phraseVec.reserve(m_order); - for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) { - const Word &word = targetPhrase.GetWord(pos); - ShiftOrPush(phraseVec, word); - SCORE score = GetValueCache(phraseVec); - - all += score; - if (phraseVec.size() == m_order) { - ngram += score; - } - } - - SCORE estimated = all - ngram; - scores.Add(*this, ngram); - estimatedFutureScore.Add(*this, estimated); -} - -size_t LM::Evaluate( - const Hypothesis& hypo, - size_t prevState, - Scores &scores) const -{ - if (m_order <= 1) { - return 0; // not sure if returning NULL is correct - } - - if (hypo.targetPhrase.GetSize() == 0) { - return 0; // not sure if returning NULL is correct - } - - PhraseVec m_phraseVec(m_order); - - const size_t currEndPos = hypo.targetRange.endPos; - const size_t startPos = hypo.targetRange.startPos; - - size_t index = 0; - for (int currPos = (int) startPos - (int) m_order + 1 ; currPos <= (int) startPos ; currPos++) { - if (currPos >= 0) - m_phraseVec[index++] = &hypo.GetWord(currPos); - else { - m_phraseVec[index++] = &m_bos; - } - } - - SCORE lmScore = GetValueCache(m_phraseVec); - - // main loop - size_t endPos = std::min(startPos + m_order - 2 - , currEndPos); - for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++) { - // shift all args down 1 place - for (size_t i = 0 ; i < m_order - 1 ; i++) - m_phraseVec[i] = m_phraseVec[i + 1]; - - // add last factor - m_phraseVec.back() = &hypo.GetWord(currPos); - - lmScore += GetValueCache(m_phraseVec); - } - - // end of sentence - if (hypo.GetCoverage().IsComplete()) { - const size_t size = hypo.GetSize(); - m_phraseVec.back() = &m_eos; - - for (size_t i = 0 ; i < m_order - 1 ; i ++) { - int currPos = (int)(size - m_order + i + 1); - if (currPos < 0) - m_phraseVec[i] = &m_bos; - else - m_phraseVec[i] = &hypo.GetWord((size_t)currPos); - } - lmScore += GetValueCache(m_phraseVec); - } else { - if (endPos < currEndPos) { - //need to get the LM state (otherwise the last LM state is fine) - for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) { - for (size_t i = 0 ; i < m_order - 1 ; i++) - m_phraseVec[i] = m_phraseVec[i + 1]; - m_phraseVec.back() = &hypo.GetWord(currPos); - } - } - } - - size_t state = GetLastState(); - return state; -} - -SCORE LM::GetValueCache(const PhraseVec &phraseVec) const -{ - SCORE score = GetValue(phraseVec); - return score; - - size_t hash = 0; - for (size_t i = 0; i < phraseVec.size(); ++i) { - VOCABID vocabId = phraseVec[i]->GetVocab(); - boost::hash_combine(hash, vocabId); - } - - Cache::const_iterator iter; - iter = m_cache.find(hash); - if (iter != m_cache.end()) { - return iter->second; - } - else { - SCORE score = GetValue(phraseVec); - m_cache[hash] = score; - return score; - } -} - -void LM::ShiftOrPush(PhraseVec &phraseVec, const Word &word) const -{ - if (phraseVec.size() < m_order) { - phraseVec.push_back(&word); - } else { - // shift - for (size_t currNGramOrder = 0 ; currNGramOrder < m_order - 1 ; currNGramOrder++) { - phraseVec[currNGramOrder] = phraseVec[currNGramOrder + 1]; - } - phraseVec[m_order - 1] = &word; - } -} - -void LM::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "order") { - m_order = Scan(value); - } else { - StatefulFeatureFunction::SetParameter(key, value); - } - -} - -} - diff --git a/contrib/basic-decoder/FF/LM/LM.h b/contrib/basic-decoder/FF/LM/LM.h deleted file mode 100644 index f74c0dad2..000000000 --- a/contrib/basic-decoder/FF/LM/LM.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include -#include "FF/StatefulFeatureFunction.h" -#include "TypeDef.h" -#include "Phrase.h" - - -namespace FastMoses -{ - -class LM : public StatefulFeatureFunction -{ -public: - LM(const std::string &line); - - virtual size_t GetLastState() const = 0; - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const; - - virtual size_t Evaluate( - const Hypothesis& hypo, - size_t prevState, - Scores &scores) const; - - void SetParameter(const std::string& key, const std::string& value); - -protected: - size_t m_order; - Word m_bos, m_eos; - - typedef boost::unordered_map Cache; - mutable Cache m_cache; - - virtual SCORE GetValue(const PhraseVec &phraseVec) const = 0; - SCORE GetValueCache(const PhraseVec &phraseVec) const; - void ShiftOrPush(PhraseVec &phraseVec, const Word &word) const; -}; - -} diff --git a/contrib/basic-decoder/FF/LM/SRILM.cpp b/contrib/basic-decoder/FF/LM/SRILM.cpp deleted file mode 100644 index 923be1f75..000000000 --- a/contrib/basic-decoder/FF/LM/SRILM.cpp +++ /dev/null @@ -1,139 +0,0 @@ - -#include -#include -#include "SRILM.h" -#include "MyVocab.h" -#include "Util.h" - -using namespace std; - -#define MAX_NGRAM_SIZE 10 - -namespace FastMoses -{ - -SRILM::SRILM(const string &line) - :LM(line) -{ - ReadParameters(); - -} - -void SRILM::Load() -{ - m_srilmVocab = new Vocab(); - m_srilmModel = new Ngram(*m_srilmVocab, m_order); - - m_srilmModel->skipOOVs() = false; - - File file(m_path.c_str(), "r" ); - m_srilmModel->read(file); - - CreateVocab(); - m_unknownId = m_srilmVocab->unkIndex(); - -} - -void SRILM::CreateVocab() -{ - MyVocab &factorCollection = MyVocab::Instance(); - - std::map lmIdMap; - size_t maxFactorId = 0; // to create lookup vector later on - - VocabString str; - VocabIter iter(*m_srilmVocab); - while ( (str = iter.next()) != NULL) { - VocabIndex lmId = GetLmID(str); - VOCABID factorId = factorCollection.GetOrCreateId(str); - lmIdMap[factorId] = lmId; - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - } - - VOCABID factorId; - factorId = factorCollection.GetOrCreateId(""); - lmIdMap[factorId] = GetLmID(""); - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - - factorId = factorCollection.GetOrCreateId(""); - lmIdMap[factorId] = GetLmID(""); - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - - // add to lookup vector in object - m_lmIdLookup.resize(maxFactorId+1); - - fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId); - - map::iterator iterMap; - for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) { - m_lmIdLookup[iterMap->first] = iterMap->second; - } - -} - -VocabIndex SRILM::GetLmID( const std::string &str ) const -{ - return m_srilmVocab->getIndex( str.c_str(), m_unknownId ); -} - -VocabIndex SRILM::GetLmID(VOCABID vocabId) const -{ - return ( vocabId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[vocabId]; -} - - -SCORE SRILM::GetValue(const PhraseVec &phraseVec) const -{ - size_t count = phraseVec.size(); - if (count <= 0) { - return 0; - } - - // set up context - VocabIndex context[MAX_NGRAM_SIZE]; - for (size_t i = 0 ; i < count - 1 ; i++) { - const Word &word = *phraseVec[count-2-i]; - VOCABID vocabId = word.GetVocab(); - - context[i] = GetLmID(vocabId); - } - context[count-1] = Vocab_None; - - assert(phraseVec[count-1] != NULL); - // call sri lm fn - VocabIndex lmId= GetLmID(phraseVec[count-1]->GetVocab()); - float ret = GetValue(lmId, context); - - for (int i = count - 2 ; i >= 0 ; i--) - context[i+1] = context[i]; - context[0] = lmId; - unsigned len; - m_lastState = m_srilmModel->contextID(context, len); - len++; - - return ret; -} - -float SRILM::GetValue(VocabIndex wordId, VocabIndex *context) const -{ - float p = m_srilmModel->wordProb( wordId, context ); - return FloorScore(TransformSRIScore(p)); // log10->log -} - - -size_t SRILM::GetLastState() const -{ - return (size_t)m_lastState; -} - -void SRILM::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "path") { - m_path = value; - } else { - LM::SetParameter(key, value); - } -} - -} - diff --git a/contrib/basic-decoder/FF/LM/SRILM.h b/contrib/basic-decoder/FF/LM/SRILM.h deleted file mode 100644 index 528ee6416..000000000 --- a/contrib/basic-decoder/FF/LM/SRILM.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include "LM.h" -#include - -class Ngram; - -namespace FastMoses -{ - -class SRILM : public LM -{ -public: - SRILM(const std::string &line); - void Load(); - - virtual size_t GetLastState() const; - - void SetParameter(const std::string& key, const std::string& value); - -protected: - Vocab *m_srilmVocab; - Ngram *m_srilmModel; - std::string m_path; - VocabIndex m_unknownId; - std::vector m_lmIdLookup; - mutable void *m_lastState; - - void CreateVocab(); - VocabIndex GetLmID( const std::string &str ) const; - VocabIndex GetLmID(VOCABID vocabId) const; - float GetValue(VocabIndex wordId, VocabIndex *context) const; - - virtual SCORE GetValue(const PhraseVec &phraseVec) const; - -}; - -} diff --git a/contrib/basic-decoder/FF/PhrasePenalty.cpp b/contrib/basic-decoder/FF/PhrasePenalty.cpp deleted file mode 100644 index f456e0807..000000000 --- a/contrib/basic-decoder/FF/PhrasePenalty.cpp +++ /dev/null @@ -1,23 +0,0 @@ - -#include "PhrasePenalty.h" -#include "Scores.h" - -PhrasePenalty::PhrasePenalty(const std::string &line) - :StatelessFeatureFunction(line) -{ - // TODO Auto-generated constructor stub - -} - -PhrasePenalty::~PhrasePenalty() -{ - // TODO Auto-generated destructor stub -} - -void PhrasePenalty::Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const -{ - scores.Add(*this, 1); -} diff --git a/contrib/basic-decoder/FF/PhrasePenalty.h b/contrib/basic-decoder/FF/PhrasePenalty.h deleted file mode 100644 index d761210e0..000000000 --- a/contrib/basic-decoder/FF/PhrasePenalty.h +++ /dev/null @@ -1,18 +0,0 @@ - -#pragma once - -#include "StatelessFeatureFunction.h" - -class PhrasePenalty : public StatelessFeatureFunction -{ -public: - PhrasePenalty(const std::string &line); - virtual ~PhrasePenalty(); - - void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const; - -}; - diff --git a/contrib/basic-decoder/FF/StatefulFeatureFunction.cpp b/contrib/basic-decoder/FF/StatefulFeatureFunction.cpp deleted file mode 100644 index 5e28b09e4..000000000 --- a/contrib/basic-decoder/FF/StatefulFeatureFunction.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include "StatefulFeatureFunction.h" -#include "Search/Hypothesis.h" - -std::vector StatefulFeatureFunction::s_staticColl; - -StatefulFeatureFunction::StatefulFeatureFunction(const std::string line) - :FeatureFunction(line) -{ - s_staticColl.push_back(this); -} - -StatefulFeatureFunction::~StatefulFeatureFunction() -{ - // TODO Auto-generated destructor stub -} - -/////////////////////////////////////////////////// - -void StatefulFeatureFunction::Evaluate(Hypothesis& hypo) -{ - const Hypothesis &prevHypo = *hypo.GetPrevHypo(); - Scores &scores = hypo.GetScores(); - for (size_t i = 0; i < s_staticColl.size(); ++i) { - const StatefulFeatureFunction &ff = *s_staticColl[i]; - size_t prevFFState = prevHypo.GetState(i); - - size_t ffState = ff.Evaluate(hypo, prevFFState, scores); - assert(ffState); - hypo.SetState(i, ffState); - } -} - -void StatefulFeatureFunction::EvaluateEmptyHypo(const Sentence &input, Hypothesis& hypo) -{ - for (size_t i = 0; i < s_staticColl.size(); ++i) { - const StatefulFeatureFunction &ff = *s_staticColl[i]; - size_t ffState = ff.EmptyHypo(input, hypo); - hypo.SetState(i, ffState); - } -} - diff --git a/contrib/basic-decoder/FF/StatefulFeatureFunction.h b/contrib/basic-decoder/FF/StatefulFeatureFunction.h deleted file mode 100644 index 565014f1f..000000000 --- a/contrib/basic-decoder/FF/StatefulFeatureFunction.h +++ /dev/null @@ -1,36 +0,0 @@ - -#pragma once - -#include "FeatureFunction.h" - -class FFState; -class Hypothesis; -class Scores; - -class StatefulFeatureFunction : public FeatureFunction -{ -public: - static const std::vector& GetColl() { - return s_staticColl; - } - static void Evaluate(Hypothesis& hypo); - static void EvaluateEmptyHypo(const Sentence &input, Hypothesis& hypo); - - StatefulFeatureFunction(const std::string line); - virtual ~StatefulFeatureFunction(); - - virtual size_t Evaluate( - const Hypothesis& hypo, - size_t prevState, - Scores &scores) const = 0; - virtual size_t EmptyHypo( - const Sentence &input, - Hypothesis& hypo) const { - return 0; - } - -protected: - static std::vector s_staticColl; - -}; - diff --git a/contrib/basic-decoder/FF/StatelessFeatureFunction.cpp b/contrib/basic-decoder/FF/StatelessFeatureFunction.cpp deleted file mode 100644 index 9852e089a..000000000 --- a/contrib/basic-decoder/FF/StatelessFeatureFunction.cpp +++ /dev/null @@ -1,16 +0,0 @@ - -#include "StatelessFeatureFunction.h" - -std::vector StatelessFeatureFunction::s_staticColl; - -StatelessFeatureFunction::StatelessFeatureFunction(const std::string line) - :FeatureFunction(line) -{ - s_staticColl.push_back(this); -} - -StatelessFeatureFunction::~StatelessFeatureFunction() -{ - // TODO Auto-generated destructor stub -} - diff --git a/contrib/basic-decoder/FF/StatelessFeatureFunction.h b/contrib/basic-decoder/FF/StatelessFeatureFunction.h deleted file mode 100644 index 56489ac5c..000000000 --- a/contrib/basic-decoder/FF/StatelessFeatureFunction.h +++ /dev/null @@ -1,20 +0,0 @@ - -#pragma once - -#include "FeatureFunction.h" - -class StatelessFeatureFunction : public FeatureFunction -{ -public: - static const std::vector& GetColl() { - return s_staticColl; - } - - StatelessFeatureFunction(const std::string line); - virtual ~StatelessFeatureFunction(); - -protected: - static std::vector s_staticColl; - -}; - diff --git a/contrib/basic-decoder/FF/TranslationModel/Memory/Node.cpp b/contrib/basic-decoder/FF/TranslationModel/Memory/Node.cpp deleted file mode 100644 index 8a722c01e..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/Memory/Node.cpp +++ /dev/null @@ -1,43 +0,0 @@ - -#include "Node.h" -#include "Phrase.h" - -Node::Node() -{ - // TODO Auto-generated constructor stub - -} - -Node::~Node() -{ - // TODO Auto-generated destructor stub -} - -Node &Node::GetOrCreate(const Phrase &source, size_t pos) -{ - if (pos == source.GetSize()) { - return *this; - } - - const Word &word = source.GetWord(pos); - Node &child = m_children[word]; - return child.GetOrCreate(source, pos + 1); -} - -const Node *Node::Get(const Word &word) const -{ - Children::const_iterator iter; - iter = m_children.find(word); - if (iter == m_children.end()) { - return NULL; - } - - // found child node - const Node &child = iter->second; - return &child; -} - -void Node::AddTarget(TargetPhrase *target) -{ - m_tpColl.Add(target); -} diff --git a/contrib/basic-decoder/FF/TranslationModel/Memory/Node.h b/contrib/basic-decoder/FF/TranslationModel/Memory/Node.h deleted file mode 100644 index 1370eb0fc..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/Memory/Node.h +++ /dev/null @@ -1,32 +0,0 @@ - -#pragma once - -#include -#include "Word.h" -#include "TargetPhrase.h" -#include "TargetPhrases.h" - -class Phrase; - -class Node -{ -public: - typedef boost::unordered_map Children; - - Node(); - virtual ~Node(); - - Node &GetOrCreate(const Phrase &source, size_t pos); - const Node *Get(const Word &word) const; - - void AddTarget(TargetPhrase *target); - const TargetPhrases &GetTargetPhrases() const { - return m_tpColl; - } - -protected: - Children m_children; - TargetPhrases m_tpColl; -}; - - diff --git a/contrib/basic-decoder/FF/TranslationModel/PhraseTable.cpp b/contrib/basic-decoder/FF/TranslationModel/PhraseTable.cpp deleted file mode 100644 index dc0baec21..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/PhraseTable.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * PhraseTable.cpp - * - * Created on: 5 Oct 2013 - * Author: hieu - */ - -#include "PhraseTable.h" -#include "InputPath.h" - -std::vector PhraseTable::s_staticColl; -size_t PhraseTable::s_ptId = 0; - -PhraseTable::PhraseTable(const std::string line) - :StatelessFeatureFunction(line) - ,m_ptId(s_ptId++) -{ - s_staticColl.push_back(this); - -} - -PhraseTable::~PhraseTable() -{ - // TODO Auto-generated destructor stub -} - diff --git a/contrib/basic-decoder/FF/TranslationModel/PhraseTable.h b/contrib/basic-decoder/FF/TranslationModel/PhraseTable.h deleted file mode 100644 index 5cc5427a8..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/PhraseTable.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * PhraseTable.h - * - * Created on: 5 Oct 2013 - * Author: hieu - */ - -#pragma once - -#include "FF/StatelessFeatureFunction.h" - -class InputPath; - -class PhraseTable :public StatelessFeatureFunction -{ -public: - static const std::vector& GetColl() { - return s_staticColl; - } - - PhraseTable(const std::string line); - virtual ~PhraseTable(); - - virtual void Lookup(const std::vector &inputPathQueue) = 0; -protected: - static std::vector s_staticColl; - static size_t s_ptId; - - size_t m_ptId; - -}; - diff --git a/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.cpp b/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.cpp deleted file mode 100644 index dd5649731..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * PhraseTableMemory.cpp - * - * Created on: 5 Oct 2013 - * Author: hieu - */ - -#include -#include -#include "PhraseTableMemory.h" -#include "InputFileStream.h" -#include "Util.h" -#include "Phrase.h" -#include "TargetPhrase.h" -#include "InputPath.h" - -using namespace std; - - -PhraseTableMemory::PhraseTableMemory(const std::string &line) - :PhraseTable(line) - ,m_tableLimit(20) -{ - ReadParameters(); -} - -PhraseTableMemory::~PhraseTableMemory() -{ - // TODO Auto-generated destructor stub -} - -void PhraseTableMemory::Load() -{ - Scores *estimatedFutureScore = new Scores(); - - Moses::InputFileStream iniStrme(m_path); - - vector toks; - size_t lineNum = 0; - string line; - while (getline(iniStrme, line)) { - if (lineNum % 10000 == 0) { - cerr << lineNum << " " << flush; - } - toks.clear(); - TokenizeMultiCharSeparator(toks, line, "|||"); - - Phrase *source = Phrase::CreateFromString(toks[0]); - TargetPhrase *target = TargetPhrase::CreateFromString(*this, toks[1], toks[2], true); - FeatureFunction::Evaluate(*source, *target, *estimatedFutureScore); - - //cerr << target->Debug() << endl; - - Node &node = m_root.GetOrCreate(*source, 0); - node.AddTarget(target); - - ++lineNum; - } -} - -void PhraseTableMemory::SetParameter(const std::string& key, const std::string& value) -{ - if (key == "path") { - m_path = value; - } else if (key == "table-limit") { - m_tableLimit = Scan(value); - } else { - PhraseTable::SetParameter(key, value); - } -} - -void PhraseTableMemory::Lookup(const std::vector &inputPathQueue) -{ - for (size_t i = 0; i < inputPathQueue.size(); ++i) { - InputPath &path = *inputPathQueue[i]; - const InputPath *prevPath = path.GetPrevPath(); - - //cerr << path.GetPhrase().Debug() << endl; - - // which node to start the lookup - const Node *node; - if (prevPath) { - // get node from previous lookup. - // May be null --> don't lookup any further - node = (const Node *) prevPath->GetPtLookup(m_ptId).ptNode; - } else { - // 1st lookup. Start from root - node = &m_root; - } - - // where to store the info for this lookup - PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId); - if (node) { - // LOOKUP - // lookup the LAST word only - const Phrase &source = path.GetPhrase(); - const Word &lastWord = source.Back(); - - node = node->Get(lastWord); - } - - if (node) { - // found something - const TargetPhrases &tpColl = node->GetTargetPhrases(); - ptLookup.Set(&tpColl, node); - } else { - ptLookup.Set(NULL, NULL); - } - } -} diff --git a/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.h b/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.h deleted file mode 100644 index 60216c40b..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/PhraseTableMemory.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * PhraseTableMemory.h - * - * Created on: 5 Oct 2013 - * Author: hieu - */ - -#pragma once - -#include -#include "PhraseTable.h" -#include "Memory/Node.h" - -class PhraseTableMemory: public PhraseTable -{ -public: - PhraseTableMemory(const std::string &line); - virtual ~PhraseTableMemory(); - - void Load(); - void SetParameter(const std::string& key, const std::string& value); - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const { - } - - void Lookup(const std::vector &inputPathQueue); -protected: - std::string m_path; - size_t m_tableLimit; - - Node m_root; - -}; - diff --git a/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.cpp b/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.cpp deleted file mode 100644 index 76c693488..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * UnknownWordPenalty.cpp - * - * Created on: 5 Oct 2013 - * Author: hieu - */ -#include -#include -#include "UnknownWordPenalty.h" -#include "InputPath.h" -#include "TargetPhrase.h" -#include "TargetPhrases.h" -#include "WordsRange.h" -#include "Util.h" -#include "TypeDef.h" -#include "FF/FeatureFunction.h" - -using namespace std; - -UnknownWordPenalty::UnknownWordPenalty(const std::string line) - :PhraseTable(line) -{ - ReadParameters(); - -} - -UnknownWordPenalty::~UnknownWordPenalty() -{ - // TODO Auto-generated destructor stub -} - -void UnknownWordPenalty::Lookup(const std::vector &inputPathQueue) -{ - Scores *estimatedFutureScore = new Scores(); - - for (size_t i = 0; i < inputPathQueue.size(); ++i) { - InputPath &path = *inputPathQueue[i]; - PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId); - - const Phrase &source = path.GetPhrase(); - if (source.GetSize() == 1) { - const Word &sourceWord = source.GetWord(0); - string str = sourceWord.ToString(); - str = "UNK:" + str + ":UNK"; - - Word targetWord; - targetWord.CreateFromString(str); - - TargetPhrase *tp = new TargetPhrase(1); - tp->Set(0, targetWord); - tp->GetScores().Add(*this, LOWEST_SCORE); - - FeatureFunction::Evaluate(source, *tp, *estimatedFutureScore); - - TargetPhrases *tpColl = new TargetPhrases(); - m_targetPhrases.push_back(tpColl); - tpColl->Add(tp); - - ptLookup.Set(tpColl, NULL); - } else { - ptLookup.Set(NULL, NULL); - } - } -} - -void UnknownWordPenalty::CleanUpAfterSentenceProcessing(const Sentence &source) -{ - m_targetPhrases.clear(); -} - diff --git a/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.h b/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.h deleted file mode 100644 index cd7945bf6..000000000 --- a/contrib/basic-decoder/FF/TranslationModel/UnknownWordPenalty.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * UnknownWordPenalty.h - * - * Created on: 5 Oct 2013 - * Author: hieu - */ -#pragma once - -#include "PhraseTable.h" - -class TargetPhrases; - -class UnknownWordPenalty: public PhraseTable -{ -public: - UnknownWordPenalty(const std::string line); - virtual ~UnknownWordPenalty(); - - void CleanUpAfterSentenceProcessing(const Sentence &source); - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const { - } - - void Lookup(const std::vector &inputPathQueue); - -protected: - std::vector m_targetPhrases; -}; - diff --git a/contrib/basic-decoder/FF/WordPenaltyProducer.cpp b/contrib/basic-decoder/FF/WordPenaltyProducer.cpp deleted file mode 100644 index 3896dda79..000000000 --- a/contrib/basic-decoder/FF/WordPenaltyProducer.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "WordPenaltyProducer.h" -#include "TargetPhrase.h" - -WordPenaltyProducer::WordPenaltyProducer(const std::string &line) - :StatelessFeatureFunction(line) -{ - ReadParameters(); -} - -void WordPenaltyProducer::Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const -{ - SCORE numWords = - (SCORE) targetPhrase.GetSize(); - scores.Add(*this, numWords); -} diff --git a/contrib/basic-decoder/FF/WordPenaltyProducer.h b/contrib/basic-decoder/FF/WordPenaltyProducer.h deleted file mode 100644 index 187ec072f..000000000 --- a/contrib/basic-decoder/FF/WordPenaltyProducer.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include "StatelessFeatureFunction.h" - - -class WordPenaltyProducer : public StatelessFeatureFunction -{ -public: - WordPenaltyProducer(const std::string &line); - - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , Scores &scores - , Scores &estimatedFutureScore) const; - - -}; - - diff --git a/contrib/basic-decoder/Global.cpp b/contrib/basic-decoder/Global.cpp deleted file mode 100644 index 24d611893..000000000 --- a/contrib/basic-decoder/Global.cpp +++ /dev/null @@ -1,172 +0,0 @@ - -#include -#include "Global.h" -#include "InputFileStream.h" -#include "Util.h" -#include "check.h" - -#include "FF/FeatureFunction.h" -#include "FF/DistortionScoreProducer.h" -#include "FF/WordPenaltyProducer.h" -#include "FF/PhrasePenalty.h" -#include "FF/TranslationModel/PhraseTableMemory.h" -#include "FF/TranslationModel/UnknownWordPenalty.h" -#include "FF/LM/InternalLM.h" -#include "FF/LM/SRILM.h" - -using namespace std; - -Global Global::s_instance; - -Global::Global() -{} - -Global::~Global() -{ - // TODO Auto-generated destructor stub -} - -void Global::Init(int argc, char** argv) -{ - for (int i = 0; i < argc; ++i) { - string arg = argv[i]; - if (arg == "-f") { - m_iniPath = argv[++i]; - } else if (arg == "-i") { - m_inputPath = argv[++i]; - } - } - - // input file - if (m_inputPath.empty()) { - m_inputStrme = &cin; - } else { - m_inputStrme = new Moses::InputFileStream(m_inputPath); - } - - // read ini file - Moses::InputFileStream iniStrme(m_iniPath); - - ParamList *paramList = NULL; - string line; - while (getline(iniStrme, line)) { - line = Trim(line); - if (line.find("[") == 0) { - paramList = &m_params[line]; - } else if (line.find("#") == 0 || line.empty()) { - // do nothing - } else { - paramList->push_back(line); - } - } - - timer.check("InitParams"); - InitParams(); - timer.check("InitFF"); - InitFF(); - timer.check("InitWeight"); - InitWeight(); - timer.check("Start Load"); - Load(); - timer.check("Finished Load"); - -} - -bool Global::ParamExist(const std::string &key) const -{ - Params::const_iterator iter; - iter = m_params.find(key); - bool ret = (iter != m_params.end()); - return ret; -} - -void Global::InitParams() -{ - if (ParamExist("[stack]")) { - stackSize = Scan(m_params["[stack]"][0]); - } else { - stackSize = 200; - } - - if (ParamExist("[distortion-limit]")) { - maxDistortion = Scan(m_params["[distortion-limit]"][0]); - } else { - maxDistortion = 6; - } -} - -void Global::InitFF() -{ - ParamList &list = m_params["[feature]"]; - - for (size_t i = 0; i < list.size(); ++i) { - string &line = list[i]; - cerr << "line=" << line << endl; - - FeatureFunction *ff = NULL; - if (line.find("Distortion") == 0) { - ff = new DistortionScoreProducer(line); - } else if (line.find("WordPenalty") == 0) { - ff = new WordPenaltyProducer(line); - } else if (line.find("PhraseDictionaryMemory") == 0) { - ff = new PhraseTableMemory(line); - } else if (line.find("UnknownWordPenalty") == 0) { - ff = new UnknownWordPenalty(line); - } else if (line.find("PhrasePenalty") == 0) { - ff = new PhrasePenalty(line); - } else if (line.find("InternalLM") == 0) { - ff = new FastMoses::InternalLM(line); - } else if (line.find("SRILM") == 0) { - ff = new FastMoses::SRILM(line); - } else { - cerr << "Unknown FF " << line << endl; - abort(); - } - } -} - -void Global::InitWeight() -{ - weights.SetNumScores(FeatureFunction::GetTotalNumScores()); - ParamList &list = m_params["[weight]"]; - - for (size_t i = 0; i < list.size(); ++i) { - string &line = list[i]; - cerr << "line=" << line << endl; - - vector toks = TokenizeFirstOnly(line, "="); - CHECK(toks.size() == 2); - - FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]); - - vector featureWeights; - Tokenize(featureWeights, toks[1]); - CHECK(ff.GetNumScores() == featureWeights.size()); - weights.SetWeights(ff, featureWeights); - } -} - -void Global::Load() -{ - std::vector pts; - - cerr << "Loading" << endl; - const std::vector &ffs = FeatureFunction::GetColl(); - for (size_t i = 0; i < ffs.size(); ++i) { - FeatureFunction *ff = ffs[i]; - PhraseTable *pt = dynamic_cast(ff); - if (pt) { - // load pt after other ff - pts.push_back(pt); - } else { - cerr << ff->GetName() << endl; - ff->Load(); - } - } - - // load pt - for (size_t i = 0; i < pts.size(); ++i) { - cerr << pts[i]->GetName() << endl; - pts[i]->Load(); - } -} diff --git a/contrib/basic-decoder/Global.h b/contrib/basic-decoder/Global.h deleted file mode 100644 index 9ea71859d..000000000 --- a/contrib/basic-decoder/Global.h +++ /dev/null @@ -1,59 +0,0 @@ - -#pragma once - -#include -#include -#include -#include -#include "Weights.h" -#include "Timer.h" - -namespace Moses -{ -class InputFileStream; -} - -class FeatureFunction; - -class Global -{ -public: - static const Global &Instance() { - return s_instance; - } - static Global &InstanceNonConst() { - return s_instance; - } - - Global(); - virtual ~Global(); - void Init(int argc, char** argv); - - std::istream &GetInputStream() const { - return *m_inputStrme; - } - - size_t stackSize; - int maxDistortion; - - Weights weights; - mutable Moses::Timer timer; - -protected: - static Global s_instance; - std::string m_iniPath, m_inputPath; - - mutable std::istream *m_inputStrme; - - typedef std::vector ParamList; - typedef std::map Params; - Params m_params; - - void InitParams(); - void InitFF(); - void InitWeight(); - void Load(); - - bool ParamExist(const std::string &key) const; -}; - diff --git a/contrib/basic-decoder/InputFileStream.cpp b/contrib/basic-decoder/InputFileStream.cpp deleted file mode 100644 index d2d774bf1..000000000 --- a/contrib/basic-decoder/InputFileStream.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "InputFileStream.h" -#include "gzfilebuf.h" -#include - -using namespace std; - -namespace Moses -{ -InputFileStream::InputFileStream(const std::string &filePath) - : std::istream(NULL) - , m_streambuf(NULL) -{ - if (filePath.size() > 3 && - filePath.substr(filePath.size() - 3, 3) == ".gz") { - m_streambuf = new gzfilebuf(filePath.c_str()); - } else { - std::filebuf* fb = new std::filebuf(); - fb = fb->open(filePath.c_str(), std::ios::in); - if (! fb) { - cerr << "Can't read " << filePath.c_str() << endl; - exit(1); - } - m_streambuf = fb; - } - this->init(m_streambuf); -} - -InputFileStream::~InputFileStream() -{ - delete m_streambuf; - m_streambuf = NULL; -} - -void InputFileStream::Close() -{ -} - - -} - diff --git a/contrib/basic-decoder/InputFileStream.h b/contrib/basic-decoder/InputFileStream.h deleted file mode 100644 index 4b2139c80..000000000 --- a/contrib/basic-decoder/InputFileStream.h +++ /dev/null @@ -1,46 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once - -#include -#include -#include - -namespace Moses -{ - -/** Used in place of std::istream, can read zipped files if it ends in .gz -*/ -class InputFileStream : public std::istream -{ -protected: - std::streambuf *m_streambuf; -public: - - InputFileStream(const std::string &filePath); - ~InputFileStream(); - - void Close(); -}; - -} - diff --git a/contrib/basic-decoder/InputPath.cpp b/contrib/basic-decoder/InputPath.cpp deleted file mode 100644 index 92307c200..000000000 --- a/contrib/basic-decoder/InputPath.cpp +++ /dev/null @@ -1,19 +0,0 @@ - -#include -#include "InputPath.h" -#include "WordsRange.h" -#include "FF/TranslationModel/PhraseTable.h" - -InputPath::InputPath(const InputPath *prevPath, const Phrase *phrase, size_t endPos) - :m_lookupColl(PhraseTable::GetColl().size()) - ,m_prevPath(prevPath) - ,m_phrase(phrase) -{ - size_t startPos = prevPath ? prevPath->GetRange().startPos : endPos; - m_range = new WordsRange(startPos, endPos); -} - -InputPath::~InputPath() -{ -} - diff --git a/contrib/basic-decoder/InputPath.h b/contrib/basic-decoder/InputPath.h deleted file mode 100644 index 1a027df1d..000000000 --- a/contrib/basic-decoder/InputPath.h +++ /dev/null @@ -1,51 +0,0 @@ - -#pragma once - -#include -#include "Phrase.h" - -class WordsRange; -class TargetPhrases; - -struct PhraseTableLookup { - const TargetPhrases *tpColl; - const void *ptNode; - - void Set(const TargetPhrases *tpColl, const void *ptNode) { - this->tpColl = tpColl; - this->ptNode = ptNode; - } -}; - -class InputPath -{ -public: - InputPath(const InputPath *prevPath, const Phrase *phrase, size_t endPos); - virtual ~InputPath(); - - const Phrase &GetPhrase() const { - return *m_phrase; - } - - const PhraseTableLookup &GetPtLookup(size_t ptId) const { - return m_lookupColl[ptId]; - } - PhraseTableLookup &GetPtLookup(size_t ptId) { - return m_lookupColl[ptId]; - } - - const InputPath *GetPrevPath() const { - return m_prevPath; - } - - const WordsRange &GetRange() const { - return *m_range; - } - -protected: - const InputPath *m_prevPath; - const Phrase *m_phrase; - const WordsRange *m_range; - std::vector m_lookupColl; // arranged by pt -}; - diff --git a/contrib/basic-decoder/Main.cpp b/contrib/basic-decoder/Main.cpp deleted file mode 100644 index 597654644..000000000 --- a/contrib/basic-decoder/Main.cpp +++ /dev/null @@ -1,57 +0,0 @@ - -#include -#include -#include "Sentence.h" -#include "Global.h" -#include "Util.h" -#include "Search/Manager.h" - -using namespace std; - -void temp(); - -int main(int argc, char** argv) -{ - //temp(); - - Fix(cerr, 3); - - Global &global = Global::InstanceNonConst(); - global.timer.start("Starting..."); - - global.Init(argc, argv); - - global.timer.check("Ready for input:"); - - string line; - while (getline(global.GetInputStream(), line)) { - if (line == "EXIT") { - break; - } - - Sentence *input = Sentence::CreateFromString(line); - cerr << "input=" << input->Debug() << endl; - - Manager manager(*input); - - const Hypothesis *hypo = manager.GetHypothesis(); - if (hypo) { - cerr << "TRANSLATION FOUND" << hypo->Debug() << endl; - hypo->Output(cout); - } else { - cerr << "NO BEST TRANSLATION" << endl; - } - cout << endl; - - cerr << "Ready for input:" << endl; - } - - cerr << "Shutting down" << endl; - - cerr << "hypotheses created=" << Hypothesis::GetNumHypothesesCreated() << endl; - - global.timer.check("Finished"); -} - - - diff --git a/contrib/basic-decoder/Makefile b/contrib/basic-decoder/Makefile deleted file mode 100644 index 86f380dc0..000000000 --- a/contrib/basic-decoder/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -UNAME_S := $(shell uname -s) - -BOOST_DIR = $(PWD)/../3rdparty/boost -BOOST_LIB = $(BOOST_DIR)/lib64 -SRI_DIR = $(PWD)/../3rdparty/srilm -SRI_MACHINE_TYPE=$(shell $(SRI_DIR)/sbin/machine-type) -SRI_LIB = $(SRI_DIR)/lib/$(SRI_MACHINE_TYPE) - -CXX = g++ -CPPFLAGS = -Wall -O3 -I. -I$(SRI_DIR)/include -I$(BOOST_DIR)/include -DSCORE_BREAKDOWN -LIBS = -L$(BOOST_LIB) -L$(SRI_LIB) -lz -loolm -ldstruct -lflm -llattice -lmisc -lboost_system-mt -lpthread #-liconv - -OBJECTS = Global.o Phrase.o Timer.o Word.o \ - InputFileStream.o Scores.o TypeDef.o WordsBitmap.o \ - InputPath.o Sentence.o Util.o WordsRange.o \ - Main.o TargetPhrase.o MyVocab.o \ - TargetPhrases.o Weights.o \ - FF/DistortionScoreProducer.o FF/PhrasePenalty.o FF/WordPenaltyProducer.o \ - FF/StatefulFeatureFunction.o \ - FF/FeatureFunction.o FF/StatelessFeatureFunction.o \ - FF/TranslationModel/PhraseTable.o FF/TranslationModel/UnknownWordPenalty.o \ - FF/TranslationModel/PhraseTableMemory.o \ - FF/TranslationModel/Memory/Node.o \ - FF/LM/LM.o FF/LM/InternalLM.o FF/LM/SRILM.o \ - Search/Hypothesis.o Search/Manager.o Search/Stack.o Search/Stacks.o - -basic-decoder: $(OBJECTS) - $(CXX) $(CPPFLAGS) -o basic-decoder $(OBJECTS) $(LIBS) - #rm -f *.o */*.o */*/*.o */*/*/*.o - -clean: - rm -f basic-decoder *.o */*.o */*/*.o */*/*/*.o diff --git a/contrib/basic-decoder/MyVocab.cpp b/contrib/basic-decoder/MyVocab.cpp deleted file mode 100644 index 61829b7bc..000000000 --- a/contrib/basic-decoder/MyVocab.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -#include "MyVocab.h" -#include "Util.h" -#include "check.h" - -using namespace std; - -namespace FastMoses -{ - -MyVocab MyVocab::s_instance; -VOCABID MyVocab::s_currId = 0; - -MyVocab::MyVocab() -{ - // TODO Auto-generated constructor stub -} - -MyVocab::~MyVocab() -{ - cerr << "delete Vocab" << endl; -} - -VOCABID MyVocab::GetOrCreateId(const std::string &str) -{ - Coll::left_map::const_iterator iter; - iter = m_coll.left.find(str); - if (iter != m_coll.left.end()) { - return iter->second; - } else { - ++s_currId; - m_coll.insert(Coll::value_type(str, s_currId)); - return s_currId; - } -} - -const std::string &MyVocab::GetString(VOCABID id) const -{ - Coll::right_map::const_iterator iter; - iter = m_coll.right.find(id); - assert(iter != m_coll.right.end()); - - const string &str = iter->second; - return str; -} - -} diff --git a/contrib/basic-decoder/MyVocab.h b/contrib/basic-decoder/MyVocab.h deleted file mode 100644 index e1004022d..000000000 --- a/contrib/basic-decoder/MyVocab.h +++ /dev/null @@ -1,31 +0,0 @@ - -#pragma once - -#include -#include "TypeDef.h" - -namespace FastMoses -{ - -class MyVocab -{ -public: - static MyVocab &Instance() { - return s_instance; - } - - MyVocab(); - virtual ~MyVocab(); - - VOCABID GetOrCreateId(const std::string &str); - const std::string &GetString(VOCABID id) const; -protected: - static MyVocab s_instance; - static VOCABID s_currId; - - typedef boost::bimap Coll; - Coll m_coll; - -}; - -} diff --git a/contrib/basic-decoder/Phrase.cpp b/contrib/basic-decoder/Phrase.cpp deleted file mode 100644 index a3415182d..000000000 --- a/contrib/basic-decoder/Phrase.cpp +++ /dev/null @@ -1,68 +0,0 @@ - -#include -#include -#include -#include "Phrase.h" -#include "Util.h" - -using namespace std; - -Phrase::Phrase(size_t size) - :m_size(size) - ,m_words(size) -{ -} - -Phrase::Phrase(const Phrase ©, size_t extra) - :m_size(copy.GetSize() + extra) -,m_words(copy.GetSize() + extra) -{ - for (size_t i = 0; i < copy.GetSize(); ++i) { - const Word &word = copy.GetWord(i); - Set(i, word); - } -} - -Phrase::~Phrase() -{ -} - -void Phrase::Set(size_t pos, const Word &word) -{ - m_words[pos].Set(word); -} - - -void Phrase::Output(std::ostream &out) const -{ - for (size_t i = 0; i < m_size; ++i) { - const Word &word = m_words[i]; - word.Output(out); - out << " "; - } -} - -std::string Phrase::Debug() const -{ - stringstream strme; - for (size_t i = 0; i < m_size; ++i) { - const Word &word = m_words[i]; - strme << word.Debug() << " "; - } - - return strme.str(); -} - -Phrase *Phrase::CreateFromString(const std::string &line) -{ - vector toks; - Tokenize(toks, line); - Phrase *phrase = new Phrase(toks.size()); - - for (size_t i = 0; i < toks.size(); ++i) { - Word &word = phrase->GetWord(i); - word.CreateFromString(toks[i]); - } - - return phrase; -} diff --git a/contrib/basic-decoder/Phrase.h b/contrib/basic-decoder/Phrase.h deleted file mode 100644 index e9300abd0..000000000 --- a/contrib/basic-decoder/Phrase.h +++ /dev/null @@ -1,50 +0,0 @@ - -#pragma once - -#include -#include -#include -#include "Word.h" - -typedef std::vector PhraseVec; - -class Phrase -{ -public: - static Phrase *CreateFromString(const std::string &line); - - Phrase(const Phrase ©); // do not implement - Phrase(size_t size); - Phrase(const Phrase ©, size_t extra); - - virtual ~Phrase(); - - const Word &GetWord(size_t pos) const { - return m_words[pos]; - } - Word &GetWord(size_t pos) { - assert(pos < m_size); - return m_words[pos]; - } - const Word &Back() const { - assert(m_size); - return m_words[m_size - 1]; - } - - size_t GetSize() const { - return m_size; - } - - void Set(size_t pos, const Word &word); - void SetLastWord(const Word &word) { - Set(m_size - 1, word); - } - - void Output(std::ostream &out) const; - virtual std::string Debug() const; - -protected: - size_t m_size; - std::vector m_words; -}; - diff --git a/contrib/basic-decoder/Scores.cpp b/contrib/basic-decoder/Scores.cpp deleted file mode 100644 index a84d34344..000000000 --- a/contrib/basic-decoder/Scores.cpp +++ /dev/null @@ -1,119 +0,0 @@ - -#include -#include -#include "Scores.h" -#include "Global.h" -#include "Util.h" -#include "check.h" -#include "FF/FeatureFunction.h" - -using namespace std; - -Scores::Scores() - :m_weightedScore(0) -#ifdef SCORE_BREAKDOWN - ,m_scores(FeatureFunction::GetTotalNumScores(), 0) -#endif -{ -} - -Scores::Scores(const Scores ©) - :m_weightedScore(copy.m_weightedScore) -#ifdef SCORE_BREAKDOWN - ,m_scores(copy.m_scores) -#endif -{ -} - -Scores::~Scores() -{ -} - -void Scores::CreateFromString(const FeatureFunction &ff, const std::string &line, bool logScores) -{ - std::vector scores(ff.GetNumScores()); - - if (logScores) { - std::vector probs(ff.GetNumScores()); - Tokenize(probs, line); - std::transform(probs.begin(),probs.end(),scores.begin(), - TransformScore); - } else { - Tokenize(scores, line); - } - - Add(ff, scores); -} - -void Scores::Add(const Scores &other) -{ - m_weightedScore += other.m_weightedScore; - -#ifdef SCORE_BREAKDOWN - size_t numScores = FeatureFunction::GetTotalNumScores(); - for (size_t i = 0; i < numScores; ++i) { - m_scores[i] += other.m_scores[i]; - } -#endif -} - -void Scores::Add(const FeatureFunction &ff, SCORE score) -{ - size_t numScores = ff.GetNumScores(); - CHECK(numScores == 1); - size_t startInd = ff.GetStartInd(); - - // weighted score - const Global &Global = Global::Instance(); - const std::vector &weights = Global.weights.GetWeights(); - SCORE weight = weights[startInd]; - - m_weightedScore += weight * score; - - // update vector -#ifdef SCORE_BREAKDOWN - m_scores[startInd] += score; -#endif -} - -void Scores::Add(const FeatureFunction &ff, const std::vector &scores) -{ - size_t numScores = ff.GetNumScores(); - CHECK(numScores == scores.size()); - size_t startInd = ff.GetStartInd(); - - const Global &Global = Global::Instance(); - const std::vector &weights = Global.weights.GetWeights(); - - for (size_t i = 0; i < numScores; ++i) { - size_t ffInd = startInd + i; - SCORE score = scores[i]; - SCORE weight = weights[ffInd]; - - // weighted score - m_weightedScore += weight * score; - - // update vector -#ifdef SCORE_BREAKDOWN - m_scores[ffInd] += score; -#endif - } - -} - -std::string Scores::Debug() const -{ - stringstream strme; - strme << "TOTAL=" << m_weightedScore; -#ifdef SCORE_BREAKDOWN - strme << " [" << m_scores[0]; - - size_t numScores = FeatureFunction::GetTotalNumScores(); - for (size_t i = 1; i < numScores; ++i) { - strme << "," << m_scores[i]; - } - strme << "]"; -#endif - return strme.str(); - -} diff --git a/contrib/basic-decoder/Scores.h b/contrib/basic-decoder/Scores.h deleted file mode 100644 index 6932cd334..000000000 --- a/contrib/basic-decoder/Scores.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include "TypeDef.h" - -class FeatureFunction; - -class Scores -{ -public: - Scores(); - Scores(const Scores ©); - virtual ~Scores(); - void CreateFromString(const FeatureFunction &ff, const std::string &line, bool logScores); - - SCORE GetWeightedScore() const { - return m_weightedScore; - } - - void Add(const Scores &other); - void Add(const FeatureFunction &ff, SCORE score); - void Add(const FeatureFunction &ff, const std::vector &scores); - - std::string Debug() const; - -protected: -#ifdef SCORE_BREAKDOWN - std::vector m_scores; // maybe it doesn't need this -#endif - SCORE m_weightedScore; -}; diff --git a/contrib/basic-decoder/Search/Hypothesis.cpp b/contrib/basic-decoder/Search/Hypothesis.cpp deleted file mode 100644 index d6c801c4a..000000000 --- a/contrib/basic-decoder/Search/Hypothesis.cpp +++ /dev/null @@ -1,121 +0,0 @@ - -#include -#include "Hypothesis.h" -#include "TargetPhrase.h" -#include "Sentence.h" -#include "WordsRange.h" -#include "Util.h" -#include "FF/StatefulFeatureFunction.h" - -using namespace std; - -size_t Hypothesis::s_id = 0; - -Hypothesis::Hypothesis(const TargetPhrase &tp, const WordsRange &range, const WordsBitmap &coverage) - :m_id(++s_id) - ,targetPhrase(tp) - ,m_range(range) - ,m_prevHypo(NULL) - ,m_coverage(coverage) - ,m_scores(tp.GetScores()) - ,m_hash(0) - ,targetRange(NOT_FOUND, NOT_FOUND) -{ - size_t numSFF = StatefulFeatureFunction::GetColl().size(); - m_ffStates.resize(numSFF); -} - -Hypothesis::Hypothesis(const TargetPhrase &tp, const Hypothesis &prevHypo, const WordsRange &range, const WordsBitmap &coverage) - :m_id(++s_id) - ,targetPhrase(tp) - ,m_range(range) - ,m_prevHypo(&prevHypo) - ,m_coverage(coverage) - ,m_scores(prevHypo.GetScores()) - ,m_hash(0) - ,targetRange(prevHypo.targetRange, tp.GetSize()) -{ - m_scores.Add(targetPhrase.GetScores()); - size_t numSFF = StatefulFeatureFunction::GetColl().size(); - m_ffStates.resize(numSFF); -} - -Hypothesis::~Hypothesis() -{ -} - -size_t Hypothesis::GetHash() const -{ - if (m_hash == 0) { - // do nothing, assume already hashed - // m_hash can be 0, but very small prob, or no statefull ff - size_t numStates = StatefulFeatureFunction::GetColl().size(); - for (size_t i = 0; i < numStates; ++i) { - size_t state = m_ffStates[i]; - boost::hash_combine(m_hash, state); - } - } - - return m_hash; -} - -bool Hypothesis::operator==(const Hypothesis &other) const -{ - size_t numStates = StatefulFeatureFunction::GetColl().size(); - for (size_t i = 0; i < numStates; ++i) { - size_t state = m_ffStates[i]; - size_t otherState = other.m_ffStates[i]; - - bool isEqual = (state == otherState); - if (!isEqual) { - return false; - } - } - - return true; -} - -void Hypothesis::Output(std::ostream &out) const -{ - if (m_prevHypo) { - m_prevHypo->Output(out); - } - targetPhrase.Output(out); -} - -std::string Hypothesis::Debug() const -{ - stringstream strme; - Fix(strme, 3); - strme << m_range.Debug() << " targetRange=" << targetRange.Debug() << " " << m_scores.Debug() << " "; - Output(strme); - - // states - strme << "states="; - size_t numSFF = StatefulFeatureFunction::GetColl().size(); - for (size_t i = 0; i < numSFF; ++i) { - size_t state = m_ffStates[i]; - strme << state << ","; - } - strme << "=" << m_hash; - - /* - if (m_prevHypo) { - strme << endl; - strme << m_prevHypo->Debug(); - } - */ - return strme.str(); -} - -const Word &Hypothesis::GetWord(size_t pos) const -{ - assert(pos <= targetRange.endPos); - const Hypothesis *hypo = this; - while (pos < hypo->targetRange.startPos) { - hypo = hypo->GetPrevHypo(); - assert(hypo != NULL); - } - return hypo->GetCurrWord(pos - hypo->targetRange.startPos); -} - diff --git a/contrib/basic-decoder/Search/Hypothesis.h b/contrib/basic-decoder/Search/Hypothesis.h deleted file mode 100644 index f637762d1..000000000 --- a/contrib/basic-decoder/Search/Hypothesis.h +++ /dev/null @@ -1,107 +0,0 @@ - -#pragma once - -#include -#include "WordsBitmap.h" -#include "WordsRange.h" -#include "Scores.h" -#include "Word.h" -#include "TargetPhrase.h" - -class FFState; -class Sentence; -class WordsRange; - -class Hypothesis -{ -public: - const TargetPhrase &targetPhrase; - const WordsRange targetRange; - - Hypothesis(); // do no implement - Hypothesis(const Hypothesis ©); // do not implement - - // creating the inital hypo - Hypothesis(const TargetPhrase &tp, const WordsRange &range, const WordsBitmap &coverage); - - // for extending a previous hypo - Hypothesis(const TargetPhrase &tp, const Hypothesis &prevHypo, const WordsRange &range, const WordsBitmap &coverage); - virtual ~Hypothesis(); - - const Scores &GetScores() const { - return m_scores; - } - Scores &GetScores() { - return m_scores; - } - - const WordsBitmap &GetCoverage() const { - return m_coverage; - } - const Hypothesis *GetPrevHypo() const { - return m_prevHypo; - } - - const WordsRange &GetRange() const { - return m_range; - } - - size_t GetState(size_t id) const { - return m_ffStates[id]; - } - void SetState(size_t id, size_t state) { - m_ffStates[id] = state; - } - - const Word &GetWord(size_t pos) const; - inline const Word &GetCurrWord(size_t pos) const { - return targetPhrase.GetWord(pos); - } - - /** length of the partial translation (from the start of the sentence) */ - inline size_t GetSize() const { - return targetRange.endPos + 1; - } - - void Output(std::ostream &out) const; - - size_t GetHash() const; - bool operator==(const Hypothesis &other) const; - - std::string Debug() const; - - static size_t GetNumHypothesesCreated() { - return s_id; - } -protected: - static size_t s_id; - size_t m_id; - - const WordsRange &m_range; - const Hypothesis *m_prevHypo; - const WordsBitmap m_coverage; - Scores m_scores; - - std::vector m_ffStates; - mutable size_t m_hash; -}; - - -struct HypothesisHasher { - size_t operator()(const Hypothesis *hypo) const { - return hypo->GetHash(); - } -}; - -struct HypothesisEqual { - bool operator()(const Hypothesis *a, const Hypothesis *b) const { - bool ret = *a == *b; - return ret; - } -}; - -struct HypothesisScoreOrderer { - bool operator()(const Hypothesis* a, const Hypothesis* b) const { - return a->GetScores().GetWeightedScore() > b->GetScores().GetWeightedScore(); - } -}; diff --git a/contrib/basic-decoder/Search/Manager.cpp b/contrib/basic-decoder/Search/Manager.cpp deleted file mode 100644 index 27f27b447..000000000 --- a/contrib/basic-decoder/Search/Manager.cpp +++ /dev/null @@ -1,106 +0,0 @@ - -#include -#include "Manager.h" -#include "InputPath.h" -#include "Hypothesis.h" -#include "Global.h" -#include "FF/StatefulFeatureFunction.h" -#include "FF/TranslationModel/PhraseTable.h" - -using namespace std; - -Manager::Manager(Sentence &sentence) - :m_sentence(sentence) - ,m_stacks(sentence.GetSize() + 1) - ,m_emptyPhrase(new TargetPhrase(0)) - ,m_emptyRange(new WordsRange(NOT_FOUND, NOT_FOUND)) - ,m_emptyCoverage(new WordsBitmap(sentence.GetSize())) -{ - FeatureFunction::Initialize(m_sentence); - - Global &global = Global::InstanceNonConst(); - - global.timer.check("Begin CreateInputPaths"); - CreateInputPaths(); - global.timer.check("Begin Lookup"); - Lookup(); - global.timer.check("Begin Search"); - Search(); - global.timer.check("Finished Search"); -} - -Manager::~Manager() -{ - FeatureFunction::CleanUp(m_sentence); -} - -void Manager::CreateInputPaths() -{ - for (size_t pos = 0; pos < m_sentence.GetSize(); ++pos) { - Phrase *phrase = new Phrase(1); - phrase->Set(0, m_sentence.GetWord(pos)); - - InputPath *path = new InputPath(NULL, phrase, pos); - m_inputPathQueue.push_back(path); - - CreateInputPaths(*path, pos + 1); - } -} - -void Manager::CreateInputPaths(const InputPath &prevPath, size_t pos) -{ - if (pos >= m_sentence.GetSize()) { - return; - } - - Phrase *phrase = new Phrase(prevPath.GetPhrase(), 1); - phrase->SetLastWord(m_sentence.GetWord(pos)); - - InputPath *path = new InputPath(&prevPath, phrase, pos); - m_inputPathQueue.push_back(path); - - CreateInputPaths(*path, pos + 1); -} - -void Manager::Lookup() -{ - const std::vector &pts = PhraseTable::GetColl(); - for (size_t i = 0; i < pts.size(); ++i) { - PhraseTable &pt = *pts[i]; - pt.Lookup(m_inputPathQueue); - } -} - -void Manager::Search() -{ - Hypothesis *emptyHypo = new Hypothesis(*m_emptyPhrase, *m_emptyRange, *m_emptyCoverage); - StatefulFeatureFunction::EvaluateEmptyHypo(m_sentence, *emptyHypo); - - m_stacks.Add(emptyHypo, 0); - - for (size_t i = 0; i < m_stacks.GetSize() - 1; ++i) { - cerr << Debug() << endl; - - Stack &stack = m_stacks.Get(i); - stack.PruneToSize(); - stack.Search(m_inputPathQueue); - } - -} - -const Hypothesis *Manager::GetHypothesis() const -{ - const Stack &lastStack = m_stacks.Back(); - const Hypothesis *hypo = lastStack.GetHypothesis(); - return hypo; -} - -std::string Manager::Debug() const -{ - stringstream strme; - for (size_t i = 0; i < m_stacks.GetSize(); ++i) { - const Stack &stack = m_stacks.Get(i); - strme << stack.Debug() << " "; - } - return strme.str(); -} diff --git a/contrib/basic-decoder/Search/Manager.h b/contrib/basic-decoder/Search/Manager.h deleted file mode 100644 index 83c59ac8c..000000000 --- a/contrib/basic-decoder/Search/Manager.h +++ /dev/null @@ -1,36 +0,0 @@ - -#pragma once - -#include "Sentence.h" -#include "Stacks.h" -#include "TargetPhrase.h" -#include "WordsRange.h" - -class InputPath; - -class Manager -{ -public: - Manager(Sentence &sentence); - virtual ~Manager(); - - const Hypothesis *GetHypothesis() const; - - std::string Debug() const; - -protected: - Sentence &m_sentence; - std::vector m_inputPathQueue; - Stacks m_stacks; - - TargetPhrase *m_emptyPhrase; - WordsRange *m_emptyRange; - WordsBitmap *m_emptyCoverage; - - void CreateInputPaths(); - void CreateInputPaths(const InputPath &prevPath, size_t pos); - - void Lookup(); - void Search(); -}; - diff --git a/contrib/basic-decoder/Search/Stack.cpp b/contrib/basic-decoder/Search/Stack.cpp deleted file mode 100644 index f2ce891db..000000000 --- a/contrib/basic-decoder/Search/Stack.cpp +++ /dev/null @@ -1,217 +0,0 @@ - -#include -#include "Stack.h" -#include "Stacks.h" -#include "check.h" -#include "InputPath.h" -#include "TargetPhrase.h" -#include "TargetPhrases.h" -#include "WordsRange.h" -#include "Global.h" -#include "FF/TranslationModel/PhraseTable.h" -#include "FF/StatefulFeatureFunction.h" - -using namespace std; - -Stack::Stack() -{ - m_maxHypoStackSize = Global::Instance().stackSize; - m_coll.reserve(m_maxHypoStackSize*2); -} - -Stack::~Stack() -{ - // TODO Auto-generated destructor stub -} - -bool Stack::AddPrune(Hypothesis *hypo) -{ - - std::pair addRet = Add(hypo); - if (addRet.second) { - // added - return true; - } - - // recombine - // equiv hypo exists, recombine with other hypo - iterator &iterExisting = addRet.first; - const Hypothesis *hypoExisting = *iterExisting; - - if (hypo->GetScores().GetWeightedScore() > hypoExisting->GetScores().GetWeightedScore()) { - // incoming hypo is better than the one we have - Remove(iterExisting); - - bool added = Add(hypo).second; - assert(added); - return false; - } else { - // already storing the best hypo. discard current hypo - return false; - } - -} - -std::pair Stack::Add(const Hypothesis *hypo) -{ - pair ret = m_coll.insert(hypo); - if (ret.second) { - // equiv hypo doesn't exists - if (m_coll.size() > m_maxHypoStackSize * 2) { - PruneToSize(m_maxHypoStackSize); - } - } - - return ret; -} - -void Stack::PruneToSize() -{ - PruneToSize(m_maxHypoStackSize); -} - -void Stack::PruneToSize(size_t newSize) -{ - if (m_coll.size() <= newSize ) { - return; // not over limit - } - - vector keep; - SortHypotheses(newSize, keep); - - m_coll.clear(); - vector::const_iterator iter; - for (iter = keep.begin(); iter != keep.end(); ++iter) { - const Hypothesis *hypo = *iter; - //cerr << "hypo" << hypo->Debug() << endl; - std::pair ret = Add(hypo); - CHECK(ret.second); - } -} - -void Stack::SortHypotheses(size_t newSize, vector &out) -{ - // sort hypotheses - out.reserve(m_coll.size()); - std::copy(m_coll.begin(), m_coll.end(), std::inserter(out, out.end())); - std::sort(out.begin(), out.end(), HypothesisScoreOrderer()); - - // also keep those on boundary - const Hypothesis &boundaryHypo = *out[newSize - 1]; - SCORE boundaryScore = boundaryHypo.GetScores().GetWeightedScore(); - - for (size_t i = newSize; i < out.size(); ++i) { - const Hypothesis *hypo = out[i]; - SCORE score = hypo->GetScores().GetWeightedScore(); - if (score < boundaryScore) { - // score for this hypothesis is less than boundary score. - // Discard this and all following hypos - out.resize(i); - break; - } - } -} - -void Stack::Remove(Coll::iterator &iter) -{ - //const Hypothesis *hypo = *iter; - size_t sizeBefore = m_coll.size(); - m_coll.erase(iter); - assert(sizeBefore - m_coll.size() == 1); -} - -void Stack::Search(const std::vector &queue) -{ - for (iterator iter = begin(); iter != end(); ++iter) { - const Hypothesis &hypo = **iter; - Extend(hypo, queue); - } -} - -void Stack::Extend(const Hypothesis &hypo, const std::vector &queue) -{ - //cerr << "extending " << hypo.Debug() << endl; - const WordsBitmap &hypoCoverage = hypo.GetCoverage(); - - for (size_t i = 0; i < queue.size(); ++i) { - const InputPath &path = *queue[i]; - const WordsRange &range = path.GetRange(); - //cerr << range.Debug() << " " << hypoCoverage.Debug() << endl; - if (!hypoCoverage.Overlap(range)) { - Extend(hypo, path); - //cerr << "EXTEND" << endl; - } else { - //cerr << "DONT EXTEND" << endl; - } - } -} - -void Stack::Extend(const Hypothesis &hypo, const InputPath &path) -{ - const WordsRange &range = path.GetRange(); - const WordsRange &prevRange = hypo.GetRange(); - const WordsBitmap &coverage = hypo.GetCoverage(); - if (!coverage.WithinReorderingConstraint(prevRange, range)) { - return; - } - - size_t numPt = PhraseTable::GetColl().size(); - for (size_t i = 0; i < numPt; ++i) { - const PhraseTableLookup &lookup = path.GetPtLookup(i); - const TargetPhrases *tpColl = lookup.tpColl; - if (tpColl) { - Extend(hypo, *tpColl, range); - } - } -} - -void Stack::Extend(const Hypothesis &hypo, const TargetPhrases &tpColl, const WordsRange &range) -{ - //cerr << "range=" << range.Debug() << " tpColl=" << tpColl.GetSize() << endl; - WordsBitmap newCoverage(hypo.GetCoverage(), range); - size_t wordsCovered = newCoverage.GetNumWordsCovered(); - - TargetPhrases::const_iterator iter; - for (iter = tpColl.begin(); iter != tpColl.end(); ++iter) { - const TargetPhrase &tp = **iter; - - Hypothesis *newHypo = new Hypothesis(tp, hypo, range, newCoverage); - - StatefulFeatureFunction::Evaluate(*newHypo); - - bool added = m_stacks->Add(newHypo, wordsCovered); - if (added) { - //cerr << "added" << newHypo->Debug() << endl; - } else { - // discarded - //delete newHypo; - } - } - - -} - -std::string Stack::Debug() const -{ - stringstream strme; - strme << GetSize(); - return strme.str(); -} - -const Hypothesis *Stack::GetHypothesis() const -{ - const Hypothesis *ret = NULL; - SCORE bestScore = -std::numeric_limits::max(); - - for (const_iterator iter = begin(); iter != end(); ++iter) { - const Hypothesis *currHypo = *iter; - SCORE currScore = currHypo->GetScores().GetWeightedScore(); - //cerr << currHypo->Debug() << endl; - if (currScore > bestScore) { - ret = currHypo; - bestScore = currScore; - } - - } - return ret; -} diff --git a/contrib/basic-decoder/Search/Stack.h b/contrib/basic-decoder/Search/Stack.h deleted file mode 100644 index 039bfc27a..000000000 --- a/contrib/basic-decoder/Search/Stack.h +++ /dev/null @@ -1,62 +0,0 @@ - -#pragma once - -#include -#include "Search/Hypothesis.h" - -class InputPath; -class TargetPhrases; -class Stacks; - -class Stack -{ -protected: - typedef boost::unordered_set Coll; - Coll m_coll; - size_t m_maxHypoStackSize; - Stacks *m_stacks; - - std::pair Add(const Hypothesis *hypo); - - void Remove(Coll::iterator &iter); - void SortHypotheses(size_t newSize, std::vector &out); - void Extend(const Hypothesis &hypo, const std::vector &queue); - void Extend(const Hypothesis &hypo, const InputPath &path); - void Extend(const Hypothesis &hypo, const TargetPhrases &tpColl, const WordsRange &range); - void PruneToSize(size_t newSize); - -public: - typedef Coll::iterator iterator; - typedef Coll::const_iterator const_iterator; - //! iterators - const_iterator begin() const { - return m_coll.begin(); - } - const_iterator end() const { - return m_coll.end(); - } - - Stack(); - virtual ~Stack(); - - size_t GetSize() const { - return m_coll.size(); - } - - const Hypothesis *GetHypothesis() const; - - bool AddPrune(Hypothesis *hypo); - void PruneToSize(); - - void Search(const std::vector &queue); - - void SetContainer(Stacks &stacks) { - m_stacks = &stacks; - } - - std::string Debug() const; - -}; - diff --git a/contrib/basic-decoder/Search/Stacks.cpp b/contrib/basic-decoder/Search/Stacks.cpp deleted file mode 100644 index 70436473b..000000000 --- a/contrib/basic-decoder/Search/Stacks.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "Stacks.h" - -Stacks::Stacks(size_t size) -:m_stacks(size) -{ - for (size_t i = 0; i <= m_stacks.size(); ++i) { - m_stacks[i].SetContainer(*this); - } -} -/* -bool Stacks::Add(Hypothesis *hypo, size_t wordsCovered) -{ - bool added = m_stacks[wordsCovered].AddPrune(hypo); - return added; -} -*/ diff --git a/contrib/basic-decoder/Search/Stacks.h b/contrib/basic-decoder/Search/Stacks.h deleted file mode 100644 index 72ba28112..000000000 --- a/contrib/basic-decoder/Search/Stacks.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include "Stack.h" - -class Hypothesis; - -class Stacks -{ -public: - Stacks(size_t size); - - size_t GetSize() const - { return m_stacks.size(); } - - const Stack &Get(size_t i) const - { return m_stacks[i]; } - Stack &Get(size_t i) - { return m_stacks[i]; } - - const Stack &Back() const - { return m_stacks.back(); } - - inline bool Add(Hypothesis *hypo, size_t wordsCovered) - { - bool added = m_stacks[wordsCovered].AddPrune(hypo); - return added; - } - -protected: - std::vector m_stacks; - -}; - - diff --git a/contrib/basic-decoder/Sentence.cpp b/contrib/basic-decoder/Sentence.cpp deleted file mode 100644 index 2ca1649d3..000000000 --- a/contrib/basic-decoder/Sentence.cpp +++ /dev/null @@ -1,30 +0,0 @@ - -#include "Sentence.h" -#include "Util.h" - -using namespace std; - -Sentence::Sentence(size_t size) - :Phrase(size) -{ - // TODO Auto-generated constructor stub - -} - -Sentence::~Sentence() -{ -} - -Sentence *Sentence::CreateFromString(const std::string &line) -{ - vector toks; - Tokenize(toks, line); - Sentence *phrase = new Sentence(toks.size()); - - for (size_t i = 0; i < toks.size(); ++i) { - Word &word = phrase->GetWord(i); - word.CreateFromString(toks[i]); - } - - return phrase; -} diff --git a/contrib/basic-decoder/Sentence.h b/contrib/basic-decoder/Sentence.h deleted file mode 100644 index 33979e8a1..000000000 --- a/contrib/basic-decoder/Sentence.h +++ /dev/null @@ -1,14 +0,0 @@ - -#pragma once - -#include "Phrase.h" - -class Sentence :public Phrase -{ -public: - static Sentence *CreateFromString(const std::string &line); - - Sentence(size_t size); - virtual ~Sentence(); -}; - diff --git a/contrib/basic-decoder/TargetPhrase.cpp b/contrib/basic-decoder/TargetPhrase.cpp deleted file mode 100644 index 8c461cdbf..000000000 --- a/contrib/basic-decoder/TargetPhrase.cpp +++ /dev/null @@ -1,49 +0,0 @@ - -#include "TargetPhrase.h" -#include "Util.h" -#include - -using namespace std; - -TargetPhrase::TargetPhrase(size_t size) - :Phrase(size) - ,m_scores() -{ - // TODO Auto-generated constructor stub - -} - -TargetPhrase::~TargetPhrase() -{ -} - -TargetPhrase *TargetPhrase::CreateFromString( - const FeatureFunction &ff, - const std::string &targetStr, - const std::string &scoreStr, - bool logScores) -{ - vector toks; - - // words - Tokenize(toks, targetStr); - TargetPhrase *phrase = new TargetPhrase(toks.size()); - - for (size_t i = 0; i < toks.size(); ++i) { - Word &word = phrase->GetWord(i); - word.CreateFromString(toks[i]); - } - - // score - phrase->GetScores().CreateFromString(ff, scoreStr, logScores); - - return phrase; -} - -std::string TargetPhrase::Debug() const -{ - stringstream strme; - strme << Phrase::Debug() << " "; - strme << m_scores.Debug(); - return strme.str(); -} diff --git a/contrib/basic-decoder/TargetPhrase.h b/contrib/basic-decoder/TargetPhrase.h deleted file mode 100644 index 46767bf49..000000000 --- a/contrib/basic-decoder/TargetPhrase.h +++ /dev/null @@ -1,33 +0,0 @@ - -#pragma once - -#include "Phrase.h" -#include "Scores.h" - -class TargetPhrase: public Phrase -{ -public: - static TargetPhrase *CreateFromString(const FeatureFunction &ff, - const std::string &targetStr, - const std::string &scoreStr, - bool logScores); - - TargetPhrase(size_t size); - - TargetPhrase(const TargetPhrase ©); // do not implement - - virtual ~TargetPhrase(); - - Scores &GetScores() { - return m_scores; - } - const Scores &GetScores() const { - return m_scores; - } - - virtual std::string Debug() const; - -protected: - Scores m_scores; -}; - diff --git a/contrib/basic-decoder/TargetPhrases.cpp b/contrib/basic-decoder/TargetPhrases.cpp deleted file mode 100644 index 66f65acc3..000000000 --- a/contrib/basic-decoder/TargetPhrases.cpp +++ /dev/null @@ -1,16 +0,0 @@ - -#include -#include "TargetPhrases.h" - -using namespace std; - -TargetPhrases::TargetPhrases() -{ - // TODO Auto-generated constructor stub - -} - -TargetPhrases::~TargetPhrases() -{ - //cerr << "deleted=" << this << endl; -} diff --git a/contrib/basic-decoder/TargetPhrases.h b/contrib/basic-decoder/TargetPhrases.h deleted file mode 100644 index fb0eeb086..000000000 --- a/contrib/basic-decoder/TargetPhrases.h +++ /dev/null @@ -1,38 +0,0 @@ - -#pragma once - -#include - -class TargetPhrase; - -class TargetPhrases -{ - typedef std::vector Coll; - -public: - typedef Coll::iterator iterator; - typedef Coll::const_iterator const_iterator; - - const_iterator begin() const { - return m_coll.begin(); - } - const_iterator end() const { - return m_coll.end(); - } - - TargetPhrases(); - virtual ~TargetPhrases(); - - void Add(const TargetPhrase *tp) { - m_coll.push_back(tp); - } - - size_t GetSize() const { - return m_coll.size(); - } - -protected: - Coll m_coll; - -}; - diff --git a/contrib/basic-decoder/Timer.cpp b/contrib/basic-decoder/Timer.cpp deleted file mode 100644 index cb56c6935..000000000 --- a/contrib/basic-decoder/Timer.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "Timer.h" - -using namespace std; - -namespace Moses -{ - -void Timer::start(const char* msg) -{ - // Print an optional message, something like "Starting timer t"; - if (msg) { - cerr << msg << std::endl; - } - - // Return immediately if the timer is already running - if (running) return; - - // Change timer status to running - running = true; - - // Set the start time; - time(&start_time); -} - -void Timer::check(const char* msg) -{ - // Print an optional message, something like "Checking timer t"; - if (msg) { - cerr << msg << " : "; - } - - // TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n"); - cerr << "[" << (running ? elapsed_time() : 0) << "] seconds\n"; -} - -double Timer::elapsed_time() -{ - time_t now; - time(&now); - return difftime(now, start_time); -} - -double Timer::get_elapsed_time() -{ - return elapsed_time(); -} - -} - diff --git a/contrib/basic-decoder/Timer.h b/contrib/basic-decoder/Timer.h deleted file mode 100644 index a6bd0e91a..000000000 --- a/contrib/basic-decoder/Timer.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef moses_Time_H -#define moses_Time_H - -#include -#include -#include -#include "Util.h" - -namespace Moses -{ - -/** Wrapper around time_t to time how long things have been running - * according to walltime. We avoid CPU time since it is less reliable - * in a multi-threaded environment and can spuriously include clock cycles - * used by other threads in the same process. - */ -class Timer -{ - friend std::ostream& operator<<(std::ostream& os, Timer& t); - -private: - bool running; - // note: this only has the resolution of seconds, we'd often like better resolution - // we make our best effort to do this on a system-by-system basis - time_t start_time; - - // in seconds - double elapsed_time(); - -public: - /*** - * 'running' is initially false. A timer needs to be explicitly started - * using 'start' or 'restart' - */ - Timer() : running(false) { - start_time = 0; - } - - void start(const char* msg = 0); -// void restart(const char* msg = 0); -// void stop(const char* msg = 0); - void check(const char* msg = 0); - double get_elapsed_time(); - -}; - -} - -#endif diff --git a/contrib/basic-decoder/TypeDef.cpp b/contrib/basic-decoder/TypeDef.cpp deleted file mode 100644 index b312b58ec..000000000 --- a/contrib/basic-decoder/TypeDef.cpp +++ /dev/null @@ -1,4 +0,0 @@ - -#include "TypeDef.h" - - diff --git a/contrib/basic-decoder/TypeDef.h b/contrib/basic-decoder/TypeDef.h deleted file mode 100644 index fdf4fe129..000000000 --- a/contrib/basic-decoder/TypeDef.h +++ /dev/null @@ -1,11 +0,0 @@ - -#pragma once - -#include - -typedef float SCORE; -typedef int VOCABID; - -#define NOT_FOUND std::numeric_limits::max() - -const SCORE LOWEST_SCORE = -100.0; diff --git a/contrib/basic-decoder/Util.cpp b/contrib/basic-decoder/Util.cpp deleted file mode 100644 index 8e1cca72d..000000000 --- a/contrib/basic-decoder/Util.cpp +++ /dev/null @@ -1,9 +0,0 @@ - -#include "Util.h" - -const std::string Trim(const std::string& str, const std::string dropChars) -{ - std::string res = str; - res.erase(str.find_last_not_of(dropChars)+1); - return res.erase(0, res.find_first_not_of(dropChars)); -} diff --git a/contrib/basic-decoder/Util.h b/contrib/basic-decoder/Util.h deleted file mode 100644 index 44c9dd850..000000000 --- a/contrib/basic-decoder/Util.h +++ /dev/null @@ -1,132 +0,0 @@ - -#pragma once - -#include -#include -#include -#include -#include -#include "TypeDef.h" - -//! get string representation of any object/variable, as long as it can pipe to a stream -template -inline std::string SPrint(const T &input) -{ - std::stringstream stream(""); - stream << input; - return stream.str(); -} - -//! delete white spaces at beginning and end of string -const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r"); - -//! convert string to variable of type T. Used to reading floats, int etc from files -template -inline T Scan(const std::string &input) -{ - std::stringstream stream(input); - T ret; - stream >> ret; - return ret; -} - -template -inline void Scan(std::vector &output, const std::vector< std::string > &input) -{ - output.resize(input.size()); - for (size_t i = 0 ; i < input.size() ; i++) { - output[i] = Scan( input[i] ); - } -} - -inline void Tokenize(std::vector &output - , const std::string& str - , const std::string& delimiters = " \t") -{ - // Skip delimiters at beginning. - std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); - // Find first "non-delimiter". - std::string::size_type pos = str.find_first_of(delimiters, lastPos); - - while (std::string::npos != pos || std::string::npos != lastPos) { - // Found a token, add it to the vector. - output.push_back(str.substr(lastPos, pos - lastPos)); - // Skip delimiters. Note the "not_of" - lastPos = str.find_first_not_of(delimiters, pos); - // Find next "non-delimiter" - pos = str.find_first_of(delimiters, lastPos); - } -} - -template -inline void Tokenize( std::vector &output - , const std::string &input - , const std::string& delimiters = " \t") -{ - std::vector stringVector; - Tokenize(stringVector, input, delimiters); - return Scan(output, stringVector ); -} - - -/** only split of the first delimiter. Used by class FeatureFunction for parse key=value pair. - * Value may have = character -*/ -inline std::vector TokenizeFirstOnly(const std::string& str, - const std::string& delimiters = " \t") -{ - std::vector tokens; - std::string::size_type pos = str.find_first_of(delimiters); - - if (std::string::npos != pos) { - // Found a token, add it to the vector. - tokens.push_back(str.substr(0, pos)); - tokens.push_back(str.substr(pos + 1, str.size() - pos - 1)); - } else { - tokens.push_back(str); - } - - return tokens; -} - -// speeded up version of above -inline void TokenizeMultiCharSeparator(std::vector &output - ,const std::string& str - ,const std::string& separator) -{ - size_t pos = 0; - // Find first "non-delimiter". - std::string::size_type nextPos = str.find(separator, pos); - - while (nextPos != std::string::npos) { - // Found a token, add it to the vector. - output.push_back(Trim(str.substr(pos, nextPos - pos))); - // Skip delimiters. Note the "not_of" - pos = nextPos + separator.size(); - // Find next "non-delimiter" - nextPos = str.find(separator, pos); - } - output.push_back(Trim(str.substr(pos, nextPos - pos))); -} - -inline SCORE FloorScore(SCORE logScore) -{ - return (std::max)(logScore , LOWEST_SCORE); -} - -inline SCORE TransformScore(SCORE prob) -{ - return FloorScore(log(prob)); -} - -inline float TransformSRIScore(float sriScore) -{ - return sriScore * 2.30258509299405f; -} - -/** Enforce rounding */ -inline void Fix(std::ostream& stream, size_t size) -{ - stream.setf(std::ios::fixed); - stream.precision(size); -} diff --git a/contrib/basic-decoder/Weights.cpp b/contrib/basic-decoder/Weights.cpp deleted file mode 100644 index b768f7629..000000000 --- a/contrib/basic-decoder/Weights.cpp +++ /dev/null @@ -1,38 +0,0 @@ - -#include "Weights.h" -#include "Util.h" -#include "check.h" -#include "FF/FeatureFunction.h" - -using namespace std; - -Weights::Weights() - :m_weights(FeatureFunction::GetTotalNumScores(), 0) -{ - // TODO Auto-generated constructor stub - -} - -Weights::~Weights() -{ - // TODO Auto-generated destructor stub -} - -void Weights::CreateFromString(const std::string &line) -{ - Tokenize(m_weights, line); - -} - -void Weights::SetWeights(const FeatureFunction &ff, const std::vector &weights) -{ - size_t numScores = ff.GetNumScores(); - CHECK(numScores == weights.size()); - size_t startInd = ff.GetStartInd(); - - size_t inInd = 0; - for (size_t i = startInd; i < startInd + numScores; ++i, ++inInd) { - m_weights[i] = weights[inInd]; - } -} - diff --git a/contrib/basic-decoder/Weights.h b/contrib/basic-decoder/Weights.h deleted file mode 100644 index 417d98840..000000000 --- a/contrib/basic-decoder/Weights.h +++ /dev/null @@ -1,29 +0,0 @@ - -#pragma once - -#include -#include -#include "TypeDef.h" - -class FeatureFunction; - -class Weights -{ -public: - Weights(); - virtual ~Weights(); - void CreateFromString(const std::string &line); - - const std::vector &GetWeights() const { - return m_weights; - } - - void SetWeights(const FeatureFunction &ff, const std::vector &weights); - void SetNumScores(size_t num) { - m_weights.resize(num, 0); - } - -protected: - std::vector m_weights; -}; - diff --git a/contrib/basic-decoder/Word.cpp b/contrib/basic-decoder/Word.cpp deleted file mode 100644 index e9dfadda4..000000000 --- a/contrib/basic-decoder/Word.cpp +++ /dev/null @@ -1,45 +0,0 @@ - -#include "Word.h" -#include "MyVocab.h" - -using namespace std; - -Word::Word() -{ - // TODO Auto-generated constructor stub - -} - -Word::~Word() -{ -} - -void Word::Set(const Word &word) -{ - m_vocabId = word.m_vocabId; -} - -void Word::CreateFromString(const std::string &line) -{ - FastMoses::MyVocab &vocab = FastMoses::MyVocab::Instance(); - m_vocabId = vocab.GetOrCreateId(line); -} - -void Word::Output(std::ostream &out) const -{ - FastMoses::MyVocab &vocab = FastMoses::MyVocab::Instance(); - const string &ret = vocab.GetString(m_vocabId); - out << ret; -} - -std::string Word::ToString() const -{ - stringstream strme; - Output(strme); - return strme.str(); -} - -std::string Word::Debug() const -{ - return ToString(); -} diff --git a/contrib/basic-decoder/Word.h b/contrib/basic-decoder/Word.h deleted file mode 100644 index 1d98817d3..000000000 --- a/contrib/basic-decoder/Word.h +++ /dev/null @@ -1,52 +0,0 @@ - -#pragma once - -#include -#include "TypeDef.h" - -class Word -{ -public: - Word(); - virtual ~Word(); - - void CreateFromString(const std::string &line); - - VOCABID GetVocab() const { - return m_vocabId; - } - - void Set(const Word &word); - - void Output(std::ostream &out) const; - std::string ToString() const; - - std::string Debug() const; - - int Compare(const Word &other) const { - if (m_vocabId == other.m_vocabId) { - return 0; - } - - return (m_vocabId < other.m_vocabId) ? -1 : +1; - } - - bool operator== (const Word &other) const { - // needed to store word in GenerationDictionary map - // uses comparison of FactorKey - // 'proper' comparison, not address/id comparison - return Compare(other) == 0; - } - -protected: - VOCABID m_vocabId; -}; - -class WordHasher -{ -public: - size_t operator()(const Word &word) const { - return word.GetVocab(); - } -}; - diff --git a/contrib/basic-decoder/WordsBitmap.cpp b/contrib/basic-decoder/WordsBitmap.cpp deleted file mode 100644 index 92d4f2813..000000000 --- a/contrib/basic-decoder/WordsBitmap.cpp +++ /dev/null @@ -1,109 +0,0 @@ - -#include -#include -#include -#include -#include -#include "WordsBitmap.h" -#include "WordsRange.h" -#include "TypeDef.h" -#include "Global.h" - -using namespace std; - -WordsBitmap::WordsBitmap(size_t size) -:m_bitmap(size, false) -{ -} - -WordsBitmap::WordsBitmap(const WordsBitmap ©) -:m_bitmap(copy.m_bitmap) -{ -} - -WordsBitmap::WordsBitmap(const WordsBitmap ©, const WordsRange &range) -:m_bitmap(copy.m_bitmap) -{ - for (size_t pos = range.startPos; pos <= range.endPos; ++pos) { - m_bitmap[pos] = true; - } -} - -WordsBitmap::~WordsBitmap() -{ -} - -size_t WordsBitmap::GetNumWordsCovered() const -{ - size_t count = 0; - for (size_t pos = 0 ; pos < m_bitmap.size() ; pos++) { - if (m_bitmap[pos]) - ++count; - } - return count; -} - -size_t WordsBitmap::GetFirstGapPos() const -{ - for (size_t pos = 0 ; pos < m_bitmap.size() ; pos++) { - if (!m_bitmap[pos]) { - return pos; - } - } - // all words translated - return NOT_FOUND; -} - -bool WordsBitmap::WithinReorderingConstraint(const WordsRange &prevRange, const WordsRange &nextRange) const -{ - //return true; - int maxDistortion = Global::Instance().maxDistortion; - if (maxDistortion < 0) { - // unlimited distortion - return true; - } - - // actual distortion score if you do create this hypo - int distScore = prevRange.ComputeDistortionScore(nextRange); - if (distScore > maxDistortion) { - return false; - } - - // what distortion you need to expend to jump back to earler untranslated words - size_t firstGap = GetFirstGapPos(); - if (nextRange.startPos == firstGap) { - // no gaps - return true; - } - - assert(firstGap < nextRange.endPos); - if (nextRange.endPos - firstGap + 1 > maxDistortion) { - // has to jump back too far - return false; - } - - return true; -} - -bool WordsBitmap::Overlap(const WordsRange &compare) const -{ - for (size_t pos = compare.startPos ; pos <= compare.endPos ; pos++) { - if (m_bitmap[pos]) { - return true; - } - } - return false; -} - -std::string WordsBitmap::Debug() const -{ - stringstream strme; - strme << "["; - for (size_t i = 0; i < m_bitmap.size(); ++i) { - strme << m_bitmap[i]; - } - strme << "]"; - return strme.str(); - -} - diff --git a/contrib/basic-decoder/WordsBitmap.h b/contrib/basic-decoder/WordsBitmap.h deleted file mode 100644 index 9a0f31756..000000000 --- a/contrib/basic-decoder/WordsBitmap.h +++ /dev/null @@ -1,39 +0,0 @@ - -#pragma once - -#include -#include - -class WordsRange; - -class WordsBitmap -{ -protected: - std::vector m_bitmap; -public: - WordsBitmap(); // do not implement - - // creating the inital hypo. No words translated - WordsBitmap(size_t size); - WordsBitmap(const WordsBitmap ©); - WordsBitmap(const WordsBitmap ©, const WordsRange &range); - virtual ~WordsBitmap(); - - //! count of words translated - size_t GetNumWordsCovered() const; - - //! count of words translated - size_t GetFirstGapPos() const; - - bool IsComplete() const { - return m_bitmap.size() == GetNumWordsCovered(); - } - - bool WithinReorderingConstraint(const WordsRange &prevRange, const WordsRange &nextRange) const; - - //! whether the wordrange overlaps with any translated word in this bitmap - bool Overlap(const WordsRange &compare) const; - - std::string Debug() const; -}; - diff --git a/contrib/basic-decoder/WordsRange.cpp b/contrib/basic-decoder/WordsRange.cpp deleted file mode 100644 index 2a02498b8..000000000 --- a/contrib/basic-decoder/WordsRange.cpp +++ /dev/null @@ -1,46 +0,0 @@ - -#include -#include -#include "WordsRange.h" - -using namespace std; - -WordsRange::WordsRange(const WordsRange &prevRange, size_t phraseSize) -{ - startPos = prevRange.endPos + 1; - endPos = prevRange.endPos + phraseSize; -} - -WordsRange::~WordsRange() -{ -} - -size_t WordsRange::GetHash() const -{ - size_t ret = startPos; - boost::hash_combine(ret, endPos); - return ret; -} - -bool WordsRange::operator==(const WordsRange &other) const -{ - return (startPos == other.startPos) && (endPos == other.endPos); -} - -std::string WordsRange::Debug() const -{ - stringstream strme; - strme << "[" << startPos << "," << endPos << "]"; - return strme.str(); -} - -int WordsRange::ComputeDistortionScore(const WordsRange &next) const -{ - int dist = 0; - if (GetNumWordsCovered() == 0) { - dist = next.startPos; - } else { - dist = (int)endPos - (int)next.startPos + 1 ; - } - return -abs(dist); -} diff --git a/contrib/basic-decoder/WordsRange.h b/contrib/basic-decoder/WordsRange.h deleted file mode 100644 index ae902074b..000000000 --- a/contrib/basic-decoder/WordsRange.h +++ /dev/null @@ -1,46 +0,0 @@ - -#pragma once - -#include -#include "TypeDef.h" - -class WordsRange -{ -public: - WordsRange(); // do not implement - - WordsRange(const WordsRange &prevRange, size_t phraseSize); - - WordsRange(size_t s, size_t e) - :startPos(s) - ,endPos(e) - {} - - virtual ~WordsRange(); - - size_t startPos, endPos; - - //! count of words translated - inline size_t GetNumWordsCovered() const { - return (startPos == NOT_FOUND) ? 0 : endPos - startPos + 1; - } - - inline size_t GetNumWordsBetween(const WordsRange &other) const { - //CHECK(!Overlap(x)); - - if (other.endPos < startPos) { - return startPos - other.endPos - 1; - } - - return other.startPos - endPos - 1; - } - - int ComputeDistortionScore(const WordsRange &next) const; - - size_t GetHash() const; - bool operator==(const WordsRange &other) const; - - std::string Debug() const; - -}; - diff --git a/contrib/basic-decoder/check.h b/contrib/basic-decoder/check.h deleted file mode 100644 index 672e3fa45..000000000 --- a/contrib/basic-decoder/check.h +++ /dev/null @@ -1,21 +0,0 @@ -/* People have been abusing assert by assuming it will always execute. To - * rememdy the situation, asserts were replaced with CHECK. These should then - * be manually replaced with assert (when used correctly) or UTIL_THROW (for - * runtime checks). - */ -#ifndef UTIL_CHECK__ -#define UTIL_CHECK__ - -#include -#include - -#include - -#define CHECK(Condition) do { \ - if (!(Condition)) { \ - std::cerr << "Check " << #Condition << " failed in " << __FILE__ << ":" << __LINE__ << std::endl; \ - abort(); \ - } \ -} while (0) // swallow ; - -#endif // UTIL_CHECK__ diff --git a/contrib/basic-decoder/gzfilebuf.h b/contrib/basic-decoder/gzfilebuf.h deleted file mode 100644 index 2376c2875..000000000 --- a/contrib/basic-decoder/gzfilebuf.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef moses_gzfile_buf_h -#define moses_gzfile_buf_h - -#include -#include -#include - -/** wrapper around gzip input stream. Unknown parentage - * @todo replace with boost version - output stream already uses it - */ -class gzfilebuf : public std::streambuf -{ -public: - gzfilebuf(const char *filename) { - _gzf = gzopen(filename, "rb"); - setg (_buff+sizeof(int), // beginning of putback area - _buff+sizeof(int), // read position - _buff+sizeof(int)); // end position - } - ~gzfilebuf() { - gzclose(_gzf); - } -protected: - virtual int_type overflow (int_type /* c */) { - throw; - } - - // write multiple characters - virtual - std::streamsize xsputn (const char* /* s */, - std::streamsize /* num */) { - throw; - } - - virtual std::streampos seekpos ( std::streampos /* sp */, std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */ ) { - throw; - } - - //read one character - virtual int_type underflow () { - // is read position before end of _buff? - if (gptr() < egptr()) { - return traits_type::to_int_type(*gptr()); - } - - /* process size of putback area - * - use number of characters read - * - but at most four - */ - unsigned int numPutback = gptr() - eback(); - if (numPutback > sizeof(int)) { - numPutback = sizeof(int); - } - - /* copy up to four characters previously read into - * the putback _buff (area of first four characters) - */ - std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback, - numPutback); - - // read new characters - int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int)); - if (num <= 0) { - // ERROR or EOF - return EOF; - } - - // reset _buff pointers - setg (_buff+(sizeof(int)-numPutback), // beginning of putback area - _buff+sizeof(int), // read position - _buff+sizeof(int)+num); // end of buffer - - // return next character - return traits_type::to_int_type(*gptr()); - } - - std::streamsize xsgetn (char* s, - std::streamsize num) { - return gzread(_gzf,s,num); - } - -private: - gzFile _gzf; - static const unsigned int _buffsize = 1024; - char _buff[_buffsize]; -}; - -#endif From d6d0877ea32f651c0fede89c33d985e2bf38a1b9 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 11:56:00 +0000 Subject: [PATCH 04/12] add comment for future work on making fdstream more portable /Jeroen Vermeulen --- mert/Fdstream.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mert/Fdstream.h b/mert/Fdstream.h index 5c549587e..2258ef4a5 100644 --- a/mert/Fdstream.h +++ b/mert/Fdstream.h @@ -1,7 +1,12 @@ /* * This class creates c++ like stream from file descriptor + * It uses gcc-specific functions, therefore is not portable + * + * Jeroen Vermeulen reckons that it can be replaced with Boost's io::stream_buffer + * */ + #ifndef _FDSTREAM_ #define _FDSTREAM_ From 952b41597517cfabbefbea27348c9cf4dc6ed49f Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 14:10:49 +0000 Subject: [PATCH 05/12] delete persistent cache arguments. Still have to disable new phrase-table cache --- mira/Decoder.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp index 075772997..ab7854a7c 100644 --- a/mira/Decoder.cpp +++ b/mira/Decoder.cpp @@ -47,7 +47,7 @@ static char* strToChar(const string& s) MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector decoder_params) : m_manager(NULL) { - static int BASE_ARGC = 8; + static int BASE_ARGC = 4; Parameter* params = new Parameter(); char ** mosesargv = new char*[BASE_ARGC + argc]; mosesargv[0] = strToChar("-f"); @@ -56,10 +56,13 @@ MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vect stringstream dbgin; dbgin << debuglevel; mosesargv[3] = strToChar(dbgin.str()); + + /* mosesargv[4] = strToChar("-use-persistent-cache"); mosesargv[5] = strToChar("0"); mosesargv[6] = strToChar("-persistent-cache-size"); mosesargv[7] = strToChar("0"); + */ for (int i = 0; i < argc; ++i) { char *cstr = &(decoder_params[i])[0]; From d66d6be1b118450b3fb6444d2849350ccb0d9d9f Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 15:25:35 +0000 Subject: [PATCH 06/12] eclipse project for mira program --- contrib/other-builds/mira/.cproject | 176 ++++++++++++++++++ contrib/other-builds/mira/.project | 81 ++++++++ .../other-builds/moses-chart-cmd/.cproject | 9 +- contrib/other-builds/moses-cmd/.cproject | 9 +- 4 files changed, 267 insertions(+), 8 deletions(-) create mode 100644 contrib/other-builds/mira/.cproject create mode 100644 contrib/other-builds/mira/.project diff --git a/contrib/other-builds/mira/.cproject b/contrib/other-builds/mira/.cproject new file mode 100644 index 000000000..d7ddb7661 --- /dev/null +++ b/contrib/other-builds/mira/.cproject @@ -0,0 +1,176 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/other-builds/mira/.project b/contrib/other-builds/mira/.project new file mode 100644 index 000000000..03838731f --- /dev/null +++ b/contrib/other-builds/mira/.project @@ -0,0 +1,81 @@ + + + mira + + + mert_lib + moses + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + + + Decoder.cpp + 1 + PARENT-3-PROJECT_LOC/mira/Decoder.cpp + + + Decoder.h + 1 + PARENT-3-PROJECT_LOC/mira/Decoder.h + + + Hildreth.cpp + 1 + PARENT-3-PROJECT_LOC/mira/Hildreth.cpp + + + Hildreth.h + 1 + PARENT-3-PROJECT_LOC/mira/Hildreth.h + + + HypothesisQueue.cpp + 1 + PARENT-3-PROJECT_LOC/mira/HypothesisQueue.cpp + + + HypothesisQueue.h + 1 + PARENT-3-PROJECT_LOC/mira/HypothesisQueue.h + + + Main.cpp + 1 + PARENT-3-PROJECT_LOC/mira/Main.cpp + + + Main.h + 1 + PARENT-3-PROJECT_LOC/mira/Main.h + + + MiraOptimiser.cpp + 1 + PARENT-3-PROJECT_LOC/mira/MiraOptimiser.cpp + + + Perceptron.cpp + 1 + PARENT-3-PROJECT_LOC/mira/Perceptron.cpp + + + diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject index 8beb0319a..b9042cd5a 100644 --- a/contrib/other-builds/moses-chart-cmd/.cproject +++ b/contrib/other-builds/moses-chart-cmd/.cproject @@ -5,13 +5,13 @@ + + - - @@ -79,6 +79,7 @@ + @@ -107,13 +108,13 @@ + + - - diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject index 4e3c7dc9d..f9eeebf1d 100644 --- a/contrib/other-builds/moses-cmd/.cproject +++ b/contrib/other-builds/moses-cmd/.cproject @@ -5,13 +5,13 @@ + + - - @@ -85,6 +85,7 @@ + diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index b3695a06f..6e12675da 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -530,7 +530,7 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream) featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream); } } - const vector& pds = staticData.GetPhraseDictionaries(); + const vector& pds = PhraseDictionary::GetColl(); for( size_t i=0; i &dictionaries = staticData.GetPhraseDictionaries(); + const std::vector &dictionaries = PhraseDictionary::GetColl(); m_ruleLookupManagers.reserve(dictionaries.size()); for (std::vector::const_iterator p = dictionaries.begin(); p != dictionaries.end(); ++p) { diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp index a6ba048e5..c60dec9cc 100644 --- a/moses/FF/InputFeature.cpp +++ b/moses/FF/InputFeature.cpp @@ -19,8 +19,7 @@ InputFeature::InputFeature(const std::string &line) void InputFeature::Load() { - const StaticData &staticData = StaticData::Instance(); - const PhraseDictionary *pt = staticData.GetTranslationScoreProducer(0); + const PhraseDictionary *pt = PhraseDictionary::GetColl()[0]; const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast(pt); m_legacy = (ptBin != NULL); diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 0618d2d91..697eef347 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -464,7 +464,7 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo // phrase table scores const StaticData &staticData = StaticData::Instance(); - const std::vector &phraseTables = staticData.GetPhraseDictionaries(); + const std::vector &phraseTables = PhraseDictionary::GetColl(); std::vector::const_iterator iterPhraseTable; for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable) { const PhraseDictionary *phraseTable = *iterPhraseTable; @@ -687,7 +687,7 @@ void Manager::OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream) featureIndex = OutputFeatureWeightsForSLF(featureIndex, slf[i], outputSearchGraphStream); } } - const vector& pds = staticData.GetPhraseDictionaries(); + const vector& pds = PhraseDictionary::GetColl(); for( size_t i=0; i& pds = staticData.GetPhraseDictionaries(); + const vector& pds = PhraseDictionary::GetColl(); for( size_t i=0; i& pds = staticData.GetPhraseDictionaries(); + const vector& pds = PhraseDictionary::GetColl(); for( size_t i=0; iGetNumScoreComponents(); } diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 1aad30254..80522f3df 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -650,6 +650,7 @@ bool StaticData::LoadDecodeGraphs() { const vector &mappingVector = m_parameter->GetParam("mapping"); const vector &maxChartSpans = Scan(m_parameter->GetParam("max-chart-span")); + const vector& pts = PhraseDictionary::GetColl(); const std::vector *featuresRemaining = &FeatureFunction::GetFeatureFunctions(); DecodeStep *prev = 0; @@ -687,14 +688,14 @@ bool StaticData::LoadDecodeGraphs() DecodeStep* decodeStep = NULL; switch (decodeType) { case Translate: - if(index>=m_phraseDictionary.size()) { + if(index>=pts.size()) { stringstream strme; strme << "No phrase dictionary with index " << index << " available!"; UserMessage::Add(strme.str()); CHECK(false); } - decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev, *featuresRemaining); + decodeStep = new DecodeStepTranslation(pts[index], prev, *featuresRemaining); break; case Generate: if(index>=m_generationDictionary.size()) { @@ -897,7 +898,6 @@ void StaticData::LoadFeatureFunctions() bool doLoad = true; if (PhraseDictionary *ffCast = dynamic_cast(ff)) { - m_phraseDictionary.push_back(ffCast); doLoad = false; } else if (const GenerationDictionary *ffCast = dynamic_cast(ff)) { @@ -920,8 +920,9 @@ void StaticData::LoadFeatureFunctions() } } - for (size_t i = 0; i < m_phraseDictionary.size(); ++i) { - PhraseDictionary *pt = m_phraseDictionary[i]; + const std::vector &pts = PhraseDictionary::GetColl(); + for (size_t i = 0; i < pts.size(); ++i) { + PhraseDictionary *pt = pts[i]; pt->Load(); } @@ -1078,8 +1079,9 @@ void StaticData::OverrideFeatures() void StaticData::CheckLEGACYPT() { - for (size_t i = 0; i < m_phraseDictionary.size(); ++i) { - const PhraseDictionary *phraseDictionary = m_phraseDictionary[i]; + const std::vector &pts = PhraseDictionary::GetColl(); + for (size_t i = 0; i < pts.size(); ++i) { + const PhraseDictionary *phraseDictionary = pts[i]; if (dynamic_cast(phraseDictionary) != NULL) { m_useLegacyPT = true; return; diff --git a/moses/StaticData.h b/moses/StaticData.h index 16136f849..51ce0d8d8 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -47,7 +47,6 @@ namespace Moses { class InputType; -class PhraseDictionary; class GenerationDictionary; class DecodeGraph; class DecodeStep; @@ -67,7 +66,6 @@ class StaticData private: static StaticData s_instance; protected: - std::vector m_phraseDictionary; std::vector m_generationDictionary; Parameter *m_parameter; std::vector m_inputFactorOrder, m_outputFactorOrder; @@ -723,15 +721,9 @@ public: float GetWeightWordPenalty() const; float GetWeightUnknownWordPenalty() const; - const std::vector& GetPhraseDictionaries() const { - return m_phraseDictionary; - } const std::vector& GetGenerationDictionaries() const { return m_generationDictionary; } - const PhraseDictionary*GetTranslationScoreProducer(size_t index) const { - return GetPhraseDictionaries().at(index); - } const std::vector& GetDecodeGraphs() const { return m_decodeGraphs; diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index 9fd3df0f3..fc8ea1d16 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -152,7 +152,7 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath) void TargetPhrase::SetXMLScore(float score) { const StaticData &staticData = StaticData::Instance(); - const FeatureFunction* prod = staticData.GetPhraseDictionaries()[0]; + const FeatureFunction* prod = PhraseDictionary::GetColl()[0]; size_t numScores = prod->GetNumScoreComponents(); vector scoreVector(numScores,score/numScores); diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp index 159e798a1..26ef2ad3a 100644 --- a/moses/TranslationModel/PhraseDictionary.cpp +++ b/moses/TranslationModel/PhraseDictionary.cpp @@ -31,12 +31,14 @@ using namespace std; namespace Moses { +std::vector PhraseDictionary::s_staticColl; PhraseDictionary::PhraseDictionary(const std::string &line) :DecodeFeature(line) ,m_tableLimit(20) // default ,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE) { + s_staticColl.push_back(this); } const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h index f8477f70e..7fe8169dd 100644 --- a/moses/TranslationModel/PhraseDictionary.h +++ b/moses/TranslationModel/PhraseDictionary.h @@ -59,6 +59,10 @@ class ChartParser; class PhraseDictionary : public DecodeFeature { public: + static const std::vector& GetColl() { + return s_staticColl; + } + PhraseDictionary(const std::string &line); virtual ~PhraseDictionary() { @@ -110,6 +114,8 @@ public: virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const; protected: + static std::vector s_staticColl; + size_t m_tableLimit; std::string m_filePath; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp index be3986929..76dc3dffa 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp @@ -464,8 +464,7 @@ double CrossEntropy::operator() ( const dlib::matrix& arg) const PhraseDictionary *FindPhraseDictionary(const string &ptName) { - const StaticData &staticData = StaticData::Instance(); - const std::vector &pts = staticData.GetPhraseDictionaries(); + const std::vector &pts = PhraseDictionary::GetColl(); PhraseDictionary *pt = NULL; std::vector::const_iterator iter; From c9d09a716872231b80e38db6bd6668d4e54bb991 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 17:30:37 +0000 Subject: [PATCH 09/12] correct hashing of contextFactor for state. /Ken --- moses/LM/DALM.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/moses/LM/DALM.cpp b/moses/LM/DALM.cpp index c04e80324..acd3e94e2 100644 --- a/moses/LM/DALM.cpp +++ b/moses/LM/DALM.cpp @@ -119,8 +119,10 @@ LMResult LanguageModelDALM::GetValue(const vector &contextFactor, S ret.score = score; // hash of n-1 words to use as state + size_t startPos = (contextFactor.size() < m_nGramOrder) ? 0 : 1; + size_t hash = 0; - for (size_t i = 1; i < contextFactor.size(); ++i) { + for (size_t i = startPos; i < contextFactor.size(); ++i) { const Word &word = *contextFactor[i]; const Factor *factor = word.GetFactor(m_factorType); boost::hash_combine(hash, factor); From 3f8972d67b4fb566a3ad644c61dc9745e6a14d74 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 17:43:41 +0000 Subject: [PATCH 10/12] move collection of generation-table from Staticdata to GenerationDictionary --- moses-cmd/Main.cpp | 2 +- moses/GenerationDictionary.cpp | 3 +++ moses/GenerationDictionary.h | 6 ++++++ moses/Manager.cpp | 6 +++--- moses/StaticData.cpp | 7 ++++--- moses/StaticData.h | 6 ------ 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index 6e12675da..0e60c88df 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -534,7 +534,7 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream) for( size_t i=0; i& gds = staticData.GetGenerationDictionaries(); + const vector& gds = GenerationDictionary::GetColl(); for( size_t i=0; i GenerationDictionary::s_staticColl; GenerationDictionary::GenerationDictionary(const std::string &line) : DecodeFeature(line) { + s_staticColl.push_back(this); + ReadParameters(); } diff --git a/moses/GenerationDictionary.h b/moses/GenerationDictionary.h index a22cb930c..257162ae1 100644 --- a/moses/GenerationDictionary.h +++ b/moses/GenerationDictionary.h @@ -46,12 +46,18 @@ class GenerationDictionary : public DecodeFeature { typedef std::map Collection; protected: + static std::vector s_staticColl; + Collection m_collection; // 1st = source // 2nd = target std::string m_filePath; public: + static const std::vector& GetColl() { + return s_staticColl; + } + GenerationDictionary(const std::string &line); virtual ~GenerationDictionary(); diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 697eef347..fbf721ffa 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -691,7 +691,7 @@ void Manager::OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream) for( size_t i=0; i& gds = staticData.GetGenerationDictionaries(); + const vector& gds = GenerationDictionary::GetColl(); for( size_t i=0; i& gds = staticData.GetGenerationDictionaries(); + const vector& gds = GenerationDictionary::GetColl(); for( size_t i=0; i& gds = staticData.GetGenerationDictionaries(); + const vector& gds = GenerationDictionary::GetColl(); for( size_t i=0; i &mappingVector = m_parameter->GetParam("mapping"); const vector &maxChartSpans = Scan(m_parameter->GetParam("max-chart-span")); const vector& pts = PhraseDictionary::GetColl(); + const vector& gens = GenerationDictionary::GetColl(); const std::vector *featuresRemaining = &FeatureFunction::GetFeatureFunctions(); DecodeStep *prev = 0; @@ -698,14 +699,14 @@ bool StaticData::LoadDecodeGraphs() decodeStep = new DecodeStepTranslation(pts[index], prev, *featuresRemaining); break; case Generate: - if(index>=m_generationDictionary.size()) { + if(index>=gens.size()) { stringstream strme; strme << "No generation dictionary with index " << index << " available!"; UserMessage::Add(strme.str()); CHECK(false); } - decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev, *featuresRemaining); + decodeStep = new DecodeStepGeneration(gens[index], prev, *featuresRemaining); break; case InsertNullFertilityWord: CHECK(!"Please implement NullFertilityInsertion."); @@ -901,7 +902,7 @@ void StaticData::LoadFeatureFunctions() doLoad = false; } else if (const GenerationDictionary *ffCast = dynamic_cast(ff)) { - m_generationDictionary.push_back(ffCast); + // do nothing } else if (WordPenaltyProducer *ffCast = dynamic_cast(ff)) { CHECK(m_wpProducer == NULL); // max 1 feature; diff --git a/moses/StaticData.h b/moses/StaticData.h index 51ce0d8d8..78d1bced9 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -47,7 +47,6 @@ namespace Moses { class InputType; -class GenerationDictionary; class DecodeGraph; class DecodeStep; class WordPenaltyProducer; @@ -66,7 +65,6 @@ class StaticData private: static StaticData s_instance; protected: - std::vector m_generationDictionary; Parameter *m_parameter; std::vector m_inputFactorOrder, m_outputFactorOrder; mutable ScoreComponentCollection m_allWeights; @@ -721,10 +719,6 @@ public: float GetWeightWordPenalty() const; float GetWeightUnknownWordPenalty() const; - const std::vector& GetGenerationDictionaries() const { - return m_generationDictionary; - } - const std::vector& GetDecodeGraphs() const { return m_decodeGraphs; } From 682563df761ba2d60e806be573090ac734e0c8b0 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 15 Nov 2013 18:29:22 +0000 Subject: [PATCH 11/12] UTIL_THROW_IF2 with base Exception class --- util/exception.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util/exception.hh b/util/exception.hh index 74046cf90..d73e898ee 100644 --- a/util/exception.hh +++ b/util/exception.hh @@ -98,6 +98,9 @@ template typename Except::template ExceptionTag Date: Fri, 15 Nov 2013 20:14:10 +0000 Subject: [PATCH 12/12] replace CHECK with UTIL_THROW_IF. It's gonna be a fun week --- OnDiskPt/OnDiskWrapper.cpp | 77 ++++++++++++++++++++++++++++---------- OnDiskPt/Phrase.cpp | 9 +++-- OnDiskPt/PhraseNode.cpp | 3 +- util/exception.hh | 7 ++++ 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp index 3d1dd160b..700b62678 100644 --- a/OnDiskPt/OnDiskWrapper.cpp +++ b/OnDiskPt/OnDiskWrapper.cpp @@ -21,10 +21,11 @@ #include #endif #include -#include "util/check.hh" #include #include "OnDiskWrapper.h" #include "moses/Factor.h" +#include "util/check.hh" +#include "util/exception.hh" using namespace std; @@ -59,19 +60,29 @@ bool OnDiskWrapper::BeginLoad(const std::string &filePath) bool OnDiskWrapper::OpenForLoad(const std::string &filePath) { m_fileSource.open((filePath + "/Source.dat").c_str(), ios::in | ios::binary); - CHECK(m_fileSource.is_open()); + UTIL_THROW_IF(!m_fileSource.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Source.dat"); m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::in | ios::binary); - CHECK(m_fileTargetInd.is_open()); + UTIL_THROW_IF(!m_fileTargetInd.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetInd.dat"); m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::in | ios::binary); - CHECK(m_fileTargetColl.is_open()); + UTIL_THROW_IF(!m_fileTargetColl.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetColl.dat"); m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::in); - CHECK(m_fileVocab.is_open()); + UTIL_THROW_IF(!m_fileVocab.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Vocab.dat"); m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::in); - CHECK(m_fileMisc.is_open()); + UTIL_THROW_IF(!m_fileMisc.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Misc.dat"); // set up root node LoadMisc(); @@ -89,7 +100,9 @@ bool OnDiskWrapper::LoadMisc() while(m_fileMisc.getline(line, 100000)) { vector tokens; Moses::Tokenize(tokens, line); - CHECK(tokens.size() == 2); + UTIL_THROW_IF2(tokens.size() != 2, "Except key value. Found " << line); + + const string &key = tokens[0]; m_miscInfo[key] = Moses::Scan(tokens[1]); } @@ -112,33 +125,52 @@ bool OnDiskWrapper::BeginSave(const std::string &filePath #endif m_fileSource.open((filePath + "/Source.dat").c_str(), ios::out | ios::in | ios::binary | ios::ate | ios::trunc); - CHECK(m_fileSource.is_open()); + UTIL_THROW_IF(!m_fileSource.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Source.dat"); m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc); - CHECK(m_fileTargetInd.is_open()); + UTIL_THROW_IF(!m_fileTargetInd.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetInd.dat"); m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc); - CHECK(m_fileTargetColl.is_open()); + UTIL_THROW_IF(!m_fileTargetColl.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetColl.dat"); m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::out | ios::ate | ios::trunc); - CHECK(m_fileVocab.is_open()); + UTIL_THROW_IF(!m_fileVocab.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Vocab.dat"); m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::out | ios::ate | ios::trunc); - CHECK(m_fileMisc.is_open()); + UTIL_THROW_IF(!m_fileMisc.is_open(), + util::FileOpenException, + "Couldn't open file " << filePath << "/Misc.dat"); // offset by 1. 0 offset is reserved char c = 0xff; m_fileSource.write(&c, 1); - CHECK(1 == m_fileSource.tellp()); + UTIL_THROW_IF(1 != m_fileSource.tellp(), + util::Exception, + "Couldn't write to stream m_fileSource"); m_fileTargetInd.write(&c, 1); - CHECK(1 == m_fileTargetInd.tellp()); + UTIL_THROW_IF(1 != m_fileTargetInd.tellp(), + util::Exception, + "Couldn't write to stream m_fileTargetInd"); m_fileTargetColl.write(&c, 1); - CHECK(1 == m_fileTargetColl.tellp()); + UTIL_THROW_IF(1 != m_fileTargetColl.tellp(), + util::Exception, + "Couldn't write to stream m_fileTargetColl"); // set up root node - CHECK(GetNumCounts() == 1); + UTIL_THROW_IF(GetNumCounts() != 1, + util::Exception, + "Not sure what this is..."); + vector counts(GetNumCounts()); counts[0] = DEFAULT_COUNT; m_rootSourceNode = new PhraseNode(); @@ -150,7 +182,7 @@ bool OnDiskWrapper::BeginSave(const std::string &filePath void OnDiskWrapper::EndSave() { bool ret = m_rootSourceNode->Saved(); - CHECK(ret); + UTIL_THROW_IF(!ret, util::Exception, "Root node not saved"); GetVocab().Save(*this); @@ -187,7 +219,10 @@ UINT64 OnDiskWrapper::GetMisc(const std::string &key) const { std::map::const_iterator iter; iter = m_miscInfo.find(key); - CHECK(iter != m_miscInfo.end()); + UTIL_THROW_IF(iter == m_miscInfo.end() + , util::Exception + , "Couldn't find value for key " << key + ); return iter->second; } @@ -201,7 +236,7 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector &fact size_t factorType = factorsVec[0]; const Moses::Factor *factor = origWord.GetFactor(factorType); - CHECK(factor); + UTIL_THROW_IF(factor == NULL, util::Exception, "Expecting factor " << factorType); strme << factor->GetString(); for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) { @@ -211,7 +246,9 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector &fact // can have less factors than factorType.size() break; } - CHECK(factor); + UTIL_THROW_IF(factor == NULL, + util::Exception, + "Expecting factor " << factorType << " at position " << ind); strme << "|" << factor->GetString(); } // for (size_t factorType diff --git a/OnDiskPt/Phrase.cpp b/OnDiskPt/Phrase.cpp index 32bc9b169..1c265c3a3 100644 --- a/OnDiskPt/Phrase.cpp +++ b/OnDiskPt/Phrase.cpp @@ -18,9 +18,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include -#include "util/check.hh" #include "moses/Util.h" #include "Phrase.h" +#include "util/check.hh" +#include "util/exception.hh" using namespace std; @@ -35,7 +36,9 @@ void Phrase::AddWord(WordPtr word) void Phrase::AddWord(WordPtr word, size_t pos) { - CHECK(pos < m_words.size()); + UTIL_THROW_IF(!(pos < m_words.size()), + util::Exception, + "Trying to get word " << pos << " when phrase size is " << m_words.size()); m_words.insert(m_words.begin() + pos + 1, word); } @@ -59,7 +62,7 @@ int Phrase::Compare(const Phrase &compare) const } if (ret == 0) { - CHECK(compare.GetSize() >= GetSize()); + assert(compare.GetSize() >= GetSize()); ret = (compare.GetSize() > GetSize()) ? 1 : 0; } return ret; diff --git a/OnDiskPt/PhraseNode.cpp b/OnDiskPt/PhraseNode.cpp index c259aa077..d141a1af2 100644 --- a/OnDiskPt/PhraseNode.cpp +++ b/OnDiskPt/PhraseNode.cpp @@ -17,12 +17,13 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ -#include "util/check.hh" #include "PhraseNode.h" #include "OnDiskWrapper.h" #include "TargetPhraseCollection.h" #include "SourcePhrase.h" #include "moses/Util.h" +#include "util/check.hh" +#include "util/exception.hh" using namespace std; diff --git a/util/exception.hh b/util/exception.hh index d73e898ee..0298272bd 100644 --- a/util/exception.hh +++ b/util/exception.hh @@ -114,6 +114,13 @@ class ErrnoException : public Exception { int errno_; }; +// file wasn't there, or couldn't be open for some reason +class FileOpenException : public Exception { + public: + FileOpenException() throw() {} + ~FileOpenException() throw() {} +}; + // Utilities for overflow checking. class OverflowException : public Exception { public: