From 9a089b5baad9233e9fc979d41a6e226acfe53075 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 28 Jun 2016 11:59:10 +0100 Subject: [PATCH] import OnDisk pt --- .../moses2/FF/FeatureFunctions.cpp | 14 +- .../moses2/FF/FeatureRegistry.cpp | 2 + contrib/other-builds/moses2/Jamfile | 1 + .../TranslationModel/PhraseTableOnDisk.cpp | 239 ++++++++++++++++++ .../TranslationModel/PhraseTableOnDisk.h | 84 ++++++ 5 files changed, 328 insertions(+), 12 deletions(-) create mode 100644 contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.cpp create mode 100644 contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.h diff --git a/contrib/other-builds/moses2/FF/FeatureFunctions.cpp b/contrib/other-builds/moses2/FF/FeatureFunctions.cpp index 1a1fb4768..5850cd028 100644 --- a/contrib/other-builds/moses2/FF/FeatureFunctions.cpp +++ b/contrib/other-builds/moses2/FF/FeatureFunctions.cpp @@ -12,21 +12,11 @@ #include "../Scores.h" #include "../MemPool.h" -#include "SkeletonStatelessFF.h" -#include "SkeletonStatefulFF.h" -#include "WordPenalty.h" -#include "PhrasePenalty.h" -#include "Distortion.h" -#include "LexicalReordering/LexicalReordering.h" -#include "../TranslationModel/Memory/PhraseTableMemory.h" -#include "../TranslationModel/ProbingPT.h" +#include "../TranslationModel/PhraseTable.h" #include "../TranslationModel/UnknownWordPenalty.h" -#include "../LM/LanguageModel.h" -//#include "../LM/LanguageModelDALM.h" -#include "../LM/KENLM.h" #include "../SCFG/TargetPhraseImpl.h" -#include "util/exception.hh" #include "../SCFG/Word.h" +#include "util/exception.hh" using namespace std; diff --git a/contrib/other-builds/moses2/FF/FeatureRegistry.cpp b/contrib/other-builds/moses2/FF/FeatureRegistry.cpp index 28aa4258d..b4563c268 100644 --- a/contrib/other-builds/moses2/FF/FeatureRegistry.cpp +++ b/contrib/other-builds/moses2/FF/FeatureRegistry.cpp @@ -1,6 +1,7 @@ #include "FeatureRegistry.h" #include "../TranslationModel/Memory/PhraseTableMemory.h" +#include 
"../TranslationModel/PhraseTableOnDisk.h" #include "../TranslationModel/ProbingPT.h" #include "../TranslationModel/UnknownWordPenalty.h" @@ -50,6 +51,7 @@ FeatureRegistry::FeatureRegistry() #define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >()); MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory); + MOSES_FNAME2("PhraseDictionaryOnDisk", PhraseTableOnDisk); MOSES_FNAME(ProbingPT); MOSES_FNAME(UnknownWordPenalty); diff --git a/contrib/other-builds/moses2/Jamfile b/contrib/other-builds/moses2/Jamfile index efa40bce6..c44f63614 100644 --- a/contrib/other-builds/moses2/Jamfile +++ b/contrib/other-builds/moses2/Jamfile @@ -50,6 +50,7 @@ alias deps : ../../..//z ../../..//boost_iostreams ../../..//boost_filesystem . LM/GPULM.cpp TranslationModel/PhraseTable.cpp + TranslationModel/PhraseTableOnDisk.cpp TranslationModel/ProbingPT.cpp TranslationModel/UnknownWordPenalty.cpp TranslationModel/Memory/PhraseTableMemory.cpp diff --git a/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.cpp b/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.cpp new file mode 100644 index 000000000..4c2f18c3b --- /dev/null +++ b/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.cpp @@ -0,0 +1,239 @@ +/* + * PhraseTableOnDisk.cpp + * + * Created on: 28 Oct 2015 + * Author: hieu + */ +#include +#include "PhraseTableOnDisk.h" +#include "../System.h" +#include "../Scores.h" +#include "../InputType.h" +#include "../PhraseBased/Manager.h" +#include "../PhraseBased/TargetPhraseImpl.h" +#include "../PhraseBased/InputPath.h" +#include "../PhraseBased/TargetPhrases.h" +#include "../PhraseBased/Sentence.h" +#include "../SCFG/InputPath.h" +#include "../SCFG/TargetPhraseImpl.h" +#include "../SCFG/Manager.h" +#include "../SCFG/Sentence.h" + +using namespace std; + +namespace Moses2 +{ + +PhraseTableOnDisk::PhraseTableOnDisk(size_t startInd, const std::string &line) : + PhraseTable(startInd, line) +{ + ReadParameters(); +} + 
+PhraseTableOnDisk::~PhraseTableOnDisk() +{ + // TODO Auto-generated destructor stub +} + +void PhraseTableOnDisk::ProcessXML( + const Manager &mgr, + MemPool &pool, + const Sentence &sentence, + InputPaths &inputPaths) const +{ + const Vector &xmlOptions = sentence.GetXMLOptions(); + BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) { + TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation()); + + if (xmlOption->prob) { + Scores &scores = target->GetScores(); + scores.PlusEquals(mgr.system, *this, Moses2::TransformScore(xmlOption->prob)); + } + + InputPath *path = inputPaths.GetMatrix().GetValue(xmlOption->startPos, xmlOption->phraseSize - 1); + const SubPhrase &source = path->subPhrase; + + mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, source, *target); + + TargetPhrases *tps = new (pool.Allocate()) TargetPhrases(pool, 1); + + tps->AddTargetPhrase(*target); + mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, source); + + path->AddTargetPhrases(*this, tps); + } +} + +void PhraseTableOnDisk::Lookup(const Manager &mgr, + InputPathsBase &inputPaths) const +{ + BOOST_FOREACH(InputPathBase *pathBase, inputPaths){ + InputPath *path = static_cast(pathBase); + + if (SatisfyBackoff(mgr, *path)) { + const SubPhrase &phrase = path->subPhrase; + + TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path); + path->AddTargetPhrases(*this, tps); + } + } + +} + +TargetPhrases *PhraseTableOnDisk::Lookup(const Manager &mgr, MemPool &pool, + InputPath &inputPath) const +{ + const System &system = mgr.system; + TargetPhrases *tps = NULL; + + // any other pt translate this? 
+ size_t numPt = mgr.system.mappings.size(); + const TargetPhrases **allTPS = + static_cast(inputPath).targetPhrases; + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *otherTps = allTPS[i]; + + if (otherTps && otherTps->GetSize()) { + return tps; + } + } + + const SubPhrase &source = inputPath.subPhrase; + const Moses2::Word &sourceWord = source[0]; + const Factor *factor = sourceWord[0]; + + tps = new (pool.Allocate()) TargetPhrases(pool, 1); + + TargetPhraseImpl *target = + new (pool.Allocate()) TargetPhraseImpl(pool, *this, + system, 1); + Moses2::Word &word = (*target)[0]; + + //FactorCollection &fc = system.vocab; + //const Factor *factor = fc.AddFactor("SSS", false); + word[0] = factor; + + Scores &scores = target->GetScores(); + scores.PlusEquals(mgr.system, *this, -100); + + MemPool &memPool = mgr.GetPool(); + system.featureFunctions.EvaluateInIsolation(memPool, system, source, *target); + + tps->AddTargetPhrase(*target); + system.featureFunctions.EvaluateAfterTablePruning(memPool, *tps, source); + + return tps; +} + +void PhraseTableOnDisk::EvaluateInIsolation(const System &system, + const Phrase &source, const TargetPhrase &targetPhrase, Scores &scores, + SCORE *estimatedScore) const +{ + +} + +void PhraseTableOnDisk::InitActiveChart( + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const +{ +} + +void PhraseTableOnDisk::Lookup(MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const +{ + const System &system = mgr.system; + + size_t numWords = path.range.GetNumWordsCovered(); + if (numWords > 1) { + // only create 1 word phrases + return; + } + + if (path.GetNumRules()) { + return; + } + + // don't do 1st or last word + if (path.range.GetStartPos() == 0) { + return; + } + const SCFG::Sentence &sentence = static_cast(mgr.GetInput()); + if (path.range.GetStartPos() + 1 == sentence.GetSize()) { + return; + } + + // terminal + const SCFG::Word &lastWord = 
path.subPhrase.Back(); + //cerr << "PhraseTableOnDisk lastWord=" << lastWord << endl; + + const Factor *factor = lastWord[0]; + SCFG::TargetPhraseImpl *tp = new (pool.Allocate()) SCFG::TargetPhraseImpl(pool, *this, system, 1); + SCFG::Word &word = (*tp)[0]; + word.CreateFromString(system.GetVocab(), system, factor->GetString().as_string()); + + tp->lhs.CreateFromString(system.GetVocab(), system, "[X]"); + + size_t endPos = path.range.GetEndPos(); + const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1); + + SCFG::SymbolBind symbolBind(pool); + symbolBind.Add(subPhrasePath.range, lastWord, NULL); + + Scores &scores = tp->GetScores(); + scores.PlusEquals(mgr.system, *this, -100); + + MemPool &memPool = mgr.GetPool(); + const SubPhrase &source = path.subPhrase; + system.featureFunctions.EvaluateInIsolation(memPool, system, source, *tp); + + path.AddTargetPhrase(pool, *this, symbolBind, tp); +} + +void PhraseTableOnDisk::LookupUnary(MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const +{ +} + +void PhraseTableOnDisk::LookupNT( + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const +{ + UTIL_THROW2("Not implemented"); +} + +void PhraseTableOnDisk::LookupGivenWord( + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const +{ + UTIL_THROW2("Not implemented"); +} + +void PhraseTableOnDisk::LookupGivenNode( + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const +{ + UTIL_THROW2("Not implemented"); +} + +} + diff --git 
a/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.h b/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.h new file mode 100644 index 000000000..0bc0b0a51 --- /dev/null +++ b/contrib/other-builds/moses2/TranslationModel/PhraseTableOnDisk.h @@ -0,0 +1,84 @@ +/* + * PhraseTableOnDisk.h + * + * Created on: 28 Oct 2015 + * Author: hieu + */ + +#pragma once + +#include "PhraseTable.h" + +namespace Moses2 +{ +class Sentence; +class InputPaths; +class Range; + +class PhraseTableOnDisk : public PhraseTable +{ +public: + PhraseTableOnDisk(size_t startInd, const std::string &line); + virtual ~PhraseTableOnDisk(); + + void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const; + virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, + InputPath &inputPath) const; + + void ProcessXML( + const Manager &mgr, + MemPool &pool, + const Sentence &sentence, + InputPaths &inputPaths) const; + + virtual void + EvaluateInIsolation(const System &system, const Phrase &source, + const TargetPhrase &targetPhrase, Scores &scores, + SCORE *estimatedScore) const; + + virtual void InitActiveChart( + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; + + void Lookup(MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; + + void LookupUnary(MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; + +protected: + virtual void LookupNT( + MemPool &pool, + const SCFG::Manager &mgr, + const Moses2::Range &subPhraseRange, + const SCFG::InputPath &prevPath, + const SCFG::Stacks &stacks, + SCFG::InputPath &outPath) const; + + virtual void LookupGivenWord( + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::InputPath &prevPath, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; + + virtual void LookupGivenNode( + MemPool 
&pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; +}; + +} +