From 6b1c0837b045217a36a968bb6d6119ef32b0aded Mon Sep 17 00:00:00 2001 From: hieuhoang1972 Date: Sat, 21 Oct 2006 19:00:58 +0000 Subject: [PATCH] added comments git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@907 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/moses.vcproj | 4 -- moses/src/FactorCollection.cpp | 4 +- moses/src/FactorCollection.h | 2 +- moses/src/InputOutput.cpp | 28 --------- moses/src/InputOutput.h | 77 ----------------------- moses/src/LMList.h | 1 + moses/src/LanguageModel.h | 2 +- moses/src/LanguageModelChunking.h | 17 +++-- moses/src/LanguageModelIRST.cpp | 6 +- moses/src/LanguageModelIRST.h | 5 +- moses/src/LanguageModelJoint.h | 9 ++- moses/src/LanguageModelMultiFactor.cpp | 2 +- moses/src/LanguageModelMultiFactor.h | 3 +- moses/src/LanguageModelSingleFactor.cpp | 2 +- moses/src/LanguageModelSingleFactor.h | 3 +- moses/src/LatticePath.cpp | 12 ++-- moses/src/LatticePath.h | 41 ++++++++---- moses/src/LexicalReordering.cpp | 12 ++-- moses/src/LexicalReordering.h | 6 +- moses/src/Manager.cpp | 2 +- moses/src/PDTAimp.h | 4 +- moses/src/PhraseDictionary.cpp | 2 +- moses/src/PhraseDictionary.h | 17 +++-- moses/src/PhraseDictionaryMemory.cpp | 16 +---- moses/src/PhraseDictionaryMemory.h | 5 -- moses/src/PhraseDictionaryNode.h | 9 ++- moses/src/PhraseDictionaryTreeAdaptor.cpp | 2 +- moses/src/StaticData.cpp | 4 +- 28 files changed, 109 insertions(+), 188 deletions(-) delete mode 100644 moses/src/InputOutput.cpp delete mode 100755 moses/src/InputOutput.h diff --git a/moses/moses.vcproj b/moses/moses.vcproj index 2338e9d08..f742ca6b0 100755 --- a/moses/moses.vcproj +++ b/moses/moses.vcproj @@ -553,10 +553,6 @@ RelativePath=".\src\mempool.h" > - - diff --git a/moses/src/FactorCollection.cpp b/moses/src/FactorCollection.cpp index 1a276c2ba..6eaa79791 100755 --- a/moses/src/FactorCollection.cpp +++ b/moses/src/FactorCollection.cpp @@ -29,9 +29,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 
using namespace std; -void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &fileName) +void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath) { - ifstream inFile(fileName.c_str()); + ifstream inFile(filePath.c_str()); string line; diff --git a/moses/src/FactorCollection.h b/moses/src/FactorCollection.h index b39fc3d9a..635f15b36 100755 --- a/moses/src/FactorCollection.h +++ b/moses/src/FactorCollection.h @@ -72,7 +72,7 @@ public: */ const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString); //! Load list of factors. Deprecated - void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &fileName); + void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &filePath); TO_STRING(); diff --git a/moses/src/InputOutput.cpp b/moses/src/InputOutput.cpp deleted file mode 100644 index 7d49dd21d..000000000 --- a/moses/src/InputOutput.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// $Id$ -#include "InputOutput.h" -#include "InputType.h" - -InputOutput::InputOutput() : m_translationId(0) {} - -InputOutput::~InputOutput() {} - -void InputOutput::Release(InputType *s) {delete s;} - - -InputType* InputOutput::GetInput(InputType *inputType - , std::istream &inputStream - , const std::vector &factorOrder - , FactorCollection &factorCollection) -{ - if(inputType->Read(inputStream,factorOrder,factorCollection)) - { - inputType->SetTranslationId(m_translationId++); - return inputType; - } - else - { - delete inputType; - return NULL; - } -} - diff --git a/moses/src/InputOutput.h b/moses/src/InputOutput.h deleted file mode 100755 index 3c08d1acf..000000000 --- a/moses/src/InputOutput.h +++ /dev/null @@ -1,77 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This 
library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once - -#include -#include -#include -#include "TypeDef.h" - -class Hypothesis; -class LatticePathList; -class FactorCollection; -class InputType; - -/** Abstract class that represent a device which through the Moses library reads and writes data. -* The users of the library, eg. moses-cmd, should create a class which is inherited from this -* class -*/ -class InputOutput -{ -protected: - long m_translationId; - - // constructor - InputOutput(); - - /** fill inputType (currently either a Sentence or ConfusionNet) by calling its Read() function. 
- * Return the same inputType, or delete and return NULL if unsuccessful - */ - InputType* GetInput(InputType * inputType - , std::istream &inputStream - , const std::vector &factorOrder - , FactorCollection &factorCollection); - -public: - virtual ~InputOutput(); - - /** return a sentence or confusion network with data read from file or stdin - \param in empty InputType to be filled with data - */ - virtual InputType* GetInput(InputType *in) = 0; - - /** return the best translation in hypo, or NULL if no translation was possible - \param hypo return arg of best translation found by decoder - \param translationId id of the input - \param reportSegmentation set to true if segmentation info required. Outputs to stdout - \reportAllFactors output all factors, rather than just output factors. Not sure if needed now we know which output factors we want - */ - virtual void SetOutput(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors) = 0; - - /** return n-best list via the arg nBestList */ - virtual void SetNBest(const LatticePathList &nBestList, long translationId) = 0; - - //! delete InputType - virtual void Release(InputType *inputType); - - void ResetTranslationId() { m_translationId = 0; } -}; diff --git a/moses/src/LMList.h b/moses/src/LMList.h index b8ba15c0b..7230a205f 100644 --- a/moses/src/LMList.h +++ b/moses/src/LMList.h @@ -8,6 +8,7 @@ class Phrase; class ScoreColl; class ScoreComponentCollection; +//! List of language models class LMList : public std::list < LanguageModel* > { public: diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h index 587992a6b..170208e4e 100755 --- a/moses/src/LanguageModel.h +++ b/moses/src/LanguageModel.h @@ -38,7 +38,7 @@ class LanguageModel : public ScoreProducer { protected: float m_weight; //! scoring weight. Shouldn't this now be superceded by ScoreProducer??? - std::string m_filename; //! for debugging purposes + std::string m_filePath; //! 
for debugging purposes size_t m_nGramOrder; //! max n-gram length contained in this LM Word m_sentenceStartArray, m_sentenceEndArray; //! Contains factors which represents the beging and end words for this LM. //! Usually and diff --git a/moses/src/LanguageModelChunking.h b/moses/src/LanguageModelChunking.h index fd1c0c224..fc2a69cab 100644 --- a/moses/src/LanguageModelChunking.h +++ b/moses/src/LanguageModelChunking.h @@ -28,6 +28,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Phrase.h" #include "FactorCollection.h" +/* Hacked up LM which skips any factor with string '---' +* order of chunk hardcoded to 3 (m_realNGramOrder) +*/ class LanguageModelChunking : public LanguageModelSingleFactor { protected: @@ -35,6 +38,9 @@ protected: LanguageModelSingleFactor *m_lmImpl; public: + /** Constructor + * \param lmImpl SRI or IRST LM which this LM can use to load data + */ LanguageModelChunking(LanguageModelSingleFactor *lmImpl, bool registerScore) : LanguageModelSingleFactor(registerScore) { @@ -44,7 +50,7 @@ public: { delete m_lmImpl; } - void Load(const std::string &fileName + void Load(const std::string &filePath , FactorCollection &factorCollection , FactorType factorType , float weight @@ -52,16 +58,15 @@ public: { m_factorType = factorType; m_weight = weight; - m_filename = fileName; + m_filePath = filePath; m_nGramOrder = nGramOrder; - // hack. 
this LM is a joint factor of morph and low POS tag & hacked-up TIGER tag m_realNGramOrder = 3; m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_); m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_); - m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder); + m_lmImpl->Load(filePath, factorCollection, m_factorType, weight, nGramOrder); } float GetValue(const std::vector &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const @@ -78,7 +83,7 @@ public: // only process context where last word is a word we want const Factor *factor = (*contextFactor.back())[m_factorType]; std::string strWord = factor->GetString(); - if (strWord.find("???") == 0) + if (strWord.find("---") == 0) return 0; // add last word @@ -93,7 +98,7 @@ public: const Word &word = *contextFactor[currPos]; factor = word[m_factorType]; std::string strWord = factor->GetString(); - bool skip = strWord.find("???") == 0; + bool skip = strWord.find("---") == 0; if (skip) continue; diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp index 43fbe6a06..d811ab6dd 100755 --- a/moses/src/LanguageModelIRST.cpp +++ b/moses/src/LanguageModelIRST.cpp @@ -49,7 +49,7 @@ LanguageModelIRST::~LanguageModelIRST() } -void LanguageModelIRST::Load(const std::string &fileName +void LanguageModelIRST::Load(const std::string &filePath , FactorCollection &factorCollection , FactorType factorType , float weight @@ -58,10 +58,10 @@ void LanguageModelIRST::Load(const std::string &fileName m_factorType = factorType; m_weight = weight; m_nGramOrder = nGramOrder; - m_filename = fileName; + m_filePath = filePath; // Open the input file (possibly gzipped) and load the (possibly binary) model - InputFileStream inp(fileName); + InputFileStream inp(filePath); m_lmtb = new lmtable; m_lmtb->load(inp); diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h index d7c04092a..a78b8a0cb 
100755 --- a/moses/src/LanguageModelIRST.h +++ b/moses/src/LanguageModelIRST.h @@ -35,6 +35,9 @@ class Phrase; class lmtable; // irst lm table class ngram; +/** Implementation of single factor LM using IRST's code. +* This is the default LM for Moses and is available from the same sourceforge repository +*/ class LanguageModelIRST : public LanguageModelSingleFactor { protected: @@ -61,7 +64,7 @@ protected: public: LanguageModelIRST(bool registerScore); ~LanguageModelIRST(); - void Load(const std::string &fileName + void Load(const std::string &filePath , FactorCollection &factorCollection , FactorType factorType , float weight diff --git a/moses/src/LanguageModelJoint.h b/moses/src/LanguageModelJoint.h index 5030ea5df..35c0e78a9 100644 --- a/moses/src/LanguageModelJoint.h +++ b/moses/src/LanguageModelJoint.h @@ -33,6 +33,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class Phrase; class FactorCollection; +/** LM of multiple factors. A simple extension of single factor LM - factors backoff together. 
+ * Rather slow as this uses string concatenation/split +*/ class LanguageModelJoint : public LanguageModelMultiFactor { protected: @@ -53,7 +56,7 @@ public: delete m_lmImpl; } - void Load(const std::string &fileName + void Load(const std::string &filePath , FactorCollection &factorCollection , const std::vector &factorTypes , float weight @@ -61,7 +64,7 @@ public: { m_factorTypes = FactorMask(factorTypes); m_weight = weight; - m_filename = fileName; + m_filePath = filePath; m_nGramOrder = nGramOrder; m_factorTypesOrdered= factorTypes; @@ -76,7 +79,7 @@ public: m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_); } - m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder); + m_lmImpl->Load(filePath, factorCollection, m_implFactor, weight, nGramOrder); } float GetValue(const std::vector &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const diff --git a/moses/src/LanguageModelMultiFactor.cpp b/moses/src/LanguageModelMultiFactor.cpp index 275e5a0e5..47daf3cc0 100644 --- a/moses/src/LanguageModelMultiFactor.cpp +++ b/moses/src/LanguageModelMultiFactor.cpp @@ -30,7 +30,7 @@ const std::string LanguageModelMultiFactor::GetScoreProducerDescription() const { std::ostringstream oss; // what about LMs that are over multiple factors at once, POS + stem, for example? - oss << GetNGramOrder() << "-gram LM score, factor-type= ??? " << ", file=" << m_filename; + oss << GetNGramOrder() << "-gram LM score, factor-type= ??? " << ", file=" << m_filePath; return oss.str(); } diff --git a/moses/src/LanguageModelMultiFactor.h b/moses/src/LanguageModelMultiFactor.h index fded2124b..43d59978b 100644 --- a/moses/src/LanguageModelMultiFactor.h +++ b/moses/src/LanguageModelMultiFactor.h @@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class Phrase; +//! 
Abstract class for multi factor LM class LanguageModelMultiFactor : public LanguageModel { protected: @@ -37,7 +38,7 @@ protected: LanguageModelMultiFactor(bool registerScore); public: - virtual void Load(const std::string &fileName + virtual void Load(const std::string &filePath , FactorCollection &factorCollection , const std::vector &factorTypes , float weight diff --git a/moses/src/LanguageModelSingleFactor.cpp b/moses/src/LanguageModelSingleFactor.cpp index 8eddb2561..f9a02efda 100644 --- a/moses/src/LanguageModelSingleFactor.cpp +++ b/moses/src/LanguageModelSingleFactor.cpp @@ -47,7 +47,7 @@ const std::string LanguageModelSingleFactor::GetScoreProducerDescription() const { std::ostringstream oss; // what about LMs that are over multiple factors at once, POS + stem, for example? - oss << GetNGramOrder() << "-gram LM score, factor-type=" << GetFactorType() << ", file=" << m_filename; + oss << GetNGramOrder() << "-gram LM score, factor-type=" << GetFactorType() << ", file=" << m_filePath; return oss.str(); } diff --git a/moses/src/LanguageModelSingleFactor.h b/moses/src/LanguageModelSingleFactor.h index d9d7b1a15..5ed2c86e5 100644 --- a/moses/src/LanguageModelSingleFactor.h +++ b/moses/src/LanguageModelSingleFactor.h @@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class FactorCollection; class Factor; +//! 
Abstract class for single factor LM class LanguageModelSingleFactor : public LanguageModel { protected: @@ -39,7 +40,7 @@ public: static State UnknownState; virtual ~LanguageModelSingleFactor(); - virtual void Load(const std::string &fileName + virtual void Load(const std::string &filePath , FactorCollection &factorCollection , FactorType factorType , float weight diff --git a/moses/src/LatticePath.cpp b/moses/src/LatticePath.cpp index d6721304f..85068c402 100755 --- a/moses/src/LatticePath.cpp +++ b/moses/src/LatticePath.cpp @@ -26,7 +26,7 @@ using namespace std; LatticePath::LatticePath(const Hypothesis *hypo) : m_prevEdgeChanged(NOT_FOUND) -{ // create path OF pure hypo +{ m_scoreBreakdown = hypo->GetScoreBreakdown(); m_totalScore = hypo->GetTotalScore(); @@ -70,15 +70,15 @@ LatticePath::LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypoth CalcScore(copy, edgeIndex, arc); } -void LatticePath::CalcScore(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc) +void LatticePath::CalcScore(const LatticePath &origPath, size_t edgeIndex, const Hypothesis *arc) { ScoreComponentCollection adj = arc->GetScoreBreakdown(); - adj.MinusEquals(copy.m_path[edgeIndex]->GetScoreBreakdown()); - m_scoreBreakdown = copy.m_scoreBreakdown; + adj.MinusEquals(origPath.m_path[edgeIndex]->GetScoreBreakdown()); + m_scoreBreakdown = origPath.m_scoreBreakdown; m_scoreBreakdown.PlusEquals(adj); - float fadj = arc->GetTotalScore() - copy.m_path[edgeIndex]->GetTotalScore(); - m_totalScore = copy.GetTotalScore() + fadj; + float fadj = arc->GetTotalScore() - origPath.m_path[edgeIndex]->GetTotalScore(); + m_totalScore = origPath.GetTotalScore() + fadj; } void LatticePath::CreateDeviantPaths(LatticePathCollection &pathColl) const diff --git a/moses/src/LatticePath.h b/moses/src/LatticePath.h index c7a61c340..a67a6699a 100755 --- a/moses/src/LatticePath.h +++ b/moses/src/LatticePath.h @@ -29,44 +29,63 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA class LatticePathCollection; +/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 phrase to all the source phrases + * to reach a final translation. For the best translation, this consists of all hypotheses, for the other + * n-best paths, the node on the path can consist of hypotheses or arcs + */ class LatticePath { friend std::ostream& operator<<(std::ostream&, const LatticePath&); protected: - std::vector m_path; - size_t m_prevEdgeChanged; + std::vector m_path; //< list of hypotheses/arcs + size_t m_prevEdgeChanged; /**< the last node that was wiggled to create this path + , or NOT_FOUND if this path is the best trans so consist of only hypos + */ ScoreComponentCollection m_scoreBreakdown; float m_totalScore; - void CalcScore(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc); + /** Calculate m_totalScore & m_scoreBreakdown, taking into account the same score in the + * original path, copy, and the deviation arc + * TODO - check that this is correct when applied to path that just deviated from pure hypo, ie the 2nd constructor below + */ + void CalcScore(const LatticePath &origPath, size_t edgeIndex, const Hypothesis *arc); public: LatticePath(); // not implemented + //! create path OF pure hypo LatticePath(const Hypothesis *hypo); - // create path OF pure hypo + + /** create path FROM pure hypo, deviate at edgeIndex by using arc instead, + * which may change other hypo back from there + */ LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc); - // create path FROM pure hypo - // deviate from edgeIndex backwards + + /** create path from ANY hypo + * \param reserve arg not used. To differentiate from other constructor + * deviate from edgeIndex. however, all other edges the same - only correct if prev hypo of original + * & replacing arc are the same + */ LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc, bool reserve); - // create path from ANY hypo - // reserve arg not used. 
to differential from other constructor - // deviate from edgeIndex. however, all other edges the same inline float GetTotalScore() const { return m_totalScore; } + /** list of each hypo/arcs in path. For anything other than the best hypo, it is not possible just to follow the + * m_prevHypo variable in the hypothesis object + */ inline const std::vector &GetEdges() const { return m_path; } - + //! whether or not this consists of only hypos inline bool IsPurePath() const { return m_prevEdgeChanged == NOT_FOUND; } - + + //! create a set of next best paths by wiggling 1 of the node at a time. void CreateDeviantPaths(LatticePathCollection &pathColl) const; inline const ScoreComponentCollection &GetScoreBreakdown() const diff --git a/moses/src/LexicalReordering.cpp b/moses/src/LexicalReordering.cpp index b89a0d551..29eae266f 100755 --- a/moses/src/LexicalReordering.cpp +++ b/moses/src/LexicalReordering.cpp @@ -12,10 +12,10 @@ using namespace std; -/** Load the file pointed to by filename; set up the table according to +/** Load the file pointed to by filePath; set up the table according to * the orientation and condition parameters. Direction will be used * later for computing the score. 
- * \param filename file that contains the table + * \param filePath file that contains the table * \param orientation orientation as defined in DistortionOrientationType (monotone/msd) * \param direction direction as defined in LexReorderType (forward/backward/bidirectional) * \param condition either conditioned on foreign or foreign+english @@ -23,11 +23,11 @@ using namespace std; * \param input input factors * \param output output factors */ -LexicalReordering::LexicalReordering(const std::string &filename, +LexicalReordering::LexicalReordering(const std::string &filePath, int orientation, int direction, int condition, const std::vector& weights, vector input, vector output) : - m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output) + m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filePath(filePath), m_sourceFactors(input), m_targetFactors(output) { //add score producer const_cast(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this); @@ -59,7 +59,7 @@ LexicalReordering::LexicalReordering(const std::string &filename, */ void LexicalReordering::LoadFile() { - InputFileStream inFile(m_filename); + InputFileStream inFile(m_filePath); string line = "", key = ""; while (getline(inFile,line)) { @@ -256,5 +256,5 @@ size_t LexicalReordering::GetNumScoreComponents() const /** returns description of the model */ const std::string LexicalReordering::GetScoreProducerDescription() const { - return "Lexicalized reordering score, file=" + m_filename; + return "Lexicalized reordering score, file=" + m_filePath; } diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h index 236c17802..e385d4692 100755 --- a/moses/src/LexicalReordering.h +++ b/moses/src/LexicalReordering.h @@ -75,7 +75,7 @@ private: int m_condition; /**< fe or f */ int m_numScores; /**< 1, 2, 3, or 6 */ int m_numOrientationTypes; /**< 2(mono) or 
3(msd) */ - std::string m_filename; /**< probability table location */ + std::string m_filePath; /**< probability table location */ vector m_sourceFactors; /**< source factors to condition on */ vector m_targetFactors; /**< target factors to condition on */ @@ -86,10 +86,10 @@ private: void LoadFile(void); public: - // Constructor: takes 3 arguments -- filename is the path to the + // Constructor: takes 3 arguments -- filePath is the path to the // orientation probability table, orientation is one of {MSD, MONO}, // direction is one of {FOR,BACK,BI}, and condition is one of {F,FE}. - LexicalReordering(const std::string &filename, int orientation, int direction, + LexicalReordering(const std::string &filePath, int orientation, int direction, int condition, const std::vector& weights, vector input, vector output); diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 014d8cb85..2dec8bba6 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -325,7 +325,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co bool addPath = true; if(onlyDistinct) { - // not entirely correct. + // TODO - not entirely correct. // output phrase can't be assumed to only contain factor 0. 
// have to look in StaticData.GetOutputFactorOrder() to find out what output factors should be std::vector tgtPhrase; diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h index adac3b9e4..93f921e06 100644 --- a/moses/src/PDTAimp.h +++ b/moses/src/PDTAimp.h @@ -3,9 +3,9 @@ #pragma once -inline bool existsFile(const char* filename) { +inline bool existsFile(const char* filePath) { struct stat mystat; - return (stat(filename,&mystat)==0); + return (stat(filePath,&mystat)==0); } double addLogScale(double x,double y) diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp index fe9879886..42b6cf74f 100644 --- a/moses/src/PhraseDictionary.cpp +++ b/moses/src/PhraseDictionary.cpp @@ -40,7 +40,7 @@ GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const const std::string PhraseDictionary::GetScoreProducerDescription() const { - return "Translation score, file=" + m_filename; + return "Translation score, file=" + m_filePath; } size_t PhraseDictionary::GetNumScoreComponents() const diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h index 1b78c5284..4826dc37b 100644 --- a/moses/src/PhraseDictionary.h +++ b/moses/src/PhraseDictionary.h @@ -35,27 +35,36 @@ class StaticData; class InputType; class WordsRange; +/** abstract base class for phrase table classes +*/ class PhraseDictionary : public Dictionary, public ScoreProducer { protected: size_t m_tableLimit; - std::string m_filename; // just for debugging purposes + std::string m_filePath; // just for debugging purposes public: PhraseDictionary(size_t numScoreComponent); virtual ~PhraseDictionary(); - + DecodeType GetDecodeType() const { return Translate; } + //! table limit number. size_t GetTableLimit() const { return m_tableLimit; } - virtual void InitializeForInput(InputType const&) {} + //! 
Overridden by load on demand phrase tables classes to load data for each input + virtual void InitializeForInput(InputType const &source) {} const std::string GetScoreProducerDescription() const; size_t GetNumScoreComponents() const; + /** set/change translation weights and recalc weighted score for each translation. + * TODO This may be redundant now we use ScoreCollection + */ virtual void SetWeightTransModel(const std::vector &weightT)=0; + //! find list of translations that can translate src. Only for phrase input virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0; + //! find list of translations that can translate a portion of src. Used by confusion network decoding virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const; - + //! Create entry for translation of source to targetPhrase virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0; }; diff --git a/moses/src/PhraseDictionaryMemory.cpp b/moses/src/PhraseDictionaryMemory.cpp index a26b2ff44..b06db5b51 100755 --- a/moses/src/PhraseDictionaryMemory.cpp +++ b/moses/src/PhraseDictionaryMemory.cpp @@ -49,7 +49,7 @@ void PhraseDictionaryMemory::Load(const std::vector &input , const StaticData& staticData) { m_tableLimit = tableLimit; - m_filename = filePath; + m_filePath = filePath; //factors m_inputFactors = FactorMask(input); @@ -168,20 +168,6 @@ void PhraseDictionaryMemory::SetWeightTransModel(const vector &weightT) } } -bool PhraseDictionaryMemory::Contains(const vector< vector > &phraseVector - , const list &inputPhraseList - , const vector &inputFactorType) -{ - std::list::const_iterator iter; - for (iter = inputPhraseList.begin() ; iter != inputPhraseList.end() ; ++iter) - { - const Phrase &inputPhrase = *iter; - if (inputPhrase.Contains(phraseVector, inputFactorType)) - return true; - } - return false; -} - TO_STRING_BODY(PhraseDictionaryMemory); // friend diff --git 
a/moses/src/PhraseDictionaryMemory.h b/moses/src/PhraseDictionaryMemory.h index 8bf9a927d..360f49e86 100755 --- a/moses/src/PhraseDictionaryMemory.h +++ b/moses/src/PhraseDictionaryMemory.h @@ -35,12 +35,7 @@ class PhraseDictionaryMemory : public PhraseDictionary protected: PhraseDictionaryNode m_collection; - // 1st = source - // 2nd = target - bool Contains(const std::vector< std::vector > &phraseVector - , const std::list &inputPhraseList - , const std::vector &inputFactorType); TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source); public: diff --git a/moses/src/PhraseDictionaryNode.h b/moses/src/PhraseDictionaryNode.h index a2746fda6..13b9ba7e5 100644 --- a/moses/src/PhraseDictionaryNode.h +++ b/moses/src/PhraseDictionaryNode.h @@ -30,17 +30,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class PhraseDictionaryMemory; +/** One node of the PhraseDictionaryMemory structure +*/ class PhraseDictionaryNode { typedef std::map NodeMap; + + // only these classes are allowed to instantiate this class + friend class PhraseDictionaryMemory; + friend class NodeMap; + protected: NodeMap m_map; TargetPhraseCollection *m_targetPhraseCollection; -public: PhraseDictionaryNode() :m_targetPhraseCollection(NULL) {} +public: ~PhraseDictionaryNode(); void Sort(size_t tableLimit); diff --git a/moses/src/PhraseDictionaryTreeAdaptor.cpp b/moses/src/PhraseDictionaryTreeAdaptor.cpp index 8d1eb73ff..cced0d5a8 100644 --- a/moses/src/PhraseDictionaryTreeAdaptor.cpp +++ b/moses/src/PhraseDictionaryTreeAdaptor.cpp @@ -59,7 +59,7 @@ void PhraseDictionaryTreeAdaptor::Create(const std::vector &input <<" "< specification = Tokenize(lrFileVector[i]," "); if (specification.size() != 4 ) { - TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl); + TRACE_ERR("ERROR: Expected format 'factors type weight-count filePath' in specification of 
distortion file " << i << std::endl << lrFileVector[i] << std::endl); return false; } @@ -335,7 +335,7 @@ bool StaticData::LoadParameters(int argc, char* argv[]) vector token = Tokenize(lmVector[i]); if (token.size() != 4 ) { - TRACE_ERR("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filename'"); + TRACE_ERR("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath'"); return false; } // type = implementation, SRI, IRST etc