mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
added comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@907 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
9b920e08a2
commit
6b1c0837b0
@ -553,10 +553,6 @@
|
||||
RelativePath=".\src\mempool.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\NGramNode.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ObjectPool.h"
|
||||
>
|
||||
|
@ -29,9 +29,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
using namespace std;
|
||||
|
||||
void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &fileName)
|
||||
void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath)
|
||||
{
|
||||
ifstream inFile(fileName.c_str());
|
||||
ifstream inFile(filePath.c_str());
|
||||
|
||||
string line;
|
||||
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
*/
|
||||
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
|
||||
//! Load list of factors. Deprecated
|
||||
void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &fileName);
|
||||
void LoadVocab(FactorDirection direction, FactorType factorType, const std::string &filePath);
|
||||
|
||||
TO_STRING();
|
||||
|
||||
|
@ -1,28 +0,0 @@
|
||||
// $Id$
|
||||
#include "InputOutput.h"
|
||||
#include "InputType.h"
|
||||
|
||||
InputOutput::InputOutput() : m_translationId(0) {}
|
||||
|
||||
InputOutput::~InputOutput() {}
|
||||
|
||||
void InputOutput::Release(InputType *s) {delete s;}
|
||||
|
||||
|
||||
InputType* InputOutput::GetInput(InputType *inputType
|
||||
, std::istream &inputStream
|
||||
, const std::vector<FactorType> &factorOrder
|
||||
, FactorCollection &factorCollection)
|
||||
{
|
||||
if(inputType->Read(inputStream,factorOrder,factorCollection))
|
||||
{
|
||||
inputType->SetTranslationId(m_translationId++);
|
||||
return inputType;
|
||||
}
|
||||
else
|
||||
{
|
||||
delete inputType;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -1,77 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "TypeDef.h"
|
||||
|
||||
class Hypothesis;
|
||||
class LatticePathList;
|
||||
class FactorCollection;
|
||||
class InputType;
|
||||
|
||||
/** Abstract class that represents a device which reads and writes data through the Moses library.
|
||||
* The users of the library, eg. moses-cmd, should create a class which is inherited from this
|
||||
* class
|
||||
*/
|
||||
class InputOutput
|
||||
{
|
||||
protected:
|
||||
long m_translationId;
|
||||
|
||||
// constructor
|
||||
InputOutput();
|
||||
|
||||
/** fill inputType (currently either a Sentence or ConfusionNet) by calling its Read() function.
|
||||
* Return the same inputType, or delete and return NULL if unsuccessful
|
||||
*/
|
||||
InputType* GetInput(InputType * inputType
|
||||
, std::istream &inputStream
|
||||
, const std::vector<FactorType> &factorOrder
|
||||
, FactorCollection &factorCollection);
|
||||
|
||||
public:
|
||||
virtual ~InputOutput();
|
||||
|
||||
/** return a sentence or confusion network with data read from file or stdin
|
||||
\param in empty InputType to be filled with data
|
||||
*/
|
||||
virtual InputType* GetInput(InputType *in) = 0;
|
||||
|
||||
/** return the best translation in hypo, or NULL if no translation was possible
|
||||
\param hypo return arg of best translation found by decoder
|
||||
\param translationId id of the input
|
||||
\param reportSegmentation set to true if segmentation info required. Outputs to stdout
|
||||
\param reportAllFactors output all factors, rather than just output factors. Not sure if needed now we know which output factors we want
|
||||
*/
|
||||
virtual void SetOutput(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors) = 0;
|
||||
|
||||
/** return n-best list via the arg nBestList */
|
||||
virtual void SetNBest(const LatticePathList &nBestList, long translationId) = 0;
|
||||
|
||||
//! delete InputType
|
||||
virtual void Release(InputType *inputType);
|
||||
|
||||
void ResetTranslationId() { m_translationId = 0; }
|
||||
};
|
@ -8,6 +8,7 @@ class Phrase;
|
||||
class ScoreColl;
|
||||
class ScoreComponentCollection;
|
||||
|
||||
//! List of language models
|
||||
class LMList : public std::list < LanguageModel* >
|
||||
{
|
||||
public:
|
||||
|
@ -38,7 +38,7 @@ class LanguageModel : public ScoreProducer
|
||||
{
|
||||
protected:
|
||||
float m_weight; //! scoring weight. Shouldn't this now be superceded by ScoreProducer???
|
||||
std::string m_filename; //! for debugging purposes
|
||||
std::string m_filePath; //! for debugging purposes
|
||||
size_t m_nGramOrder; //! max n-gram length contained in this LM
|
||||
Word m_sentenceStartArray, m_sentenceEndArray; //! Contains factors which represent the beginning and end words for this LM.
|
||||
//! Usually <s> and </s>
|
||||
|
@ -28,6 +28,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "Phrase.h"
|
||||
#include "FactorCollection.h"
|
||||
|
||||
/* Hacked up LM which skips any factor with string '---'
|
||||
* order of chunk hardcoded to 3 (m_realNGramOrder)
|
||||
*/
|
||||
class LanguageModelChunking : public LanguageModelSingleFactor
|
||||
{
|
||||
protected:
|
||||
@ -35,6 +38,9 @@ protected:
|
||||
LanguageModelSingleFactor *m_lmImpl;
|
||||
|
||||
public:
|
||||
/** Constructor
|
||||
* \param lmImpl SRI or IRST LM which this LM can use to load data
|
||||
*/
|
||||
LanguageModelChunking(LanguageModelSingleFactor *lmImpl, bool registerScore)
|
||||
: LanguageModelSingleFactor(registerScore)
|
||||
{
|
||||
@ -44,7 +50,7 @@ public:
|
||||
{
|
||||
delete m_lmImpl;
|
||||
}
|
||||
void Load(const std::string &fileName
|
||||
void Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
@ -52,16 +58,15 @@ public:
|
||||
{
|
||||
m_factorType = factorType;
|
||||
m_weight = weight;
|
||||
m_filename = fileName;
|
||||
m_filePath = filePath;
|
||||
m_nGramOrder = nGramOrder;
|
||||
|
||||
// hack. this LM is a joint factor of morph and low POS tag & hacked-up TIGER tag
|
||||
m_realNGramOrder = 3;
|
||||
|
||||
m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
|
||||
m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);
|
||||
|
||||
m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder);
|
||||
m_lmImpl->Load(filePath, factorCollection, m_factorType, weight, nGramOrder);
|
||||
}
|
||||
|
||||
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
|
||||
@ -78,7 +83,7 @@ public:
|
||||
// only process context where last word is a word we want
|
||||
const Factor *factor = (*contextFactor.back())[m_factorType];
|
||||
std::string strWord = factor->GetString();
|
||||
if (strWord.find("???") == 0)
|
||||
if (strWord.find("---") == 0)
|
||||
return 0;
|
||||
|
||||
// add last word
|
||||
@ -93,7 +98,7 @@ public:
|
||||
const Word &word = *contextFactor[currPos];
|
||||
factor = word[m_factorType];
|
||||
std::string strWord = factor->GetString();
|
||||
bool skip = strWord.find("???") == 0;
|
||||
bool skip = strWord.find("---") == 0;
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
|
@ -49,7 +49,7 @@ LanguageModelIRST::~LanguageModelIRST()
|
||||
}
|
||||
|
||||
|
||||
void LanguageModelIRST::Load(const std::string &fileName
|
||||
void LanguageModelIRST::Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
@ -58,10 +58,10 @@ void LanguageModelIRST::Load(const std::string &fileName
|
||||
m_factorType = factorType;
|
||||
m_weight = weight;
|
||||
m_nGramOrder = nGramOrder;
|
||||
m_filename = fileName;
|
||||
m_filePath = filePath;
|
||||
|
||||
// Open the input file (possibly gzipped) and load the (possibly binary) model
|
||||
InputFileStream inp(fileName);
|
||||
InputFileStream inp(filePath);
|
||||
m_lmtb = new lmtable;
|
||||
m_lmtb->load(inp);
|
||||
|
||||
|
@ -35,6 +35,9 @@ class Phrase;
|
||||
class lmtable; // irst lm table
|
||||
class ngram;
|
||||
|
||||
/** Implementation of single factor LM using IRST's code.
|
||||
* This is the default LM for Moses and is available from the same sourceforge repository
|
||||
*/
|
||||
class LanguageModelIRST : public LanguageModelSingleFactor
|
||||
{
|
||||
protected:
|
||||
@ -61,7 +64,7 @@ protected:
|
||||
public:
|
||||
LanguageModelIRST(bool registerScore);
|
||||
~LanguageModelIRST();
|
||||
void Load(const std::string &fileName
|
||||
void Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
|
@ -33,6 +33,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
class Phrase;
|
||||
class FactorCollection;
|
||||
|
||||
/** LM of multiple factors. A simple extension of single factor LM - factors backoff together.
|
||||
* Rather slow as this uses string concatenation/split
|
||||
*/
|
||||
class LanguageModelJoint : public LanguageModelMultiFactor
|
||||
{
|
||||
protected:
|
||||
@ -53,7 +56,7 @@ public:
|
||||
delete m_lmImpl;
|
||||
}
|
||||
|
||||
void Load(const std::string &fileName
|
||||
void Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, const std::vector<FactorType> &factorTypes
|
||||
, float weight
|
||||
@ -61,7 +64,7 @@ public:
|
||||
{
|
||||
m_factorTypes = FactorMask(factorTypes);
|
||||
m_weight = weight;
|
||||
m_filename = fileName;
|
||||
m_filePath = filePath;
|
||||
m_nGramOrder = nGramOrder;
|
||||
|
||||
m_factorTypesOrdered= factorTypes;
|
||||
@ -76,7 +79,7 @@ public:
|
||||
m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
|
||||
}
|
||||
|
||||
m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder);
|
||||
m_lmImpl->Load(filePath, factorCollection, m_implFactor, weight, nGramOrder);
|
||||
}
|
||||
|
||||
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
|
||||
|
@ -30,7 +30,7 @@ const std::string LanguageModelMultiFactor::GetScoreProducerDescription() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
// what about LMs that are over multiple factors at once, POS + stem, for example?
|
||||
oss << GetNGramOrder() << "-gram LM score, factor-type= ??? " << ", file=" << m_filename;
|
||||
oss << GetNGramOrder() << "-gram LM score, factor-type= ??? " << ", file=" << m_filePath;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
class Phrase;
|
||||
|
||||
//! Abstract class for multi factor LM
|
||||
class LanguageModelMultiFactor : public LanguageModel
|
||||
{
|
||||
protected:
|
||||
@ -37,7 +38,7 @@ protected:
|
||||
LanguageModelMultiFactor(bool registerScore);
|
||||
|
||||
public:
|
||||
virtual void Load(const std::string &fileName
|
||||
virtual void Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, const std::vector<FactorType> &factorTypes
|
||||
, float weight
|
||||
|
@ -47,7 +47,7 @@ const std::string LanguageModelSingleFactor::GetScoreProducerDescription() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
// what about LMs that are over multiple factors at once, POS + stem, for example?
|
||||
oss << GetNGramOrder() << "-gram LM score, factor-type=" << GetFactorType() << ", file=" << m_filename;
|
||||
oss << GetNGramOrder() << "-gram LM score, factor-type=" << GetFactorType() << ", file=" << m_filePath;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
class FactorCollection;
|
||||
class Factor;
|
||||
|
||||
//! Abstract class for single factor LM
|
||||
class LanguageModelSingleFactor : public LanguageModel
|
||||
{
|
||||
protected:
|
||||
@ -39,7 +40,7 @@ public:
|
||||
static State UnknownState;
|
||||
|
||||
virtual ~LanguageModelSingleFactor();
|
||||
virtual void Load(const std::string &fileName
|
||||
virtual void Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
|
@ -26,7 +26,7 @@ using namespace std;
|
||||
|
||||
LatticePath::LatticePath(const Hypothesis *hypo)
|
||||
: m_prevEdgeChanged(NOT_FOUND)
|
||||
{ // create path OF pure hypo
|
||||
{
|
||||
m_scoreBreakdown = hypo->GetScoreBreakdown();
|
||||
m_totalScore = hypo->GetTotalScore();
|
||||
|
||||
@ -70,15 +70,15 @@ LatticePath::LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypoth
|
||||
CalcScore(copy, edgeIndex, arc);
|
||||
}
|
||||
|
||||
void LatticePath::CalcScore(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc)
|
||||
void LatticePath::CalcScore(const LatticePath &origPath, size_t edgeIndex, const Hypothesis *arc)
|
||||
{
|
||||
ScoreComponentCollection adj = arc->GetScoreBreakdown();
|
||||
adj.MinusEquals(copy.m_path[edgeIndex]->GetScoreBreakdown());
|
||||
m_scoreBreakdown = copy.m_scoreBreakdown;
|
||||
adj.MinusEquals(origPath.m_path[edgeIndex]->GetScoreBreakdown());
|
||||
m_scoreBreakdown = origPath.m_scoreBreakdown;
|
||||
m_scoreBreakdown.PlusEquals(adj);
|
||||
|
||||
float fadj = arc->GetTotalScore() - copy.m_path[edgeIndex]->GetTotalScore();
|
||||
m_totalScore = copy.GetTotalScore() + fadj;
|
||||
float fadj = arc->GetTotalScore() - origPath.m_path[edgeIndex]->GetTotalScore();
|
||||
m_totalScore = origPath.GetTotalScore() + fadj;
|
||||
}
|
||||
|
||||
void LatticePath::CreateDeviantPaths(LatticePathCollection &pathColl) const
|
||||
|
@ -29,44 +29,63 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
class LatticePathCollection;
|
||||
|
||||
/** Encapsulates the set of hypotheses/arcs that go from decoding 1 phrase to all the source phrases
|
||||
* to reach a final translation. For the best translation, this consists of all hypotheses; for the other
|
||||
* n-best paths, the node on the path can consist of hypotheses or arcs
|
||||
*/
|
||||
class LatticePath
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const LatticePath&);
|
||||
|
||||
protected:
|
||||
std::vector<const Hypothesis *> m_path;
|
||||
size_t m_prevEdgeChanged;
|
||||
std::vector<const Hypothesis *> m_path; //< list of hypotheses/arcs
|
||||
size_t m_prevEdgeChanged; /**< the last node that was wiggled to create this path
|
||||
, or NOT_FOUND if this path is the best translation and so consists of only hypos
|
||||
*/
|
||||
|
||||
ScoreComponentCollection m_scoreBreakdown;
|
||||
float m_totalScore;
|
||||
|
||||
void CalcScore(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc);
|
||||
/** Calculate m_totalScore & m_scoreBreakdown, taking into account the same score in the
|
||||
* original path, origPath, and the deviation arc
|
||||
* TODO - check that this is correct when applied to path that just deviated from pure hypo, ie the 2nd constructor below
|
||||
*/
|
||||
void CalcScore(const LatticePath &origPath, size_t edgeIndex, const Hypothesis *arc);
|
||||
|
||||
public:
|
||||
LatticePath(); // not implemented
|
||||
|
||||
//! create path OF pure hypo
|
||||
LatticePath(const Hypothesis *hypo);
|
||||
// create path OF pure hypo
|
||||
|
||||
/** create path FROM pure hypo, deviate at edgeIndex by using arc instead,
|
||||
* which may change other hypo back from there
|
||||
*/
|
||||
LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc);
|
||||
// create path FROM pure hypo
|
||||
// deviate from edgeIndex backwards
|
||||
|
||||
/** create path from ANY hypo
|
||||
* \param reserve arg not used. To differentiate from other constructor
|
||||
* deviate from edgeIndex. however, all other edges the same - only correct if prev hypo of original
|
||||
* & replacing arc are the same
|
||||
*/
|
||||
LatticePath(const LatticePath &copy, size_t edgeIndex, const Hypothesis *arc, bool reserve);
|
||||
// create path from ANY hypo
|
||||
// reserve arg not used. to differential from other constructor
|
||||
// deviate from edgeIndex. however, all other edges the same
|
||||
|
||||
inline float GetTotalScore() const { return m_totalScore; }
|
||||
|
||||
/** list of each hypo/arcs in path. For anything other than the best hypo, it is not possible just to follow the
|
||||
* m_prevHypo variable in the hypothesis object
|
||||
*/
|
||||
inline const std::vector<const Hypothesis *> &GetEdges() const
|
||||
{
|
||||
return m_path;
|
||||
}
|
||||
|
||||
//! whether or not this consists of only hypos
|
||||
inline bool IsPurePath() const
|
||||
{
|
||||
return m_prevEdgeChanged == NOT_FOUND;
|
||||
}
|
||||
|
||||
|
||||
//! create a set of next best paths by wiggling one of the nodes at a time.
|
||||
void CreateDeviantPaths(LatticePathCollection &pathColl) const;
|
||||
|
||||
inline const ScoreComponentCollection &GetScoreBreakdown() const
|
||||
|
@ -12,10 +12,10 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Load the file pointed to by filename; set up the table according to
|
||||
/** Load the file pointed to by filePath; set up the table according to
|
||||
* the orientation and condition parameters. Direction will be used
|
||||
* later for computing the score.
|
||||
* \param filename file that contains the table
|
||||
* \param filePath file that contains the table
|
||||
* \param orientation orientation as defined in DistortionOrientationType (monotone/msd)
|
||||
* \param direction direction as defined in LexReorderType (forward/backward/bidirectional)
|
||||
* \param condition either conditioned on foreign or foreign+english
|
||||
@ -23,11 +23,11 @@ using namespace std;
|
||||
* \param input input factors
|
||||
* \param output output factors
|
||||
*/
|
||||
LexicalReordering::LexicalReordering(const std::string &filename,
|
||||
LexicalReordering::LexicalReordering(const std::string &filePath,
|
||||
int orientation, int direction,
|
||||
int condition, const std::vector<float>& weights,
|
||||
vector<FactorType> input, vector<FactorType> output) :
|
||||
m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
|
||||
m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filePath(filePath), m_sourceFactors(input), m_targetFactors(output)
|
||||
{
|
||||
//add score producer
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
@ -59,7 +59,7 @@ LexicalReordering::LexicalReordering(const std::string &filename,
|
||||
*/
|
||||
void LexicalReordering::LoadFile()
|
||||
{
|
||||
InputFileStream inFile(m_filename);
|
||||
InputFileStream inFile(m_filePath);
|
||||
string line = "", key = "";
|
||||
while (getline(inFile,line))
|
||||
{
|
||||
@ -256,5 +256,5 @@ size_t LexicalReordering::GetNumScoreComponents() const
|
||||
/** returns description of the model */
|
||||
const std::string LexicalReordering::GetScoreProducerDescription() const
|
||||
{
|
||||
return "Lexicalized reordering score, file=" + m_filename;
|
||||
return "Lexicalized reordering score, file=" + m_filePath;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ private:
|
||||
int m_condition; /**< fe or f */
|
||||
int m_numScores; /**< 1, 2, 3, or 6 */
|
||||
int m_numOrientationTypes; /**< 2(mono) or 3(msd) */
|
||||
std::string m_filename; /**< probability table location */
|
||||
std::string m_filePath; /**< probability table location */
|
||||
vector<FactorType> m_sourceFactors; /**< source factors to condition on */
|
||||
vector<FactorType> m_targetFactors; /**< target factors to condition on */
|
||||
|
||||
@ -86,10 +86,10 @@ private:
|
||||
void LoadFile(void);
|
||||
|
||||
public:
|
||||
// Constructor: takes 3 arguments -- filename is the path to the
|
||||
// Constructor: takes 3 arguments -- filePath is the path to the
|
||||
// orientation probability table, orientation is one of {MSD, MONO},
|
||||
// direction is one of {FOR,BACK,BI}, and condition is one of {F,FE}.
|
||||
LexicalReordering(const std::string &filename, int orientation, int direction,
|
||||
LexicalReordering(const std::string &filePath, int orientation, int direction,
|
||||
int condition, const std::vector<float>& weights,
|
||||
vector<FactorType> input, vector<FactorType> output);
|
||||
|
||||
|
@ -325,7 +325,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
|
||||
bool addPath = true;
|
||||
if(onlyDistinct)
|
||||
{
|
||||
// not entirely correct.
|
||||
// TODO - not entirely correct.
|
||||
// output phrase can't be assumed to only contain factor 0.
|
||||
// have to look in StaticData.GetOutputFactorOrder() to find out what output factors should be
|
||||
std::vector<size_t> tgtPhrase;
|
||||
|
@ -3,9 +3,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
inline bool existsFile(const char* filename) {
|
||||
inline bool existsFile(const char* filePath) {
|
||||
struct stat mystat;
|
||||
return (stat(filename,&mystat)==0);
|
||||
return (stat(filePath,&mystat)==0);
|
||||
}
|
||||
|
||||
double addLogScale(double x,double y)
|
||||
|
@ -40,7 +40,7 @@ GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
|
||||
|
||||
const std::string PhraseDictionary::GetScoreProducerDescription() const
|
||||
{
|
||||
return "Translation score, file=" + m_filename;
|
||||
return "Translation score, file=" + m_filePath;
|
||||
}
|
||||
|
||||
size_t PhraseDictionary::GetNumScoreComponents() const
|
||||
|
@ -35,27 +35,36 @@ class StaticData;
|
||||
class InputType;
|
||||
class WordsRange;
|
||||
|
||||
/** abstract base class for phrase table classes
|
||||
*/
|
||||
class PhraseDictionary : public Dictionary, public ScoreProducer
|
||||
{
|
||||
protected:
|
||||
size_t m_tableLimit;
|
||||
std::string m_filename; // just for debugging purposes
|
||||
std::string m_filePath; // just for debugging purposes
|
||||
|
||||
public:
|
||||
PhraseDictionary(size_t numScoreComponent);
|
||||
virtual ~PhraseDictionary();
|
||||
|
||||
|
||||
DecodeType GetDecodeType() const { return Translate; }
|
||||
//! table limit number.
|
||||
size_t GetTableLimit() const { return m_tableLimit; }
|
||||
|
||||
virtual void InitializeForInput(InputType const&) {}
|
||||
//! Overridden by load-on-demand phrase table classes to load data for each input
|
||||
virtual void InitializeForInput(InputType const &source) {}
|
||||
const std::string GetScoreProducerDescription() const;
|
||||
size_t GetNumScoreComponents() const;
|
||||
|
||||
/** set/change translation weights and recalc weighted score for each translation.
|
||||
* TODO This may be redundant now we use ScoreCollection
|
||||
*/
|
||||
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
|
||||
|
||||
//! find list of translations that can translate src. Only for phrase input
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
|
||||
//! find list of translations that can translate a portion of src. Used by confusion network decoding
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
|
||||
|
||||
//! Create entry for translation of source to targetPhrase
|
||||
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
|
||||
};
|
||||
|
@ -49,7 +49,7 @@ void PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
, const StaticData& staticData)
|
||||
{
|
||||
m_tableLimit = tableLimit;
|
||||
m_filename = filePath;
|
||||
m_filePath = filePath;
|
||||
|
||||
//factors
|
||||
m_inputFactors = FactorMask(input);
|
||||
@ -168,20 +168,6 @@ void PhraseDictionaryMemory::SetWeightTransModel(const vector<float> &weightT)
|
||||
}
|
||||
}
|
||||
|
||||
bool PhraseDictionaryMemory::Contains(const vector< vector<string> > &phraseVector
|
||||
, const list<Phrase> &inputPhraseList
|
||||
, const vector<FactorType> &inputFactorType)
|
||||
{
|
||||
std::list<Phrase>::const_iterator iter;
|
||||
for (iter = inputPhraseList.begin() ; iter != inputPhraseList.end() ; ++iter)
|
||||
{
|
||||
const Phrase &inputPhrase = *iter;
|
||||
if (inputPhrase.Contains(phraseVector, inputFactorType))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
TO_STRING_BODY(PhraseDictionaryMemory);
|
||||
|
||||
// friend
|
||||
|
@ -35,12 +35,7 @@ class PhraseDictionaryMemory : public PhraseDictionary
|
||||
|
||||
protected:
|
||||
PhraseDictionaryNode m_collection;
|
||||
// 1st = source
|
||||
// 2nd = target
|
||||
|
||||
bool Contains(const std::vector< std::vector<std::string> > &phraseVector
|
||||
, const std::list<Phrase> &inputPhraseList
|
||||
, const std::vector<FactorType> &inputFactorType);
|
||||
TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source);
|
||||
|
||||
public:
|
||||
|
@ -30,17 +30,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
class PhraseDictionaryMemory;
|
||||
|
||||
/** One node of the PhraseDictionaryMemory structure
|
||||
*/
|
||||
class PhraseDictionaryNode
|
||||
{
|
||||
typedef std::map<Word, PhraseDictionaryNode> NodeMap;
|
||||
|
||||
// only these classes are allowed to instantiate this class
|
||||
friend class PhraseDictionaryMemory;
|
||||
friend class NodeMap;
|
||||
|
||||
protected:
|
||||
NodeMap m_map;
|
||||
TargetPhraseCollection *m_targetPhraseCollection;
|
||||
|
||||
public:
|
||||
PhraseDictionaryNode()
|
||||
:m_targetPhraseCollection(NULL)
|
||||
{}
|
||||
public:
|
||||
~PhraseDictionaryNode();
|
||||
|
||||
void Sort(size_t tableLimit);
|
||||
|
@ -59,7 +59,7 @@ void PhraseDictionaryTreeAdaptor::Create(const std::vector<FactorType> &input
|
||||
<<" "<<m_numScoreComponent<<"\n";
|
||||
abort();
|
||||
}
|
||||
m_filename = filePath;
|
||||
m_filePath = filePath;
|
||||
|
||||
// set Dictionary members
|
||||
m_inputFactors = FactorMask(input);
|
||||
|
@ -192,7 +192,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
vector<string> specification = Tokenize<string>(lrFileVector[i]," ");
|
||||
if (specification.size() != 4 )
|
||||
{
|
||||
TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl);
|
||||
TRACE_ERR("ERROR: Expected format 'factors type weight-count filePath' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -335,7 +335,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
|
||||
vector<string> token = Tokenize(lmVector[i]);
|
||||
if (token.size() != 4 )
|
||||
{
|
||||
TRACE_ERR("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filename'");
|
||||
TRACE_ERR("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath'");
|
||||
return false;
|
||||
}
|
||||
// type = implementation, SRI, IRST etc
|
||||
|
Loading…
Reference in New Issue
Block a user