mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
code cleanup - make FactorCollection and StaticData totally accessible only globally
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1218 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
59c4ba9f4d
commit
f3cbacba3e
@ -26,8 +26,6 @@ std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
|
||||
return out;
|
||||
}
|
||||
|
||||
FactorCollection factorCollection;
|
||||
|
||||
inline bool existsFile(const char* filename) {
|
||||
struct stat mystat;
|
||||
return (stat(filename,&mystat)==0);
|
||||
|
@ -49,13 +49,11 @@ IOStream::IOStream(
|
||||
const vector<FactorType> &inputFactorOrder
|
||||
, const vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, FactorCollection &factorCollection
|
||||
, size_t nBestSize
|
||||
, const string &nBestFilePath)
|
||||
:m_inputFactorOrder(inputFactorOrder)
|
||||
,m_outputFactorOrder(outputFactorOrder)
|
||||
,m_inputFactorUsed(inputFactorUsed)
|
||||
,m_factorCollection(factorCollection)
|
||||
,m_inputFile(NULL)
|
||||
,m_inputStream(&std::cin)
|
||||
,m_nBestStream(NULL)
|
||||
@ -80,14 +78,12 @@ IOStream::IOStream(
|
||||
IOStream::IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, FactorCollection &factorCollection
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath
|
||||
, const std::string &inputFilePath)
|
||||
:m_inputFactorOrder(inputFactorOrder)
|
||||
,m_outputFactorOrder(outputFactorOrder)
|
||||
,m_inputFactorUsed(inputFactorUsed)
|
||||
,m_factorCollection(factorCollection)
|
||||
,m_inputFilePath(inputFilePath)
|
||||
,m_inputFile(new InputFileStream(inputFilePath))
|
||||
,m_nBestStream(NULL)
|
||||
@ -123,7 +119,7 @@ IOStream::~IOStream()
|
||||
|
||||
InputType*IOStream::GetInput(InputType* inputType)
|
||||
{
|
||||
if(inputType->Read(*m_inputStream, m_inputFactorOrder, m_factorCollection))
|
||||
if(inputType->Read(*m_inputStream, m_inputFactorOrder))
|
||||
{
|
||||
inputType->SetTranslationId(m_translationId++);
|
||||
return inputType;
|
||||
@ -210,7 +206,7 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
|
||||
|
||||
void IOStream::OutputNBestList(const LatticePathList &nBestList, long translationId)
|
||||
{
|
||||
bool labeledOutput = StaticData::Instance()->IsLabeledNBestList();
|
||||
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
|
||||
|
||||
LatticePathList::const_iterator iter;
|
||||
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
|
||||
@ -234,10 +230,10 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
|
||||
// basic distortion
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "d: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetDistortionScoreProducer()) << " ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
|
||||
|
||||
// reordering
|
||||
vector<LexicalReordering*> rms = StaticData::Instance()->GetReorderModels();
|
||||
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
|
||||
if(rms.size() > 0)
|
||||
{
|
||||
vector<LexicalReordering*>::iterator iter;
|
||||
@ -252,7 +248,7 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
|
||||
}
|
||||
|
||||
// lm
|
||||
const LMList& lml = StaticData::Instance()->GetAllLM();
|
||||
const LMList& lml = StaticData::Instance().GetAllLM();
|
||||
if (lml.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "lm: ";
|
||||
@ -263,9 +259,9 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
|
||||
}
|
||||
|
||||
// translation components
|
||||
if (StaticData::Instance()->GetInputType()==0){
|
||||
if (StaticData::Instance().GetInputType()==0){
|
||||
// translation components for text input
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance()->GetPhraseDictionaries();
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "tm: ";
|
||||
@ -281,7 +277,7 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
|
||||
// translation components for Confusion Network input
|
||||
// first translation component has GetNumInputScores() scores from the input Confusion Network
|
||||
// at the beginning of the vector
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance()->GetPhraseDictionaries();
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
vector<PhraseDictionary*>::iterator iter;
|
||||
|
||||
@ -318,10 +314,10 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
|
||||
// word penalty
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "w: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetWordPenaltyProducer()) << " ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
|
||||
|
||||
// generation
|
||||
vector<GenerationDictionary*> gds = StaticData::Instance()->GetGenerationDictionaries();
|
||||
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
|
||||
if (gds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "g: ";
|
||||
|
@ -53,7 +53,6 @@ protected:
|
||||
const std::vector<FactorType> &m_inputFactorOrder;
|
||||
const std::vector<FactorType> &m_outputFactorOrder;
|
||||
const FactorMask &m_inputFactorUsed;
|
||||
FactorCollection &m_factorCollection;
|
||||
std::ostream *m_nBestStream;
|
||||
std::string m_inputFilePath;
|
||||
std::istream *m_inputStream;
|
||||
@ -64,14 +63,12 @@ public:
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, FactorCollection &factorCollection
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath);
|
||||
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, FactorCollection &factorCollection
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath
|
||||
, const std::string &inputFilePath);
|
||||
|
@ -85,8 +85,8 @@ int main(int argc, char* argv[])
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
StaticData staticData;
|
||||
if (!staticData.LoadData(parameter))
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (!StaticData::LoadDataStatic(parameter))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// set up read/writing class
|
||||
@ -120,7 +120,7 @@ int main(int argc, char* argv[])
|
||||
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
|
||||
|
||||
staticData.InitializeBeforeSentenceProcessing(*source);
|
||||
Manager manager(*source, staticData);
|
||||
Manager manager(*source);
|
||||
manager.ProcessSentence();
|
||||
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
|
||||
staticData.GetReportSegmentation(),
|
||||
@ -164,7 +164,7 @@ int main(int argc, char* argv[])
|
||||
#endif
|
||||
}
|
||||
|
||||
IOStream *GetIODevice(StaticData &staticData)
|
||||
IOStream *GetIODevice(const StaticData &staticData)
|
||||
{
|
||||
IOStream *ioStream;
|
||||
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
|
||||
@ -178,7 +178,6 @@ IOStream *GetIODevice(StaticData &staticData)
|
||||
string filePath = staticData.GetParam("input-file")[0];
|
||||
|
||||
ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
|
||||
, staticData.GetFactorCollection()
|
||||
, staticData.GetNBestSize()
|
||||
, staticData.GetNBestFilePath()
|
||||
, filePath);
|
||||
@ -187,7 +186,6 @@ IOStream *GetIODevice(StaticData &staticData)
|
||||
{
|
||||
VERBOSE(1,"IO from STDOUT/STDIN" << endl);
|
||||
ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
|
||||
, staticData.GetFactorCollection()
|
||||
, staticData.GetNBestSize()
|
||||
, staticData.GetNBestFilePath());
|
||||
}
|
||||
|
@ -39,4 +39,4 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
class IOStream;
|
||||
|
||||
int main(int argc, char* argv[]);
|
||||
IOStream *GetIODevice(StaticData &staticData);
|
||||
IOStream *GetIODevice(const StaticData &staticData);
|
||||
|
@ -91,7 +91,7 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
os << std::endl << std::endl;
|
||||
if (doLMStats && lmCalls > 0) {
|
||||
std::vector<unsigned int>::iterator acc = lmAcc.begin();
|
||||
const LMList& lmlist = StaticData::Instance()->GetAllLM();
|
||||
const LMList& lmlist = StaticData::Instance().GetAllLM();
|
||||
LMList::const_iterator i = lmlist.begin();
|
||||
for (; acc != lmAcc.end(); ++acc, ++i) {
|
||||
char buf[256];
|
||||
@ -108,7 +108,7 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
}
|
||||
}
|
||||
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
|
||||
StaticData::Instance()->GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance()->GetAllWeights());
|
||||
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
|
@ -48,8 +48,11 @@ struct CNStats {
|
||||
CNStats stats;
|
||||
|
||||
|
||||
ConfusionNet::ConfusionNet(FactorCollection* p)
|
||||
: InputType(),m_factorCollection(p) {stats.createOne();}
|
||||
ConfusionNet::ConfusionNet()
|
||||
: InputType()
|
||||
{
|
||||
stats.createOne();
|
||||
}
|
||||
ConfusionNet::~ConfusionNet() {stats.destroyOne();}
|
||||
|
||||
ConfusionNet::ConfusionNet(Sentence const& s)
|
||||
@ -59,11 +62,6 @@ ConfusionNet::ConfusionNet(Sentence const& s)
|
||||
data[i].push_back(std::make_pair(s.GetWord(i),0.0));
|
||||
}
|
||||
|
||||
|
||||
void ConfusionNet::SetFactorCollection(FactorCollection *p)
|
||||
{
|
||||
m_factorCollection=p;
|
||||
}
|
||||
bool ConfusionNet::ReadF(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
int format)
|
||||
@ -83,10 +81,8 @@ bool ConfusionNet::ReadF(std::istream& in,
|
||||
}
|
||||
|
||||
int ConfusionNet::Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
FactorCollection &factorCollection)
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
SetFactorCollection(&factorCollection);
|
||||
int rv=ReadF(in,factorOrder,0);
|
||||
if(rv) stats.collect(*this);
|
||||
return rv;
|
||||
@ -99,14 +95,13 @@ void ConfusionNet::String2Word(const std::string& s,Word& w,
|
||||
std::vector<std::string> factorStrVector = Tokenize(s, "|");
|
||||
for(size_t i=0;i<factorOrder.size();++i)
|
||||
w.SetFactor(factorOrder[i],
|
||||
m_factorCollection->AddFactor(Input,factorOrder[i],
|
||||
FactorCollection::Instance().AddFactor(Input,factorOrder[i],
|
||||
factorStrVector[i]));
|
||||
}
|
||||
|
||||
bool ConfusionNet::ReadFormat0(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
assert(m_factorCollection);
|
||||
Clear();
|
||||
std::string line;
|
||||
while(getline(in,line)) {
|
||||
@ -140,7 +135,6 @@ bool ConfusionNet::ReadFormat0(std::istream& in,
|
||||
bool ConfusionNet::ReadFormat1(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
assert(m_factorCollection);
|
||||
Clear();
|
||||
std::string line;
|
||||
if(!getline(in,line)) return 0;
|
||||
@ -202,7 +196,7 @@ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
|
||||
TranslationOptionCollection*
|
||||
ConfusionNet::CreateTranslationOptionCollection() const
|
||||
{
|
||||
size_t maxNoTransOptPerCoverage = StaticData::Instance()->GetMaxNoTransOptPerCoverage();
|
||||
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage);
|
||||
assert(rv);
|
||||
return rv;
|
||||
|
@ -17,19 +17,16 @@ class ConfusionNet : public InputType {
|
||||
|
||||
private:
|
||||
std::vector<Column> data;
|
||||
FactorCollection *m_factorCollection;
|
||||
|
||||
bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
|
||||
bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
|
||||
void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
|
||||
|
||||
public:
|
||||
ConfusionNet(FactorCollection* p=0);
|
||||
ConfusionNet();
|
||||
~ConfusionNet();
|
||||
|
||||
ConfusionNet(Sentence const& s);
|
||||
|
||||
void SetFactorCollection(FactorCollection*);
|
||||
|
||||
const Column& GetColumn(size_t i) const {assert(i<data.size());return data[i];}
|
||||
const Column& operator[](size_t i) const {return GetColumn(i);}
|
||||
@ -41,8 +38,7 @@ class ConfusionNet : public InputType {
|
||||
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
|
||||
void Print(std::ostream&) const;
|
||||
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
|
||||
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
|
||||
Phrase GetSubString(const WordsRange&) const; //TODO not defined
|
||||
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
|
||||
|
@ -100,8 +100,7 @@ public:
|
||||
virtual void Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, FactorCollection &factorCollection
|
||||
, TranslationOptionCollection *toc
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit) const = 0;
|
||||
|
||||
};
|
||||
|
@ -78,7 +78,6 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
|
||||
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, FactorCollection &factorCollection
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit) const
|
||||
{
|
||||
|
@ -38,7 +38,6 @@ public:
|
||||
virtual void Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, FactorCollection &factorCollection
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit) const;
|
||||
|
||||
|
@ -51,7 +51,6 @@ TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOpti
|
||||
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, FactorCollection &factorCollection
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit) const
|
||||
{
|
||||
|
@ -37,7 +37,6 @@ public:
|
||||
virtual void Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, FactorCollection &factorCollection
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit) const;
|
||||
private:
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
DistortionScoreProducer::DistortionScoreProducer()
|
||||
{
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
}
|
||||
|
||||
size_t DistortionScoreProducer::GetNumScoreComponents() const
|
||||
@ -34,7 +34,7 @@ float DistortionScoreProducer::CalculateDistortionScore(const WordsRange &prev,
|
||||
|
||||
WordPenaltyProducer::WordPenaltyProducer()
|
||||
{
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
}
|
||||
|
||||
size_t WordPenaltyProducer::GetNumScoreComponents() const
|
||||
|
@ -29,6 +29,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
using namespace std;
|
||||
|
||||
FactorCollection FactorCollection::s_instance;
|
||||
|
||||
void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath)
|
||||
{
|
||||
ifstream inFile(filePath.c_str());
|
||||
|
@ -43,16 +43,20 @@ class FactorCollection
|
||||
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
|
||||
|
||||
protected:
|
||||
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
|
||||
|
||||
static FactorCollection s_instance;
|
||||
|
||||
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
|
||||
FactorSet m_collection; /**< collection of all factors */
|
||||
StringSet m_factorStringCollection; /**< collection of unique string used by factors */
|
||||
public:
|
||||
//! constructor
|
||||
|
||||
//! constructor. only the 1 static variable can be created
|
||||
FactorCollection()
|
||||
:m_factorId(0)
|
||||
{}
|
||||
|
||||
|
||||
public:
|
||||
static FactorCollection& Instance() { return s_instance; }
|
||||
|
||||
//! Destructor
|
||||
~FactorCollection();
|
||||
|
||||
|
@ -34,15 +34,16 @@ using namespace std;
|
||||
GenerationDictionary::GenerationDictionary(size_t numFeatures)
|
||||
: Dictionary(numFeatures)
|
||||
{
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
}
|
||||
|
||||
bool GenerationDictionary::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &filePath
|
||||
, FactorDirection direction)
|
||||
{
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
|
||||
|
||||
//factors
|
||||
|
@ -62,7 +62,6 @@ public:
|
||||
//! load data file
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &filePath
|
||||
, FactorDirection direction);
|
||||
|
||||
|
@ -53,7 +53,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
|
||||
, m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
|
||||
, m_wordDeleted(false)
|
||||
, m_languageModelStates(StaticData::Instance()->GetLMSize(), LanguageModelSingleFactor::UnknownState)
|
||||
, m_languageModelStates(StaticData::Instance().GetLMSize(), LanguageModelSingleFactor::UnknownState)
|
||||
, m_arcList(NULL)
|
||||
, m_id(0)
|
||||
, m_lmstats(NULL)
|
||||
@ -187,7 +187,7 @@ int Hypothesis::NGramCompare(const Hypothesis &compare) const
|
||||
if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;
|
||||
if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;
|
||||
if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;
|
||||
if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0;
|
||||
if (! StaticData::Instance().GetSourceStartPosMattersForRecombination()) return 0;
|
||||
if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;
|
||||
if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;
|
||||
return 0;
|
||||
@ -207,7 +207,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
|
||||
LMList::const_iterator iterLM;
|
||||
|
||||
// will be null if LM stats collection is disabled
|
||||
if (StaticData::Instance()->IsComputeLMBackoffStats()) {
|
||||
if (StaticData::Instance().IsComputeLMBackoffStats()) {
|
||||
m_lmstats = new vector<vector<unsigned int> >(languageModels.size(), vector<unsigned int>(0));
|
||||
}
|
||||
|
||||
@ -299,7 +299,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
|
||||
void Hypothesis::CalcDistortionScore()
|
||||
|
||||
{
|
||||
const DistortionScoreProducer *dsp = StaticData::Instance()->GetDistortionScoreProducer();
|
||||
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
|
||||
float distortionScore = dsp->CalculateDistortionScore(
|
||||
m_prevHypo->GetCurrSourceWordsRange(),
|
||||
this->GetCurrSourceWordsRange()
|
||||
@ -316,8 +316,10 @@ void Hypothesis::ResetScore()
|
||||
/***
|
||||
* calculate the logarithm of our total translation score (sum up components)
|
||||
*/
|
||||
void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore)
|
||||
void Hypothesis::CalcScore(const SquareMatrix &futureScore)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// DISTORTION COST
|
||||
CalcDistortionScore();
|
||||
|
||||
@ -367,9 +369,9 @@ void Hypothesis::CalcFutureScore(const SquareMatrix &futureScore)
|
||||
}
|
||||
|
||||
// add future costs for distortion model
|
||||
if(StaticData::Instance()->UseDistortionFutureCosts())
|
||||
if(StaticData::Instance().UseDistortionFutureCosts())
|
||||
m_futureScore += m_sourceCompleted.GetFutureCosts( (int)m_currSourceWordsRange.GetEndPos() )
|
||||
* StaticData::Instance()->GetWeightDistortion();
|
||||
* StaticData::Instance().GetWeightDistortion();
|
||||
|
||||
}
|
||||
|
||||
@ -420,9 +422,9 @@ void Hypothesis::CleanupArcList()
|
||||
* However, may not be enough if only unique candidates are needed,
|
||||
* so we'll keep all of arc list if nedd distinct n-best list
|
||||
*/
|
||||
const StaticData *staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData->GetNBestSize();
|
||||
bool distinctNBest = staticData->GetDistinctNBest();
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
bool distinctNBest = staticData.GetDistinctNBest();
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize * 5)
|
||||
{ // prune arc list only if there too many arcs
|
||||
@ -488,7 +490,7 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor
|
||||
std::string Hypothesis::GetSourcePhraseStringRep() const
|
||||
{
|
||||
vector<FactorType> allFactors;
|
||||
const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input);
|
||||
const size_t maxSourceFactors = StaticData::Instance().GetMaxNumFactors(Input);
|
||||
for(size_t i=0; i < maxSourceFactors; i++)
|
||||
{
|
||||
allFactors.push_back(i);
|
||||
@ -498,7 +500,7 @@ std::string Hypothesis::GetSourcePhraseStringRep() const
|
||||
std::string Hypothesis::GetTargetPhraseStringRep() const
|
||||
{
|
||||
vector<FactorType> allFactors;
|
||||
const size_t maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output);
|
||||
const size_t maxTargetFactors = StaticData::Instance().GetMaxNumFactors(Output);
|
||||
for(size_t i=0; i < maxTargetFactors; i++)
|
||||
{
|
||||
allFactors.push_back(i);
|
||||
|
@ -140,7 +140,7 @@ public:
|
||||
|
||||
void ResetScore();
|
||||
|
||||
void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore);
|
||||
void CalcScore(const SquareMatrix &futureScore);
|
||||
|
||||
int GetId()const
|
||||
{
|
||||
|
@ -31,7 +31,7 @@ using namespace std;
|
||||
|
||||
HypothesisCollection::HypothesisCollection()
|
||||
{
|
||||
m_nBestIsEnabled = StaticData::Instance()->IsNBestEnabled();
|
||||
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
|
||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||
m_worstScore = -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
@ -80,7 +80,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
|
||||
{
|
||||
if (hypo->GetTotalScore() < m_worstScore)
|
||||
{ // really bad score. don't bother adding hypo into collection
|
||||
StaticData::Instance()->GetSentenceStats().AddDiscarded();
|
||||
StaticData::Instance().GetSentenceStats().AddDiscarded();
|
||||
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
||||
FREEHYPO(hypo);
|
||||
return;
|
||||
@ -98,7 +98,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
|
||||
Hypothesis *hypoExisting = *iterExisting;
|
||||
assert(iterExisting != m_hypos.end());
|
||||
|
||||
StaticData::Instance()->GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
||||
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
||||
|
||||
// found existing hypo with same target ending.
|
||||
// keep the best 1
|
||||
@ -174,7 +174,7 @@ void HypothesisCollection::PruneToSize(size_t newSize)
|
||||
{
|
||||
iterator iterRemove = iter++;
|
||||
Remove(iterRemove);
|
||||
StaticData::Instance()->GetSentenceStats().AddPruning();
|
||||
StaticData::Instance().GetSentenceStats().AddPruning();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
virtual size_t GetSize() const =0;
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection) =0;
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const =0;
|
||||
|
@ -36,7 +36,7 @@ using namespace std;
|
||||
LanguageModel::LanguageModel(bool registerScore)
|
||||
{
|
||||
if (registerScore)
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
}
|
||||
LanguageModel::~LanguageModel() {}
|
||||
|
||||
|
@ -41,7 +41,7 @@ namespace LanguageModelFactory
|
||||
{
|
||||
|
||||
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation, const std::vector<FactorType> &factorTypes
|
||||
, size_t nGramOrder, const std::string &languageModelFile, float weight, FactorCollection &factorCollection)
|
||||
, size_t nGramOrder, const std::string &languageModelFile, float weight)
|
||||
{
|
||||
LanguageModel *lm = NULL;
|
||||
switch (lmImplementation)
|
||||
@ -103,14 +103,14 @@ namespace LanguageModelFactory
|
||||
switch (lm->GetLMType())
|
||||
{
|
||||
case SingleFactor:
|
||||
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorCollection, factorTypes[0], weight, nGramOrder))
|
||||
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], weight, nGramOrder))
|
||||
{
|
||||
delete lm;
|
||||
lm = NULL;
|
||||
}
|
||||
break;
|
||||
case MultiFactor:
|
||||
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorCollection, factorTypes, weight, nGramOrder))
|
||||
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
|
||||
{
|
||||
delete lm;
|
||||
lm = NULL;
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "TypeDef.h"
|
||||
|
||||
class LanguageModel;
|
||||
class FactorCollection;
|
||||
|
||||
namespace LanguageModelFactory {
|
||||
|
||||
@ -17,7 +16,7 @@ namespace LanguageModelFactory {
|
||||
* language model toolkit as its underlying implementation
|
||||
*/
|
||||
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation, const std::vector<FactorType> &factorTypes
|
||||
, size_t nGramOrder, const std::string &languageModelFile, float weight, FactorCollection &factorCollection);
|
||||
, size_t nGramOrder, const std::string &languageModelFile, float weight);
|
||||
|
||||
};
|
||||
|
||||
|
@ -12,14 +12,15 @@ LanguageModelInternal::LanguageModelInternal(bool registerScore)
|
||||
}
|
||||
|
||||
bool LanguageModelInternal::Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
, size_t nGramOrder)
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
, size_t nGramOrder)
|
||||
{
|
||||
assert(nGramOrder <= 3);
|
||||
TRACE_ERR( "Loading Internal LM: " << filePath << endl);
|
||||
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
m_filePath = filePath;
|
||||
m_factorType = factorType;
|
||||
m_weight = weight;
|
||||
|
@ -25,7 +25,6 @@ protected:
|
||||
public:
|
||||
LanguageModelInternal(bool registerScore);
|
||||
bool Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
, size_t nGramOrder);
|
||||
|
@ -40,7 +40,6 @@ class LanguageModelJoint : public LanguageModelMultiFactor
|
||||
{
|
||||
protected:
|
||||
LanguageModelSingleFactor *m_lmImpl;
|
||||
FactorCollection *m_factorCollection;
|
||||
std::vector<FactorType> m_factorTypesOrdered;
|
||||
|
||||
size_t m_implFactor;
|
||||
@ -57,7 +56,6 @@ public:
|
||||
}
|
||||
|
||||
bool Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, const std::vector<FactorType> &factorTypes
|
||||
, float weight
|
||||
, size_t nGramOrder)
|
||||
@ -68,9 +66,10 @@ public:
|
||||
m_nGramOrder = nGramOrder;
|
||||
|
||||
m_factorTypesOrdered= factorTypes;
|
||||
m_factorCollection = &factorCollection;
|
||||
m_implFactor = 0;
|
||||
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
// sentence markers
|
||||
for (size_t index = 0 ; index < factorTypes.size() ; ++index)
|
||||
{
|
||||
@ -79,7 +78,7 @@ public:
|
||||
m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
|
||||
}
|
||||
|
||||
return m_lmImpl->Load(filePath, factorCollection, m_implFactor, weight, nGramOrder);
|
||||
return m_lmImpl->Load(filePath, m_implFactor, weight, nGramOrder);
|
||||
}
|
||||
|
||||
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
|
||||
@ -114,7 +113,7 @@ public:
|
||||
stream << "|" << factor->GetString();
|
||||
}
|
||||
|
||||
factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str());
|
||||
factor = FactorCollection::Instance().AddFactor(Output, m_implFactor, stream.str());
|
||||
|
||||
Word* jointWord = new Word;
|
||||
jointWord->SetFactor(m_implFactor, factor);
|
||||
|
@ -39,7 +39,6 @@ protected:
|
||||
|
||||
public:
|
||||
virtual bool Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, const std::vector<FactorType> &factorTypes
|
||||
, float weight
|
||||
, size_t nGramOrder) = 0;
|
||||
|
@ -41,7 +41,6 @@ public:
|
||||
|
||||
virtual ~LanguageModelSingleFactor();
|
||||
virtual bool Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
, size_t nGramOrder) = 0;
|
||||
|
@ -51,7 +51,6 @@ public:
|
||||
delete m_lmImpl;
|
||||
}
|
||||
bool Load(const std::string &filePath
|
||||
, FactorCollection &factorCollection
|
||||
, FactorType factorType
|
||||
, float weight
|
||||
, size_t nGramOrder)
|
||||
@ -63,10 +62,12 @@ public:
|
||||
|
||||
m_realNGramOrder = 3;
|
||||
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
|
||||
m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);
|
||||
|
||||
return m_lmImpl->Load(filePath, factorCollection, m_factorType, weight, nGramOrder);
|
||||
return m_lmImpl->Load(filePath, m_factorType, weight, nGramOrder);
|
||||
}
|
||||
|
||||
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
|
||||
|
@ -30,7 +30,7 @@ LexicalReordering::LexicalReordering(const std::string &filePath,
|
||||
m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filePath(filePath), m_sourceFactors(input), m_targetFactors(output)
|
||||
{
|
||||
//add score producer
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
//manage the weights by SetWeightsForScoreProducer method of static data.
|
||||
if(direction == LexReorderType::Bidirectional)
|
||||
{
|
||||
@ -48,7 +48,7 @@ LexicalReordering::LexicalReordering(const std::string &filePath,
|
||||
else if ( orientation == DistortionOrientationType::Msd) {
|
||||
m_numOrientationTypes = 3;
|
||||
}
|
||||
const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights);
|
||||
const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
|
||||
// Load the file
|
||||
LoadFile();
|
||||
// PrintTable();
|
||||
|
@ -39,20 +39,21 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
using namespace std;
|
||||
|
||||
Manager::Manager(InputType const& source, StaticData &staticData)
|
||||
Manager::Manager(InputType const& source)
|
||||
:m_source(source)
|
||||
,m_hypoStack(source.GetSize() + 1)
|
||||
,m_staticData(staticData)
|
||||
,m_possibleTranslations(source.CreateTranslationOptionCollection())
|
||||
,m_initialTargetPhrase(Output)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
TRACE_ERR("Translating: " << m_source << endl);
|
||||
std::vector < HypothesisCollection >::iterator iterStack;
|
||||
for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
|
||||
{
|
||||
HypothesisCollection &sourceHypoColl = *iterStack;
|
||||
sourceHypoColl.SetMaxHypoStackSize(m_staticData.GetMaxHypoStackSize());
|
||||
sourceHypoColl.SetBeamThreshold(m_staticData.GetBeamThreshold());
|
||||
sourceHypoColl.SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
|
||||
sourceHypoColl.SetBeamThreshold(staticData.GetBeamThreshold());
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,16 +69,17 @@ Manager::~Manager()
|
||||
*/
|
||||
void Manager::ProcessSentence()
|
||||
{
|
||||
m_staticData.ResetSentenceStats(m_source);
|
||||
vector < list < DecodeStep* > * >&decodeStepVL = m_staticData.GetDecodeStepVL();
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
staticData.ResetSentenceStats(m_source);
|
||||
const vector < list < DecodeStep* > * >
|
||||
&decodeStepVL = staticData.GetDecodeStepVL();
|
||||
|
||||
// create list of all possible translations
|
||||
// this is only valid if:
|
||||
// 1. generation of source sentence is not done 1st
|
||||
// 2. initial hypothesis factors are given in the sentence
|
||||
//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
|
||||
m_possibleTranslations->CreateTranslationOptions(decodeStepVL
|
||||
, m_staticData.GetFactorCollection());
|
||||
m_possibleTranslations->CreateTranslationOptions(decodeStepVL);
|
||||
|
||||
// initial seed hypothesis: nothing translated, no words produced
|
||||
{
|
||||
@ -93,7 +95,7 @@ void Manager::ProcessSentence()
|
||||
|
||||
// the stack is pruned before processing (lazy pruning):
|
||||
VERBOSE(3,"processing hypothesis from next stack");
|
||||
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
|
||||
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
|
||||
VERBOSE(3,std::endl);
|
||||
sourceHypoColl.CleanupArcList();
|
||||
|
||||
@ -109,7 +111,7 @@ void Manager::ProcessSentence()
|
||||
}
|
||||
|
||||
// some more logging
|
||||
VERBOSE(2,m_staticData.GetSentenceStats());
|
||||
VERBOSE(2, staticData.GetSentenceStats());
|
||||
}
|
||||
|
||||
/** Find all translation options to expand one hypothesis, trigger expansion
|
||||
@ -120,7 +122,7 @@ void Manager::ProcessSentence()
|
||||
void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
{
|
||||
// since we check for reordering limits, its good to have that limit handy
|
||||
int maxDistortion = m_staticData.GetMaxDistortion();
|
||||
int maxDistortion = StaticData::Instance().GetMaxDistortion();
|
||||
|
||||
// no limit of reordering: only check for overlap
|
||||
if (maxDistortion < 0)
|
||||
@ -218,11 +220,14 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
|
||||
{
|
||||
// create hypothesis and calculate all its scores
|
||||
Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
|
||||
newHypo->CalcScore(m_staticData, m_possibleTranslations->GetFutureScore());
|
||||
newHypo->CalcScore(m_possibleTranslations->GetFutureScore());
|
||||
|
||||
// logging for the curious
|
||||
IFVERBOSE(3) {
|
||||
newHypo->PrintHypothesis(m_source, m_staticData.GetWeightDistortion(), m_staticData.GetWeightWordPenalty());
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
newHypo->PrintHypothesis(m_source
|
||||
, staticData.GetWeightDistortion()
|
||||
, staticData.GetWeightWordPenalty());
|
||||
}
|
||||
|
||||
// add to hypothesis stack
|
||||
@ -347,7 +352,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
|
||||
|
||||
if(onlyDistinct)
|
||||
{
|
||||
size_t nBestFactor = StaticData::Instance()->GetNBestFactor();
|
||||
size_t nBestFactor = StaticData::Instance().GetNBestFactor();
|
||||
if (nBestFactor > 0)
|
||||
contenders.Prune(count * nBestFactor);
|
||||
}
|
||||
|
@ -76,7 +76,6 @@ protected:
|
||||
|
||||
std::vector < HypothesisCollection > m_hypoStack; /**< stacks to store hypothesis (partial translations) */
|
||||
// no of elements = no of words in source + 1
|
||||
StaticData &m_staticData; /**< holds various kinds of constants, counters, and global data structures */
|
||||
TranslationOptionCollection *m_possibleTranslations; /**< pre-computed list of translation options for the phrases in this sentence */
|
||||
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
|
||||
|
||||
@ -89,7 +88,7 @@ protected:
|
||||
void OutputHypoStack(int stack = -1);
|
||||
void OutputHypoStackSize();
|
||||
public:
|
||||
Manager(InputType const& source, StaticData &staticData);
|
||||
Manager(InputType const& source);
|
||||
~Manager();
|
||||
|
||||
void ProcessSentence();
|
||||
|
@ -57,7 +57,7 @@ public:
|
||||
CleanUp();
|
||||
delete m_dict;
|
||||
|
||||
if (StaticData::Instance()->GetVerboseLevel() >= 2)
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2)
|
||||
{
|
||||
|
||||
TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
|
||||
@ -326,7 +326,7 @@ public:
|
||||
for(size_t len=1;len<=srcSize;++len) path1Best[len]+=srcSize-len+1;
|
||||
|
||||
|
||||
if (StaticData::Instance()->GetVerboseLevel() >= 2 && exPathsD.size())
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size())
|
||||
{
|
||||
TRACE_ERR("path stats for current CN: \n");
|
||||
std::cerr.setf(std::ios::scientific);
|
||||
@ -437,7 +437,7 @@ public:
|
||||
} // end while(!stack.empty())
|
||||
|
||||
|
||||
if (StaticData::Instance()->GetVerboseLevel() >= 2 && exploredPaths.size())
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size())
|
||||
{
|
||||
TRACE_ERR("CN (explored): ");
|
||||
std::copy(exploredPaths.begin()+1,exploredPaths.end(),
|
||||
|
@ -27,7 +27,7 @@ PartialTranslOptColl::PartialTranslOptColl()
|
||||
{
|
||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||
m_worstScore = -std::numeric_limits<float>::infinity();
|
||||
m_maxSize = StaticData::Instance()->GetMaxNoPartTransOpt();
|
||||
m_maxSize = StaticData::Instance().GetMaxNoPartTransOpt();
|
||||
m_totalPruned = 0;
|
||||
}
|
||||
|
||||
|
@ -82,7 +82,7 @@ void Phrase::MergeFactors(const Phrase &copy)
|
||||
{
|
||||
assert(GetSize() == copy.GetSize());
|
||||
size_t size = GetSize();
|
||||
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
|
||||
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
|
||||
for (size_t currPos = 0 ; currPos < size ; currPos++)
|
||||
{
|
||||
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
|
||||
@ -180,7 +180,7 @@ vector< vector<string> > Phrase::Parse(const std::string &phraseString, const st
|
||||
// to
|
||||
// "KOMMA" "none"
|
||||
if (factorStrVector.size() != factorOrder.size()) {
|
||||
TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance()->GetCurrentInputPosition() <<*/ std::endl
|
||||
TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance().GetCurrentInputPosition() <<*/ std::endl
|
||||
<< " Expected input to have words composed of " << factorOrder.size() << " factor(s) (form FAC1|FAC2|...)" << std::endl
|
||||
<< " but instead received input with " << factorStrVector.size() << " factor(s).\n");
|
||||
abort();
|
||||
@ -191,9 +191,10 @@ vector< vector<string> > Phrase::Parse(const std::string &phraseString, const st
|
||||
}
|
||||
|
||||
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const vector< vector<string> > &phraseVector
|
||||
, FactorCollection &factorCollection)
|
||||
, const vector< vector<string> > &phraseVector)
|
||||
{
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
|
||||
{
|
||||
// add word this phrase
|
||||
@ -210,11 +211,10 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
|
||||
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const string &phraseString
|
||||
, FactorCollection &factorCollection
|
||||
, const string &factorDelimiter)
|
||||
, const string &factorDelimiter)
|
||||
{
|
||||
vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
|
||||
CreateFromString(factorOrder, phraseVector, factorCollection);
|
||||
CreateFromString(factorOrder, phraseVector);
|
||||
}
|
||||
|
||||
bool Phrase::operator < (const Phrase &compare) const
|
||||
@ -234,7 +234,7 @@ bool Phrase::operator < (const Phrase &compare) const
|
||||
{
|
||||
size_t minSize = std::min( thisSize , compareSize );
|
||||
|
||||
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
|
||||
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
|
||||
// taken from word.Compare()
|
||||
for (size_t i = 0 ; i < maxNumFactors ; i++)
|
||||
{
|
||||
@ -311,7 +311,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
|
||||
|
||||
const size_t size = GetSize();
|
||||
|
||||
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
|
||||
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
|
||||
for (size_t currPos = 0 ; currPos < size ; currPos++)
|
||||
{
|
||||
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
|
||||
|
@ -80,8 +80,7 @@ public:
|
||||
* \param phraseVector 2D string vector
|
||||
*/
|
||||
void CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const std::vector< std::vector<std::string> > &phraseVector
|
||||
, FactorCollection &factorCollection);
|
||||
, const std::vector< std::vector<std::string> > &phraseVector);
|
||||
/** Fills phrase with words from format string, typically from phrase table or sentence input
|
||||
* \param factorOrder factor types of each element in 2D string vector
|
||||
* \param phraseString formatted input string to parse
|
||||
@ -89,7 +88,6 @@ public:
|
||||
*/
|
||||
void CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const std::string &phraseString
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &factorDelimiter);
|
||||
|
||||
/** copy factors from the other phrase to this phrase.
|
||||
|
@ -27,7 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
PhraseDictionary::PhraseDictionary(size_t numScoreComponent)
|
||||
: Dictionary(numScoreComponent),m_tableLimit(0)
|
||||
{
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
|
||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
||||
}
|
||||
|
||||
PhraseDictionary::~PhraseDictionary() {}
|
||||
|
@ -38,7 +38,6 @@ using namespace std;
|
||||
|
||||
bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const string &filePath
|
||||
, const vector<float> &weight
|
||||
, size_t tableLimit
|
||||
@ -83,7 +82,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
continue;
|
||||
}
|
||||
|
||||
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
if (tokens[0] != prevSourcePhrase)
|
||||
phraseVector = Phrase::Parse(tokens[0], input, factorDelimiter);
|
||||
|
||||
@ -99,10 +98,10 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
||||
|
||||
// source
|
||||
Phrase sourcePhrase(Input);
|
||||
sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
|
||||
sourcePhrase.CreateFromString( input, phraseVector);
|
||||
//target
|
||||
TargetPhrase targetPhrase(Output);
|
||||
targetPhrase.CreateFromString( output, tokens[1], factorCollection, factorDelimiter);
|
||||
targetPhrase.CreateFromString( output, tokens[1], factorDelimiter);
|
||||
|
||||
// component score, for n-best output
|
||||
std::vector<float> scv(scoreVector.size());
|
||||
|
@ -47,7 +47,6 @@ public:
|
||||
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &filePath
|
||||
, const std::vector<float> &weight
|
||||
, size_t tableLimit
|
||||
|
@ -47,7 +47,6 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
|
||||
|
||||
bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &filePath
|
||||
, const std::vector<float> &weight
|
||||
, size_t tableLimit
|
||||
@ -55,6 +54,8 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
||||
, float weightWP
|
||||
)
|
||||
{
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
if(m_numScoreComponent!=weight.size()) {
|
||||
stringstream strme;
|
||||
strme << "ERROR: mismatch of number of scaling factors: "<<weight.size()
|
||||
|
@ -39,7 +39,6 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
|
||||
// initialize ...
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &filePath
|
||||
, const std::vector<float> &weight
|
||||
, size_t tableLimit
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include "StaticData.h"
|
||||
|
||||
ScoreComponentCollection::ScoreComponentCollection()
|
||||
: m_scores(StaticData::Instance()->GetTotalScoreComponents(), 0.0f)
|
||||
, m_sim(&StaticData::Instance()->GetScoreIndexManager())
|
||||
: m_scores(StaticData::Instance().GetTotalScoreComponents(), 0.0f)
|
||||
, m_sim(&StaticData::Instance().GetScoreIndexManager())
|
||||
{}
|
||||
|
||||
|
@ -26,23 +26,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "StaticData.h"
|
||||
#include "Util.h"
|
||||
|
||||
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder,
|
||||
FactorCollection &factorCollection)
|
||||
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
std::string line;
|
||||
if (getline(in, line, '\n').eof())
|
||||
return 0;
|
||||
line = Trim(line);
|
||||
|
||||
Phrase::CreateFromString(factorOrder, line, factorCollection, factorDelimiter);
|
||||
Phrase::CreateFromString(factorOrder, line, factorDelimiter);
|
||||
return 1;
|
||||
}
|
||||
|
||||
TranslationOptionCollection*
|
||||
Sentence::CreateTranslationOptionCollection() const
|
||||
{
|
||||
size_t maxNoTransOptPerCoverage = StaticData::Instance()->GetMaxNoTransOptPerCoverage();
|
||||
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
TranslationOptionCollection *rv= new TranslationOptionCollectionText(*this, maxNoTransOptPerCoverage);
|
||||
assert(rv);
|
||||
return rv;
|
||||
|
@ -61,7 +61,7 @@ class Sentence : public Phrase, public InputType
|
||||
return Phrase::GetSize();
|
||||
}
|
||||
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
void Print(std::ostream& out) const;
|
||||
|
||||
TranslationOptionCollection* CreateTranslationOptionCollection() const;
|
||||
|
@ -58,7 +58,7 @@ static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z
|
||||
return max;
|
||||
}
|
||||
|
||||
StaticData* StaticData::s_instance(0);
|
||||
StaticData StaticData::s_instance;
|
||||
|
||||
StaticData::StaticData()
|
||||
:m_fLMsLoaded(false)
|
||||
@ -75,8 +75,6 @@ StaticData::StaticData()
|
||||
m_maxFactorIdx[0] = 0; // source side
|
||||
m_maxFactorIdx[1] = 0; // target side
|
||||
|
||||
s_instance = this;
|
||||
|
||||
// memory pools
|
||||
Phrase::InitializeMemPool();
|
||||
}
|
||||
@ -469,7 +467,7 @@ bool StaticData::LoadLanguageModels()
|
||||
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
|
||||
|
||||
LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(lmImplementation, factorTypes
|
||||
, nGramOrder, languageModelFile, weightAll[i], m_factorCollection);
|
||||
, nGramOrder, languageModelFile, weightAll[i]);
|
||||
if (lm == NULL)
|
||||
{
|
||||
UserMessage::Add("no LM created. We probably don't have it compiled");
|
||||
@ -523,7 +521,6 @@ bool StaticData::LoadGenerationTables()
|
||||
assert(m_generationDictionary.back() && "could not create GenerationDictionary");
|
||||
if (!m_generationDictionary.back()->Load(input
|
||||
, output
|
||||
, m_factorCollection
|
||||
, filePath
|
||||
, Output))
|
||||
{
|
||||
@ -623,7 +620,6 @@ bool StaticData::LoadPhraseTables()
|
||||
PhraseDictionaryMemory *pd=new PhraseDictionaryMemory(numScoreComponent);
|
||||
if (!pd->Load(input
|
||||
, output
|
||||
, m_factorCollection
|
||||
, filePath
|
||||
, weight
|
||||
, maxTargetPhrase[index]
|
||||
@ -640,7 +636,7 @@ bool StaticData::LoadPhraseTables()
|
||||
{
|
||||
TRACE_ERR( "using binary phrase tables for idx "<<currDict<<"\n");
|
||||
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent,(currDict==0 ? m_numInputScores : 0));
|
||||
if (!pd->Load(input,output,m_factorCollection,filePath,weight,
|
||||
if (!pd->Load(input,output,filePath,weight,
|
||||
maxTargetPhrase[index],
|
||||
GetAllLM(),
|
||||
GetWeightWordPenalty()))
|
||||
@ -739,7 +735,7 @@ bool StaticData::LoadMapping()
|
||||
return true;
|
||||
}
|
||||
|
||||
void StaticData::CleanUpAfterSentenceProcessing()
|
||||
void StaticData::CleanUpAfterSentenceProcessing() const
|
||||
{
|
||||
for(size_t i=0;i<m_phraseDictionary.size();++i)
|
||||
m_phraseDictionary[i]->CleanUp();
|
||||
@ -758,7 +754,7 @@ void StaticData::CleanUpAfterSentenceProcessing()
|
||||
/** initialize the translation and language models for this sentence
|
||||
(includes loading of translation table entries on demand, if
|
||||
binary format is used) */
|
||||
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in)
|
||||
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
|
||||
{
|
||||
for(size_t i=0;i<m_phraseDictionary.size();++i)
|
||||
{
|
||||
|
@ -46,9 +46,8 @@ class DecodeStep;
|
||||
class StaticData
|
||||
{
|
||||
private:
|
||||
static StaticData* s_instance;
|
||||
static StaticData s_instance;
|
||||
protected:
|
||||
FactorCollection m_factorCollection;
|
||||
std::vector<PhraseDictionary*> m_phraseDictionary;
|
||||
std::vector<GenerationDictionary*> m_generationDictionary;
|
||||
std::vector < std::list < DecodeStep*> * > m_decodeStepVL;
|
||||
@ -106,6 +105,9 @@ protected:
|
||||
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
|
||||
size_t m_maxNumFactors; //! max number of factors on both source and target sides
|
||||
|
||||
//! constructor. only the 1 static variable can be created
|
||||
StaticData();
|
||||
|
||||
//! helper fn to set bool param from ini file/command line
|
||||
void SetBooleanParameter(bool *paramter, string parameterName, bool defaultValue);
|
||||
|
||||
@ -125,17 +127,21 @@ protected:
|
||||
bool LoadLexicalReorderingModel();
|
||||
|
||||
public:
|
||||
StaticData();
|
||||
~StaticData();
|
||||
|
||||
static const StaticData* Instance() { return s_instance; }
|
||||
static const StaticData& Instance() { return s_instance; }
|
||||
|
||||
static bool LoadDataStatic(Parameter *parameter)
|
||||
{
|
||||
return s_instance.LoadData(parameter);
|
||||
}
|
||||
|
||||
/** Main function to load everything.
|
||||
* Also initialize the Parameter object
|
||||
*/
|
||||
bool LoadData(Parameter *parameter);
|
||||
|
||||
const PARAM_VEC &GetParam(const std::string &paramName)
|
||||
const PARAM_VEC &GetParam(const std::string &paramName) const
|
||||
{
|
||||
return m_parameter->GetParam(paramName);
|
||||
}
|
||||
@ -153,7 +159,7 @@ public:
|
||||
return m_outputFactorOrder;
|
||||
}
|
||||
|
||||
std::vector < std::list < DecodeStep* > * > &GetDecodeStepVL()
|
||||
const std::vector < std::list < DecodeStep* > * > &GetDecodeStepVL() const
|
||||
{
|
||||
return m_decodeStepVL;
|
||||
}
|
||||
@ -174,10 +180,6 @@ public:
|
||||
{
|
||||
return m_maxNoPartTransOpt;
|
||||
}
|
||||
FactorCollection &GetFactorCollection()
|
||||
{
|
||||
return m_factorCollection;
|
||||
}
|
||||
std::vector<LexicalReordering*> GetReorderModels() const
|
||||
{
|
||||
return m_reorderModels;
|
||||
@ -290,8 +292,8 @@ public:
|
||||
void SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights);
|
||||
int GetInputType() const {return m_inputType;}
|
||||
size_t GetNumInputScores() const {return m_numInputScores;}
|
||||
void InitializeBeforeSentenceProcessing(InputType const&);
|
||||
void CleanUpAfterSentenceProcessing();
|
||||
void InitializeBeforeSentenceProcessing(InputType const&) const;
|
||||
void CleanUpAfterSentenceProcessing() const;
|
||||
SentenceStats& GetSentenceStats() const
|
||||
{
|
||||
return *m_sentenceStats;
|
||||
|
@ -38,7 +38,7 @@ TargetPhrase::TargetPhrase(FactorDirection direction)
|
||||
void TargetPhrase::SetScore()
|
||||
{ // used when creating translations of unknown words:
|
||||
m_transScore = m_ngramScore = 0;
|
||||
m_fullScore = - StaticData::Instance()->GetWeightWordPenalty();
|
||||
m_fullScore = - StaticData::Instance().GetWeightWordPenalty();
|
||||
}
|
||||
|
||||
void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
|
||||
@ -84,7 +84,7 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
|
||||
void TargetPhrase::SetWeights(const ScoreProducer* translationScoreProducer, const vector<float> &weightT)
|
||||
{
|
||||
// calling this function in case of confusion net input is undefined
|
||||
assert(StaticData::Instance()->GetInputType()==0);
|
||||
assert(StaticData::Instance().GetInputType()==0);
|
||||
|
||||
/* one way to fix this, you have to make sure the weightT contains (in
|
||||
addition to the usual phrase translation scaling factors) the input
|
||||
|
@ -83,14 +83,14 @@ void TranslationOption::CalcScore()
|
||||
float m_ngramScore = 0;
|
||||
float retFullScore = 0;
|
||||
|
||||
const LMList &allLM = StaticData::Instance()->GetAllLM();
|
||||
const LMList &allLM = StaticData::Instance().GetAllLM();
|
||||
|
||||
allLM.CalcScore(GetTargetPhrase(), retFullScore, m_ngramScore, &m_scoreBreakdown);
|
||||
// future score
|
||||
m_futureScore = retFullScore - m_ngramScore;
|
||||
|
||||
size_t phraseSize = GetTargetPhrase().GetSize();
|
||||
m_futureScore += m_scoreBreakdown.InnerProduct(StaticData::Instance()->GetAllWeights()) - phraseSize * StaticData::Instance()->GetWeightWordPenalty();
|
||||
m_futureScore += m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize * StaticData::Instance().GetWeightWordPenalty();
|
||||
}
|
||||
|
||||
TO_STRING_BODY(TranslationOption);
|
||||
|
@ -125,8 +125,7 @@ void TranslationOptionCollection::Prune()
|
||||
* \param factorCollection input sentence with all factors
|
||||
*/
|
||||
|
||||
void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection)
|
||||
void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL)
|
||||
{
|
||||
size_t size = m_source.GetSize();
|
||||
// try to translation for coverage with no trans by expanding table limit
|
||||
@ -139,7 +138,7 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
|
||||
size_t numTransOpt = fullList.size();
|
||||
if (numTransOpt == 0)
|
||||
{
|
||||
CreateTranslationOptionsForRange(*decodeStepList, factorCollection
|
||||
CreateTranslationOptionsForRange(*decodeStepList
|
||||
, pos, pos, false);
|
||||
}
|
||||
}
|
||||
@ -165,7 +164,7 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
|
||||
for (size_t currPos = 0 ; currPos < size ; ++currPos)
|
||||
{
|
||||
if (process[currPos])
|
||||
ProcessUnknownWord(currPos, *m_factorCollection);
|
||||
ProcessUnknownWord(currPos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,54 +181,54 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
|
||||
* \param factorCollection input sentence with all factors
|
||||
*/
|
||||
void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
|
||||
size_t sourcePos
|
||||
, FactorCollection &factorCollection)
|
||||
size_t sourcePos)
|
||||
{
|
||||
// unknown word, add as trans opt
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
size_t isDigit = 0;
|
||||
if (StaticData::Instance()->GetDropUnknown())
|
||||
{
|
||||
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
|
||||
const string &s = f->GetString();
|
||||
isDigit = s.find_first_of("0123456789");
|
||||
if (isDigit == string::npos)
|
||||
isDigit = 0;
|
||||
else
|
||||
isDigit = 1;
|
||||
// modify the starting bitmap
|
||||
}
|
||||
|
||||
TranslationOption *transOpt;
|
||||
if (! StaticData::Instance()->GetDropUnknown() || isDigit)
|
||||
{
|
||||
// add to dictionary
|
||||
TargetPhrase targetPhrase(Output);
|
||||
Word &targetWord = targetPhrase.AddWord();
|
||||
|
||||
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
|
||||
const Factor *sourceFactor = sourceWord[currFactor];
|
||||
if (sourceFactor == NULL)
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
|
||||
else
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
|
||||
}
|
||||
|
||||
targetPhrase.SetScore();
|
||||
|
||||
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
|
||||
}
|
||||
size_t isDigit = 0;
|
||||
if (StaticData::Instance().GetDropUnknown())
|
||||
{
|
||||
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
|
||||
const string &s = f->GetString();
|
||||
isDigit = s.find_first_of("0123456789");
|
||||
if (isDigit == string::npos)
|
||||
isDigit = 0;
|
||||
else
|
||||
{ // drop source word. create blank trans opt
|
||||
const TargetPhrase targetPhrase(Output);
|
||||
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
|
||||
isDigit = 1;
|
||||
// modify the starting bitmap
|
||||
}
|
||||
|
||||
TranslationOption *transOpt;
|
||||
if (! StaticData::Instance().GetDropUnknown() || isDigit)
|
||||
{
|
||||
// add to dictionary
|
||||
TargetPhrase targetPhrase(Output);
|
||||
Word &targetWord = targetPhrase.AddWord();
|
||||
|
||||
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
|
||||
const Factor *sourceFactor = sourceWord[currFactor];
|
||||
if (sourceFactor == NULL)
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
|
||||
else
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
|
||||
}
|
||||
|
||||
transOpt->CalcScore();
|
||||
Add(transOpt);
|
||||
targetPhrase.SetScore();
|
||||
|
||||
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
|
||||
}
|
||||
else
|
||||
{ // drop source word. create blank trans opt
|
||||
const TargetPhrase targetPhrase(Output);
|
||||
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
|
||||
}
|
||||
|
||||
transOpt->CalcScore();
|
||||
Add(transOpt);
|
||||
}
|
||||
|
||||
/** compute future score matrix in a dynamic programming fashion.
|
||||
@ -319,11 +318,8 @@ void TranslationOptionCollection::CalcFutureScore()
|
||||
* \param decodeStepList list of decoding steps
|
||||
* \param factorCollection input sentence with all factors
|
||||
*/
|
||||
void TranslationOptionCollection::CreateTranslationOptions(const vector <list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection)
|
||||
{
|
||||
m_factorCollection = &factorCollection;
|
||||
|
||||
void TranslationOptionCollection::CreateTranslationOptions(const vector <list < DecodeStep* > * > &decodeStepVL)
|
||||
{
|
||||
// loop over all substrings of the source sentence, look them up
|
||||
// in the phraseDictionary (which is the- possibly filtered-- phrase
|
||||
// table loaded on initialization), generate TranslationOption objects
|
||||
@ -335,14 +331,14 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <list <
|
||||
{
|
||||
for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
|
||||
{
|
||||
CreateTranslationOptionsForRange( *decodeStepList, factorCollection, startPos, endPos, true);
|
||||
CreateTranslationOptionsForRange( *decodeStepList, startPos, endPos, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VERBOSE(3,"Translation Option Collection\n " << *this << endl);
|
||||
|
||||
ProcessUnknownWord(decodeStepVL, factorCollection);
|
||||
ProcessUnknownWord(decodeStepVL);
|
||||
|
||||
// Prune
|
||||
Prune();
|
||||
@ -361,7 +357,6 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <list <
|
||||
*/
|
||||
void TranslationOptionCollection::CreateTranslationOptionsForRange(
|
||||
const list < DecodeStep* > &decodeStepList
|
||||
, FactorCollection &factorCollection
|
||||
, size_t startPos
|
||||
, size_t endPos
|
||||
, bool adhereTableLimit)
|
||||
@ -373,8 +368,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
|
||||
list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
|
||||
const DecodeStep &decodeStep = **iterStep;
|
||||
|
||||
ProcessInitialTranslation(decodeStep, factorCollection
|
||||
, *oldPtoc
|
||||
ProcessInitialTranslation(decodeStep, *oldPtoc
|
||||
, startPos, endPos, adhereTableLimit );
|
||||
|
||||
// do rest of decode steps
|
||||
@ -394,7 +388,6 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
|
||||
decodeStep.Process(inputPartialTranslOpt
|
||||
, decodeStep
|
||||
, *newPtoc
|
||||
, factorCollection
|
||||
, this
|
||||
, adhereTableLimit);
|
||||
}
|
||||
@ -427,7 +420,6 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
|
||||
*/
|
||||
void TranslationOptionCollection::ProcessInitialTranslation(
|
||||
const DecodeStep &decodeStep
|
||||
, FactorCollection &factorCollection
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos
|
||||
, size_t endPos
|
||||
|
@ -63,23 +63,20 @@ protected:
|
||||
InputType const &m_source; /*< reference to the input */
|
||||
SquareMatrix m_futureScore; /*< matrix of future costs for contiguous parts (span) of the input */
|
||||
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span (phrase???) */
|
||||
FactorCollection *m_factorCollection;
|
||||
|
||||
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage);
|
||||
|
||||
void CalcFutureScore();
|
||||
|
||||
virtual void ProcessInitialTranslation(const DecodeStep &decodeStep
|
||||
, FactorCollection &factorCollection
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit );
|
||||
|
||||
//! Force a creation of a translation option where there are none for a particular source position.
|
||||
void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL, FactorCollection &factorCollection);
|
||||
void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL);
|
||||
//! special handling of ONE unknown words.
|
||||
virtual void ProcessOneUnknownWord(const Word &sourceWord
|
||||
, size_t sourcePos
|
||||
, FactorCollection &factorCollection);
|
||||
, size_t sourcePos);
|
||||
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
|
||||
void Prune();
|
||||
|
||||
@ -95,8 +92,7 @@ protected:
|
||||
void Add(const TranslationOption *translationOption);
|
||||
|
||||
//! implemented by inherited class, called by this class
|
||||
virtual void ProcessUnknownWord(size_t sourcePos
|
||||
, FactorCollection &factorCollection)=0;
|
||||
virtual void ProcessUnknownWord(size_t sourcePos)=0;
|
||||
|
||||
public:
|
||||
virtual ~TranslationOptionCollection();
|
||||
@ -108,11 +104,9 @@ public:
|
||||
size_t GetSize() const { return m_source.GetSize(); };
|
||||
|
||||
//! Create all possible translations from the phrase tables
|
||||
virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection);
|
||||
virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL);
|
||||
//! Create translation options that exactly cover a specific input span.
|
||||
virtual void CreateTranslationOptionsForRange(const std::list < DecodeStep* > &decodeStepList
|
||||
, FactorCollection &factorCollection
|
||||
, size_t startPosition
|
||||
, size_t endPosition
|
||||
, bool adhereTableLimit);
|
||||
|
@ -19,14 +19,13 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
|
||||
* at a particular source position
|
||||
*/
|
||||
void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(
|
||||
size_t sourcePos
|
||||
, FactorCollection &factorCollection)
|
||||
size_t sourcePos)
|
||||
{
|
||||
ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);
|
||||
|
||||
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
|
||||
for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i)
|
||||
ProcessOneUnknownWord(i->first,sourcePos,factorCollection);
|
||||
ProcessOneUnknownWord(i->first ,sourcePos);
|
||||
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,6 @@ class TranslationOptionCollectionConfusionNet : public TranslationOptionCollecti
|
||||
public:
|
||||
TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage);
|
||||
|
||||
void ProcessUnknownWord( size_t sourcePos
|
||||
, FactorCollection &factorCollection);
|
||||
void ProcessUnknownWord( size_t sourcePos);
|
||||
|
||||
};
|
||||
|
@ -37,9 +37,8 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
|
||||
/* forcibly create translation option for a particular source word.
|
||||
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
|
||||
*/
|
||||
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos
|
||||
, FactorCollection &factorCollection)
|
||||
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos)
|
||||
{
|
||||
const Word &sourceWord = m_source.GetWord(sourcePos);
|
||||
ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection);
|
||||
ProcessOneUnknownWord(sourceWord,sourcePos);
|
||||
}
|
||||
|
@ -28,8 +28,7 @@ class LMList;
|
||||
|
||||
class TranslationOptionCollectionText : public TranslationOptionCollection {
|
||||
public:
|
||||
void ProcessUnknownWord( size_t sourcePos
|
||||
, FactorCollection &factorCollection);
|
||||
void ProcessUnknownWord( size_t sourcePos);
|
||||
|
||||
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage);
|
||||
|
||||
|
@ -47,8 +47,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
/** verbose macros
|
||||
* */
|
||||
#define VERBOSE(level,str) { if (StaticData::Instance()->GetVerboseLevel() >= level) { TRACE_ERR(str); } }
|
||||
#define IFVERBOSE(level) if (StaticData::Instance()->GetVerboseLevel() >= level)
|
||||
#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
|
||||
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
|
||||
|
||||
//! get string representation of any object/variable, as long as it can pipe to a stream
|
||||
template<typename T>
|
||||
|
@ -64,7 +64,7 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
|
||||
{
|
||||
stringstream strme;
|
||||
assert(factorType.size() <= MAX_NUM_FACTORS);
|
||||
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
bool firstPass = true;
|
||||
for (unsigned int i = 0 ; i < factorType.size() ; i++)
|
||||
{
|
||||
@ -86,7 +86,7 @@ ostream& operator<<(ostream& out, const Word& word)
|
||||
{
|
||||
stringstream strme;
|
||||
|
||||
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
bool firstPass = true;
|
||||
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user