code cleanup - make FactorCollection and StaticData accessible only globally (as singletons)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1218 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2007-02-16 18:08:37 +00:00
parent 59c4ba9f4d
commit f3cbacba3e
58 changed files with 217 additions and 259 deletions

View File

@ -26,8 +26,6 @@ std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
return out;
}
FactorCollection factorCollection;
inline bool existsFile(const char* filename) {
struct stat mystat;
return (stat(filename,&mystat)==0);

View File

@ -49,13 +49,11 @@ IOStream::IOStream(
const vector<FactorType> &inputFactorOrder
, const vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
, FactorCollection &factorCollection
, size_t nBestSize
, const string &nBestFilePath)
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_factorCollection(factorCollection)
,m_inputFile(NULL)
,m_inputStream(&std::cin)
,m_nBestStream(NULL)
@ -80,14 +78,12 @@ IOStream::IOStream(
IOStream::IOStream(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
, FactorCollection &factorCollection
, size_t nBestSize
, const std::string &nBestFilePath
, const std::string &inputFilePath)
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_factorCollection(factorCollection)
,m_inputFilePath(inputFilePath)
,m_inputFile(new InputFileStream(inputFilePath))
,m_nBestStream(NULL)
@ -123,7 +119,7 @@ IOStream::~IOStream()
InputType*IOStream::GetInput(InputType* inputType)
{
if(inputType->Read(*m_inputStream, m_inputFactorOrder, m_factorCollection))
if(inputType->Read(*m_inputStream, m_inputFactorOrder))
{
inputType->SetTranslationId(m_translationId++);
return inputType;
@ -210,7 +206,7 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
void IOStream::OutputNBestList(const LatticePathList &nBestList, long translationId)
{
bool labeledOutput = StaticData::Instance()->IsLabeledNBestList();
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
LatticePathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
@ -234,10 +230,10 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
// basic distortion
if (labeledOutput)
*m_nBestStream << "d: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetDistortionScoreProducer()) << " ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
// reordering
vector<LexicalReordering*> rms = StaticData::Instance()->GetReorderModels();
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
if(rms.size() > 0)
{
vector<LexicalReordering*>::iterator iter;
@ -252,7 +248,7 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
}
// lm
const LMList& lml = StaticData::Instance()->GetAllLM();
const LMList& lml = StaticData::Instance().GetAllLM();
if (lml.size() > 0) {
if (labeledOutput)
*m_nBestStream << "lm: ";
@ -263,9 +259,9 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
}
// translation components
if (StaticData::Instance()->GetInputType()==0){
if (StaticData::Instance().GetInputType()==0){
// translation components for text input
vector<PhraseDictionary*> pds = StaticData::Instance()->GetPhraseDictionaries();
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "tm: ";
@ -281,7 +277,7 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionary*> pds = StaticData::Instance()->GetPhraseDictionaries();
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionary*>::iterator iter;
@ -318,10 +314,10 @@ void IOStream::OutputNBestList(const LatticePathList &nBestList, long translatio
// word penalty
if (labeledOutput)
*m_nBestStream << "w: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetWordPenaltyProducer()) << " ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
// generation
vector<GenerationDictionary*> gds = StaticData::Instance()->GetGenerationDictionaries();
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "g: ";

View File

@ -53,7 +53,6 @@ protected:
const std::vector<FactorType> &m_inputFactorOrder;
const std::vector<FactorType> &m_outputFactorOrder;
const FactorMask &m_inputFactorUsed;
FactorCollection &m_factorCollection;
std::ostream *m_nBestStream;
std::string m_inputFilePath;
std::istream *m_inputStream;
@ -64,14 +63,12 @@ public:
IOStream(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
, FactorCollection &factorCollection
, size_t nBestSize
, const std::string &nBestFilePath);
IOStream(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
, FactorCollection &factorCollection
, size_t nBestSize
, const std::string &nBestFilePath
, const std::string &inputFilePath);

View File

@ -85,8 +85,8 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
StaticData staticData;
if (!staticData.LoadData(parameter))
const StaticData &staticData = StaticData::Instance();
if (!StaticData::LoadDataStatic(parameter))
return EXIT_FAILURE;
// set up read/writing class
@ -120,7 +120,7 @@ int main(int argc, char* argv[])
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
staticData.InitializeBeforeSentenceProcessing(*source);
Manager manager(*source, staticData);
Manager manager(*source);
manager.ProcessSentence();
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
staticData.GetReportSegmentation(),
@ -164,7 +164,7 @@ int main(int argc, char* argv[])
#endif
}
IOStream *GetIODevice(StaticData &staticData)
IOStream *GetIODevice(const StaticData &staticData)
{
IOStream *ioStream;
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
@ -178,7 +178,6 @@ IOStream *GetIODevice(StaticData &staticData)
string filePath = staticData.GetParam("input-file")[0];
ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
, staticData.GetFactorCollection()
, staticData.GetNBestSize()
, staticData.GetNBestFilePath()
, filePath);
@ -187,7 +186,6 @@ IOStream *GetIODevice(StaticData &staticData)
{
VERBOSE(1,"IO from STDOUT/STDIN" << endl);
ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
, staticData.GetFactorCollection()
, staticData.GetNBestSize()
, staticData.GetNBestFilePath());
}

View File

@ -39,4 +39,4 @@ POSSIBILITY OF SUCH DAMAGE.
class IOStream;
int main(int argc, char* argv[]);
IOStream *GetIODevice(StaticData &staticData);
IOStream *GetIODevice(const StaticData &staticData);

View File

@ -91,7 +91,7 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
os << std::endl << std::endl;
if (doLMStats && lmCalls > 0) {
std::vector<unsigned int>::iterator acc = lmAcc.begin();
const LMList& lmlist = StaticData::Instance()->GetAllLM();
const LMList& lmlist = StaticData::Instance().GetAllLM();
LMList::const_iterator i = lmlist.begin();
for (; acc != lmAcc.end(); ++acc, ++i) {
char buf[256];
@ -108,7 +108,7 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
}
}
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
StaticData::Instance()->GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance()->GetAllWeights());
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
os << std::endl;
}

View File

@ -48,8 +48,11 @@ struct CNStats {
CNStats stats;
ConfusionNet::ConfusionNet(FactorCollection* p)
: InputType(),m_factorCollection(p) {stats.createOne();}
ConfusionNet::ConfusionNet()
: InputType()
{
stats.createOne();
}
ConfusionNet::~ConfusionNet() {stats.destroyOne();}
ConfusionNet::ConfusionNet(Sentence const& s)
@ -59,11 +62,6 @@ ConfusionNet::ConfusionNet(Sentence const& s)
data[i].push_back(std::make_pair(s.GetWord(i),0.0));
}
void ConfusionNet::SetFactorCollection(FactorCollection *p)
{
m_factorCollection=p;
}
bool ConfusionNet::ReadF(std::istream& in,
const std::vector<FactorType>& factorOrder,
int format)
@ -83,10 +81,8 @@ bool ConfusionNet::ReadF(std::istream& in,
}
int ConfusionNet::Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
FactorCollection &factorCollection)
const std::vector<FactorType>& factorOrder)
{
SetFactorCollection(&factorCollection);
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
@ -99,14 +95,13 @@ void ConfusionNet::String2Word(const std::string& s,Word& w,
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0;i<factorOrder.size();++i)
w.SetFactor(factorOrder[i],
m_factorCollection->AddFactor(Input,factorOrder[i],
FactorCollection::Instance().AddFactor(Input,factorOrder[i],
factorStrVector[i]));
}
bool ConfusionNet::ReadFormat0(std::istream& in,
const std::vector<FactorType>& factorOrder)
{
assert(m_factorCollection);
Clear();
std::string line;
while(getline(in,line)) {
@ -140,7 +135,6 @@ bool ConfusionNet::ReadFormat0(std::istream& in,
bool ConfusionNet::ReadFormat1(std::istream& in,
const std::vector<FactorType>& factorOrder)
{
assert(m_factorCollection);
Clear();
std::string line;
if(!getline(in,line)) return 0;
@ -202,7 +196,7 @@ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
TranslationOptionCollection*
ConfusionNet::CreateTranslationOptionCollection() const
{
size_t maxNoTransOptPerCoverage = StaticData::Instance()->GetMaxNoTransOptPerCoverage();
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage);
assert(rv);
return rv;

View File

@ -17,19 +17,16 @@ class ConfusionNet : public InputType {
private:
std::vector<Column> data;
FactorCollection *m_factorCollection;
bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
public:
ConfusionNet(FactorCollection* p=0);
ConfusionNet();
~ConfusionNet();
ConfusionNet(Sentence const& s);
void SetFactorCollection(FactorCollection*);
const Column& GetColumn(size_t i) const {assert(i<data.size());return data[i];}
const Column& operator[](size_t i) const {return GetColumn(i);}
@ -41,8 +38,7 @@ class ConfusionNet : public InputType {
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
void Print(std::ostream&) const;
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
Phrase GetSubString(const WordsRange&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined

View File

@ -100,8 +100,7 @@ public:
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
, TranslationOptionCollection *toc
, bool adhereTableLimit) const = 0;
};

View File

@ -78,7 +78,6 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
, bool adhereTableLimit) const
{

View File

@ -38,7 +38,6 @@ public:
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
, bool adhereTableLimit) const;

View File

@ -51,7 +51,6 @@ TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOpti
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
, bool adhereTableLimit) const
{

View File

@ -37,7 +37,6 @@ public:
virtual void Process(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
, bool adhereTableLimit) const;
private:

View File

@ -6,7 +6,7 @@
DistortionScoreProducer::DistortionScoreProducer()
{
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
size_t DistortionScoreProducer::GetNumScoreComponents() const
@ -34,7 +34,7 @@ float DistortionScoreProducer::CalculateDistortionScore(const WordsRange &prev,
WordPenaltyProducer::WordPenaltyProducer()
{
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
size_t WordPenaltyProducer::GetNumScoreComponents() const

View File

@ -29,6 +29,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
FactorCollection FactorCollection::s_instance;
void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorType, const string &filePath)
{
ifstream inFile(filePath.c_str());

View File

@ -43,16 +43,20 @@ class FactorCollection
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
protected:
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
static FactorCollection s_instance;
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
FactorSet m_collection; /**< collection of all factors */
StringSet m_factorStringCollection; /**< collection of unique string used by factors */
public:
//! constructor
//! constructor. only the 1 static variable can be created
FactorCollection()
:m_factorId(0)
{}
public:
static FactorCollection& Instance() { return s_instance; }
//! Destructor
~FactorCollection();

View File

@ -34,15 +34,16 @@ using namespace std;
GenerationDictionary::GenerationDictionary(size_t numFeatures)
: Dictionary(numFeatures)
{
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
bool GenerationDictionary::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const std::string &filePath
, FactorDirection direction)
{
FactorCollection &factorCollection = FactorCollection::Instance();
const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
//factors

View File

@ -62,7 +62,6 @@ public:
//! load data file
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const std::string &filePath
, FactorDirection direction);

View File

@ -53,7 +53,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
, m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
, m_wordDeleted(false)
, m_languageModelStates(StaticData::Instance()->GetLMSize(), LanguageModelSingleFactor::UnknownState)
, m_languageModelStates(StaticData::Instance().GetLMSize(), LanguageModelSingleFactor::UnknownState)
, m_arcList(NULL)
, m_id(0)
, m_lmstats(NULL)
@ -187,7 +187,7 @@ int Hypothesis::NGramCompare(const Hypothesis &compare) const
if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;
if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;
if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;
if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0;
if (! StaticData::Instance().GetSourceStartPosMattersForRecombination()) return 0;
if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;
if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;
return 0;
@ -207,7 +207,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
LMList::const_iterator iterLM;
// will be null if LM stats collection is disabled
if (StaticData::Instance()->IsComputeLMBackoffStats()) {
if (StaticData::Instance().IsComputeLMBackoffStats()) {
m_lmstats = new vector<vector<unsigned int> >(languageModels.size(), vector<unsigned int>(0));
}
@ -299,7 +299,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
void Hypothesis::CalcDistortionScore()
{
const DistortionScoreProducer *dsp = StaticData::Instance()->GetDistortionScoreProducer();
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
float distortionScore = dsp->CalculateDistortionScore(
m_prevHypo->GetCurrSourceWordsRange(),
this->GetCurrSourceWordsRange()
@ -316,8 +316,10 @@ void Hypothesis::ResetScore()
/***
* calculate the logarithm of our total translation score (sum up components)
*/
void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore)
void Hypothesis::CalcScore(const SquareMatrix &futureScore)
{
const StaticData &staticData = StaticData::Instance();
// DISTORTION COST
CalcDistortionScore();
@ -367,9 +369,9 @@ void Hypothesis::CalcFutureScore(const SquareMatrix &futureScore)
}
// add future costs for distortion model
if(StaticData::Instance()->UseDistortionFutureCosts())
if(StaticData::Instance().UseDistortionFutureCosts())
m_futureScore += m_sourceCompleted.GetFutureCosts( (int)m_currSourceWordsRange.GetEndPos() )
* StaticData::Instance()->GetWeightDistortion();
* StaticData::Instance().GetWeightDistortion();
}
@ -420,9 +422,9 @@ void Hypothesis::CleanupArcList()
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of arc list if need distinct n-best list
*/
const StaticData *staticData = StaticData::Instance();
size_t nBestSize = staticData->GetNBestSize();
bool distinctNBest = staticData->GetDistinctNBest();
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest();
if (!distinctNBest && m_arcList->size() > nBestSize * 5)
{ // prune arc list only if there too many arcs
@ -488,7 +490,7 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor
std::string Hypothesis::GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors;
const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input);
const size_t maxSourceFactors = StaticData::Instance().GetMaxNumFactors(Input);
for(size_t i=0; i < maxSourceFactors; i++)
{
allFactors.push_back(i);
@ -498,7 +500,7 @@ std::string Hypothesis::GetSourcePhraseStringRep() const
std::string Hypothesis::GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors;
const size_t maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output);
const size_t maxTargetFactors = StaticData::Instance().GetMaxNumFactors(Output);
for(size_t i=0; i < maxTargetFactors; i++)
{
allFactors.push_back(i);

View File

@ -140,7 +140,7 @@ public:
void ResetScore();
void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore);
void CalcScore(const SquareMatrix &futureScore);
int GetId()const
{

View File

@ -31,7 +31,7 @@ using namespace std;
HypothesisCollection::HypothesisCollection()
{
m_nBestIsEnabled = StaticData::Instance()->IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}
@ -80,7 +80,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
{
if (hypo->GetTotalScore() < m_worstScore)
{ // really bad score. don't bother adding hypo into collection
StaticData::Instance()->GetSentenceStats().AddDiscarded();
StaticData::Instance().GetSentenceStats().AddDiscarded();
VERBOSE(3,"discarded, too bad for stack" << std::endl);
FREEHYPO(hypo);
return;
@ -98,7 +98,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
Hypothesis *hypoExisting = *iterExisting;
assert(iterExisting != m_hypos.end());
StaticData::Instance()->GetSentenceStats().AddRecombination(*hypo, **iterExisting);
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
// found existing hypo with same target ending.
// keep the best 1
@ -174,7 +174,7 @@ void HypothesisCollection::PruneToSize(size_t newSize)
{
iterator iterRemove = iter++;
Remove(iterRemove);
StaticData::Instance()->GetSentenceStats().AddPruning();
StaticData::Instance().GetSentenceStats().AddPruning();
}
else
{

View File

@ -55,7 +55,7 @@ public:
virtual size_t GetSize() const =0;
//! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection) =0;
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;
//! Output debugging info to stream out
virtual void Print(std::ostream&) const =0;

View File

@ -36,7 +36,7 @@ using namespace std;
LanguageModel::LanguageModel(bool registerScore)
{
if (registerScore)
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
LanguageModel::~LanguageModel() {}

View File

@ -41,7 +41,7 @@ namespace LanguageModelFactory
{
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation, const std::vector<FactorType> &factorTypes
, size_t nGramOrder, const std::string &languageModelFile, float weight, FactorCollection &factorCollection)
, size_t nGramOrder, const std::string &languageModelFile, float weight)
{
LanguageModel *lm = NULL;
switch (lmImplementation)
@ -103,14 +103,14 @@ namespace LanguageModelFactory
switch (lm->GetLMType())
{
case SingleFactor:
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorCollection, factorTypes[0], weight, nGramOrder))
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], weight, nGramOrder))
{
delete lm;
lm = NULL;
}
break;
case MultiFactor:
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorCollection, factorTypes, weight, nGramOrder))
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
{
delete lm;
lm = NULL;

View File

@ -8,7 +8,6 @@
#include "TypeDef.h"
class LanguageModel;
class FactorCollection;
namespace LanguageModelFactory {
@ -17,7 +16,7 @@ namespace LanguageModelFactory {
* language model toolkit as its underlying implementation
*/
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation, const std::vector<FactorType> &factorTypes
, size_t nGramOrder, const std::string &languageModelFile, float weight, FactorCollection &factorCollection);
, size_t nGramOrder, const std::string &languageModelFile, float weight);
};

View File

@ -12,14 +12,15 @@ LanguageModelInternal::LanguageModelInternal(bool registerScore)
}
bool LanguageModelInternal::Load(const std::string &filePath
, FactorCollection &factorCollection
, FactorType factorType
, float weight
, size_t nGramOrder)
, FactorType factorType
, float weight
, size_t nGramOrder)
{
assert(nGramOrder <= 3);
TRACE_ERR( "Loading Internal LM: " << filePath << endl);
FactorCollection &factorCollection = FactorCollection::Instance();
m_filePath = filePath;
m_factorType = factorType;
m_weight = weight;

View File

@ -25,7 +25,6 @@ protected:
public:
LanguageModelInternal(bool registerScore);
bool Load(const std::string &filePath
, FactorCollection &factorCollection
, FactorType factorType
, float weight
, size_t nGramOrder);

View File

@ -40,7 +40,6 @@ class LanguageModelJoint : public LanguageModelMultiFactor
{
protected:
LanguageModelSingleFactor *m_lmImpl;
FactorCollection *m_factorCollection;
std::vector<FactorType> m_factorTypesOrdered;
size_t m_implFactor;
@ -57,7 +56,6 @@ public:
}
bool Load(const std::string &filePath
, FactorCollection &factorCollection
, const std::vector<FactorType> &factorTypes
, float weight
, size_t nGramOrder)
@ -68,9 +66,10 @@ public:
m_nGramOrder = nGramOrder;
m_factorTypesOrdered= factorTypes;
m_factorCollection = &factorCollection;
m_implFactor = 0;
FactorCollection &factorCollection = FactorCollection::Instance();
// sentence markers
for (size_t index = 0 ; index < factorTypes.size() ; ++index)
{
@ -79,7 +78,7 @@ public:
m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
}
return m_lmImpl->Load(filePath, factorCollection, m_implFactor, weight, nGramOrder);
return m_lmImpl->Load(filePath, m_implFactor, weight, nGramOrder);
}
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
@ -114,7 +113,7 @@ public:
stream << "|" << factor->GetString();
}
factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str());
factor = FactorCollection::Instance().AddFactor(Output, m_implFactor, stream.str());
Word* jointWord = new Word;
jointWord->SetFactor(m_implFactor, factor);

View File

@ -39,7 +39,6 @@ protected:
public:
virtual bool Load(const std::string &filePath
, FactorCollection &factorCollection
, const std::vector<FactorType> &factorTypes
, float weight
, size_t nGramOrder) = 0;

View File

@ -41,7 +41,6 @@ public:
virtual ~LanguageModelSingleFactor();
virtual bool Load(const std::string &filePath
, FactorCollection &factorCollection
, FactorType factorType
, float weight
, size_t nGramOrder) = 0;

View File

@ -51,7 +51,6 @@ public:
delete m_lmImpl;
}
bool Load(const std::string &filePath
, FactorCollection &factorCollection
, FactorType factorType
, float weight
, size_t nGramOrder)
@ -63,10 +62,12 @@ public:
m_realNGramOrder = 3;
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);
return m_lmImpl->Load(filePath, factorCollection, m_factorType, weight, nGramOrder);
return m_lmImpl->Load(filePath, m_factorType, weight, nGramOrder);
}
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const

View File

@ -30,7 +30,7 @@ LexicalReordering::LexicalReordering(const std::string &filePath,
m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filePath(filePath), m_sourceFactors(input), m_targetFactors(output)
{
//add score producer
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
//manage the weights by SetWeightsForScoreProducer method of static data.
if(direction == LexReorderType::Bidirectional)
{
@ -48,7 +48,7 @@ LexicalReordering::LexicalReordering(const std::string &filePath,
else if ( orientation == DistortionOrientationType::Msd) {
m_numOrientationTypes = 3;
}
const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights);
const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
// Load the file
LoadFile();
// PrintTable();

View File

@ -39,20 +39,21 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
Manager::Manager(InputType const& source, StaticData &staticData)
Manager::Manager(InputType const& source)
:m_source(source)
,m_hypoStack(source.GetSize() + 1)
,m_staticData(staticData)
,m_possibleTranslations(source.CreateTranslationOptionCollection())
,m_initialTargetPhrase(Output)
{
const StaticData &staticData = StaticData::Instance();
TRACE_ERR("Translating: " << m_source << endl);
std::vector < HypothesisCollection >::iterator iterStack;
for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
{
HypothesisCollection &sourceHypoColl = *iterStack;
sourceHypoColl.SetMaxHypoStackSize(m_staticData.GetMaxHypoStackSize());
sourceHypoColl.SetBeamThreshold(m_staticData.GetBeamThreshold());
sourceHypoColl.SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
sourceHypoColl.SetBeamThreshold(staticData.GetBeamThreshold());
}
}
@ -68,16 +69,17 @@ Manager::~Manager()
*/
void Manager::ProcessSentence()
{
m_staticData.ResetSentenceStats(m_source);
vector < list < DecodeStep* > * >&decodeStepVL = m_staticData.GetDecodeStepVL();
const StaticData &staticData = StaticData::Instance();
staticData.ResetSentenceStats(m_source);
const vector < list < DecodeStep* > * >
&decodeStepVL = staticData.GetDecodeStepVL();
// create list of all possible translations
// this is only valid if:
// 1. generation of source sentence is not done 1st
// 2. initial hypothesis factors are given in the sentence
//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
m_possibleTranslations->CreateTranslationOptions(decodeStepVL
, m_staticData.GetFactorCollection());
m_possibleTranslations->CreateTranslationOptions(decodeStepVL);
// initial seed hypothesis: nothing translated, no words produced
{
@ -93,7 +95,7 @@ void Manager::ProcessSentence()
// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
@ -109,7 +111,7 @@ void Manager::ProcessSentence()
}
// some more logging
VERBOSE(2,m_staticData.GetSentenceStats());
VERBOSE(2, staticData.GetSentenceStats());
}
/** Find all translation options to expand one hypothesis, trigger expansion
@ -120,7 +122,7 @@ void Manager::ProcessSentence()
void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = m_staticData.GetMaxDistortion();
int maxDistortion = StaticData::Instance().GetMaxDistortion();
// no limit of reordering: only check for overlap
if (maxDistortion < 0)
@ -218,11 +220,14 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
{
// create hypothesis and calculate all its scores
Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
newHypo->CalcScore(m_staticData, m_possibleTranslations->GetFutureScore());
newHypo->CalcScore(m_possibleTranslations->GetFutureScore());
// logging for the curious
IFVERBOSE(3) {
newHypo->PrintHypothesis(m_source, m_staticData.GetWeightDistortion(), m_staticData.GetWeightWordPenalty());
const StaticData &staticData = StaticData::Instance();
newHypo->PrintHypothesis(m_source
, staticData.GetWeightDistortion()
, staticData.GetWeightWordPenalty());
}
// add to hypothesis stack
@ -347,7 +352,7 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret,bool onlyDistinct) co
if(onlyDistinct)
{
size_t nBestFactor = StaticData::Instance()->GetNBestFactor();
size_t nBestFactor = StaticData::Instance().GetNBestFactor();
if (nBestFactor > 0)
contenders.Prune(count * nBestFactor);
}

View File

@ -76,7 +76,6 @@ protected:
std::vector < HypothesisCollection > m_hypoStack; /**< stacks to store hypothesis (partial translations) */
// no of elements = no of words in source + 1
StaticData &m_staticData; /**< holds various kinds of constants, counters, and global data structures */
TranslationOptionCollection *m_possibleTranslations; /**< pre-computed list of translation options for the phrases in this sentence */
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
@ -89,7 +88,7 @@ protected:
void OutputHypoStack(int stack = -1);
void OutputHypoStackSize();
public:
Manager(InputType const& source, StaticData &staticData);
Manager(InputType const& source);
~Manager();
void ProcessSentence();

View File

@ -57,7 +57,7 @@ public:
CleanUp();
delete m_dict;
if (StaticData::Instance()->GetVerboseLevel() >= 2)
if (StaticData::Instance().GetVerboseLevel() >= 2)
{
TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
@ -326,7 +326,7 @@ public:
for(size_t len=1;len<=srcSize;++len) path1Best[len]+=srcSize-len+1;
if (StaticData::Instance()->GetVerboseLevel() >= 2 && exPathsD.size())
if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size())
{
TRACE_ERR("path stats for current CN: \n");
std::cerr.setf(std::ios::scientific);
@ -437,7 +437,7 @@ public:
} // end while(!stack.empty())
if (StaticData::Instance()->GetVerboseLevel() >= 2 && exploredPaths.size())
if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size())
{
TRACE_ERR("CN (explored): ");
std::copy(exploredPaths.begin()+1,exploredPaths.end(),

View File

@ -27,7 +27,7 @@ PartialTranslOptColl::PartialTranslOptColl()
{
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
m_maxSize = StaticData::Instance()->GetMaxNoPartTransOpt();
m_maxSize = StaticData::Instance().GetMaxNoPartTransOpt();
m_totalPruned = 0;
}

View File

@ -82,7 +82,7 @@ void Phrase::MergeFactors(const Phrase &copy)
{
assert(GetSize() == copy.GetSize());
size_t size = GetSize();
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
@ -180,7 +180,7 @@ vector< vector<string> > Phrase::Parse(const std::string &phraseString, const st
// to
// "KOMMA" "none"
if (factorStrVector.size() != factorOrder.size()) {
TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance()->GetCurrentInputPosition() <<*/ std::endl
TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance().GetCurrentInputPosition() <<*/ std::endl
<< " Expected input to have words composed of " << factorOrder.size() << " factor(s) (form FAC1|FAC2|...)" << std::endl
<< " but instead received input with " << factorStrVector.size() << " factor(s).\n");
abort();
@ -191,9 +191,10 @@ vector< vector<string> > Phrase::Parse(const std::string &phraseString, const st
}
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
, const vector< vector<string> > &phraseVector
, FactorCollection &factorCollection)
, const vector< vector<string> > &phraseVector)
{
FactorCollection &factorCollection = FactorCollection::Instance();
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
{
// add word this phrase
@ -210,11 +211,10 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
, const string &phraseString
, FactorCollection &factorCollection
, const string &factorDelimiter)
, const string &factorDelimiter)
{
vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
CreateFromString(factorOrder, phraseVector, factorCollection);
CreateFromString(factorOrder, phraseVector);
}
bool Phrase::operator < (const Phrase &compare) const
@ -234,7 +234,7 @@ bool Phrase::operator < (const Phrase &compare) const
{
size_t minSize = std::min( thisSize , compareSize );
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
// taken from word.Compare()
for (size_t i = 0 ; i < maxNumFactors ; i++)
{
@ -311,7 +311,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
const size_t size = GetSize();
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)

View File

@ -80,8 +80,7 @@ public:
* \param phraseVector 2D string vector
*/
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::vector< std::vector<std::string> > &phraseVector
, FactorCollection &factorCollection);
, const std::vector< std::vector<std::string> > &phraseVector);
/** Fills phrase with words from format string, typically from phrase table or sentence input
* \param factorOrder factor types of each element in 2D string vector
* \param phraseString formatted input string to parse
@ -89,7 +88,6 @@ public:
*/
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString
, FactorCollection &factorCollection
, const std::string &factorDelimiter);
/** copy factors from the other phrase to this phrase.

View File

@ -27,7 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
PhraseDictionary::PhraseDictionary(size_t numScoreComponent)
: Dictionary(numScoreComponent),m_tableLimit(0)
{
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
PhraseDictionary::~PhraseDictionary() {}

View File

@ -38,7 +38,6 @@ using namespace std;
bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const string &filePath
, const vector<float> &weight
, size_t tableLimit
@ -83,7 +82,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
continue;
}
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
if (tokens[0] != prevSourcePhrase)
phraseVector = Phrase::Parse(tokens[0], input, factorDelimiter);
@ -99,10 +98,10 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
// source
Phrase sourcePhrase(Input);
sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
sourcePhrase.CreateFromString( input, phraseVector);
//target
TargetPhrase targetPhrase(Output);
targetPhrase.CreateFromString( output, tokens[1], factorCollection, factorDelimiter);
targetPhrase.CreateFromString( output, tokens[1], factorDelimiter);
// component score, for n-best output
std::vector<float> scv(scoreVector.size());

View File

@ -47,7 +47,6 @@ public:
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit

View File

@ -47,7 +47,6 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit
@ -55,6 +54,8 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
, float weightWP
)
{
FactorCollection &factorCollection = FactorCollection::Instance();
if(m_numScoreComponent!=weight.size()) {
stringstream strme;
strme << "ERROR: mismatch of number of scaling factors: "<<weight.size()

View File

@ -39,7 +39,6 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
// initialize ...
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, FactorCollection &factorCollection
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit

View File

@ -4,7 +4,7 @@
#include "StaticData.h"
ScoreComponentCollection::ScoreComponentCollection()
: m_scores(StaticData::Instance()->GetTotalScoreComponents(), 0.0f)
, m_sim(&StaticData::Instance()->GetScoreIndexManager())
: m_scores(StaticData::Instance().GetTotalScoreComponents(), 0.0f)
, m_sim(&StaticData::Instance().GetScoreIndexManager())
{}

View File

@ -26,23 +26,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "Util.h"
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder,
FactorCollection &factorCollection)
int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
{
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
std::string line;
if (getline(in, line, '\n').eof())
return 0;
line = Trim(line);
Phrase::CreateFromString(factorOrder, line, factorCollection, factorDelimiter);
Phrase::CreateFromString(factorOrder, line, factorDelimiter);
return 1;
}
TranslationOptionCollection*
Sentence::CreateTranslationOptionCollection() const
{
size_t maxNoTransOptPerCoverage = StaticData::Instance()->GetMaxNoTransOptPerCoverage();
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
TranslationOptionCollection *rv= new TranslationOptionCollectionText(*this, maxNoTransOptPerCoverage);
assert(rv);
return rv;

View File

@ -61,7 +61,7 @@ class Sentence : public Phrase, public InputType
return Phrase::GetSize();
}
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const;
TranslationOptionCollection* CreateTranslationOptionCollection() const;

View File

@ -58,7 +58,7 @@ static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z
return max;
}
StaticData* StaticData::s_instance(0);
StaticData StaticData::s_instance;
StaticData::StaticData()
:m_fLMsLoaded(false)
@ -75,8 +75,6 @@ StaticData::StaticData()
m_maxFactorIdx[0] = 0; // source side
m_maxFactorIdx[1] = 0; // target side
s_instance = this;
// memory pools
Phrase::InitializeMemPool();
}
@ -469,7 +467,7 @@ bool StaticData::LoadLanguageModels()
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(lmImplementation, factorTypes
, nGramOrder, languageModelFile, weightAll[i], m_factorCollection);
, nGramOrder, languageModelFile, weightAll[i]);
if (lm == NULL)
{
UserMessage::Add("no LM created. We probably don't have it compiled");
@ -523,7 +521,6 @@ bool StaticData::LoadGenerationTables()
assert(m_generationDictionary.back() && "could not create GenerationDictionary");
if (!m_generationDictionary.back()->Load(input
, output
, m_factorCollection
, filePath
, Output))
{
@ -623,7 +620,6 @@ bool StaticData::LoadPhraseTables()
PhraseDictionaryMemory *pd=new PhraseDictionaryMemory(numScoreComponent);
if (!pd->Load(input
, output
, m_factorCollection
, filePath
, weight
, maxTargetPhrase[index]
@ -640,7 +636,7 @@ bool StaticData::LoadPhraseTables()
{
TRACE_ERR( "using binary phrase tables for idx "<<currDict<<"\n");
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent,(currDict==0 ? m_numInputScores : 0));
if (!pd->Load(input,output,m_factorCollection,filePath,weight,
if (!pd->Load(input,output,filePath,weight,
maxTargetPhrase[index],
GetAllLM(),
GetWeightWordPenalty()))
@ -739,7 +735,7 @@ bool StaticData::LoadMapping()
return true;
}
void StaticData::CleanUpAfterSentenceProcessing()
void StaticData::CleanUpAfterSentenceProcessing() const
{
for(size_t i=0;i<m_phraseDictionary.size();++i)
m_phraseDictionary[i]->CleanUp();
@ -758,7 +754,7 @@ void StaticData::CleanUpAfterSentenceProcessing()
/** initialize the translation and language models for this sentence
(includes loading of translation table entries on demand, if
binary format is used) */
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in)
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
{
for(size_t i=0;i<m_phraseDictionary.size();++i)
{

View File

@ -46,9 +46,8 @@ class DecodeStep;
class StaticData
{
private:
static StaticData* s_instance;
static StaticData s_instance;
protected:
FactorCollection m_factorCollection;
std::vector<PhraseDictionary*> m_phraseDictionary;
std::vector<GenerationDictionary*> m_generationDictionary;
std::vector < std::list < DecodeStep*> * > m_decodeStepVL;
@ -106,6 +105,9 @@ protected:
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
size_t m_maxNumFactors; //! max number of factors on both source and target sides
//! constructor. only the 1 static variable can be created
StaticData();
//! helper fn to set bool param from ini file/command line
void SetBooleanParameter(bool *paramter, string parameterName, bool defaultValue);
@ -125,17 +127,21 @@ protected:
bool LoadLexicalReorderingModel();
public:
StaticData();
~StaticData();
static const StaticData* Instance() { return s_instance; }
static const StaticData& Instance() { return s_instance; }
static bool LoadDataStatic(Parameter *parameter)
{
return s_instance.LoadData(parameter);
}
/** Main function to load everything.
* Also initialize the Parameter object
*/
bool LoadData(Parameter *parameter);
const PARAM_VEC &GetParam(const std::string &paramName)
const PARAM_VEC &GetParam(const std::string &paramName) const
{
return m_parameter->GetParam(paramName);
}
@ -153,7 +159,7 @@ public:
return m_outputFactorOrder;
}
std::vector < std::list < DecodeStep* > * > &GetDecodeStepVL()
const std::vector < std::list < DecodeStep* > * > &GetDecodeStepVL() const
{
return m_decodeStepVL;
}
@ -174,10 +180,6 @@ public:
{
return m_maxNoPartTransOpt;
}
FactorCollection &GetFactorCollection()
{
return m_factorCollection;
}
std::vector<LexicalReordering*> GetReorderModels() const
{
return m_reorderModels;
@ -290,8 +292,8 @@ public:
void SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights);
int GetInputType() const {return m_inputType;}
size_t GetNumInputScores() const {return m_numInputScores;}
void InitializeBeforeSentenceProcessing(InputType const&);
void CleanUpAfterSentenceProcessing();
void InitializeBeforeSentenceProcessing(InputType const&) const;
void CleanUpAfterSentenceProcessing() const;
SentenceStats& GetSentenceStats() const
{
return *m_sentenceStats;

View File

@ -38,7 +38,7 @@ TargetPhrase::TargetPhrase(FactorDirection direction)
void TargetPhrase::SetScore()
{ // used when creating translations of unknown words:
m_transScore = m_ngramScore = 0;
m_fullScore = - StaticData::Instance()->GetWeightWordPenalty();
m_fullScore = - StaticData::Instance().GetWeightWordPenalty();
}
void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
@ -84,7 +84,7 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
void TargetPhrase::SetWeights(const ScoreProducer* translationScoreProducer, const vector<float> &weightT)
{
// calling this function in case of confusion net input is undefined
assert(StaticData::Instance()->GetInputType()==0);
assert(StaticData::Instance().GetInputType()==0);
/* one way to fix this, you have to make sure the weightT contains (in
addition to the usual phrase translation scaling factors) the input

View File

@ -83,14 +83,14 @@ void TranslationOption::CalcScore()
float m_ngramScore = 0;
float retFullScore = 0;
const LMList &allLM = StaticData::Instance()->GetAllLM();
const LMList &allLM = StaticData::Instance().GetAllLM();
allLM.CalcScore(GetTargetPhrase(), retFullScore, m_ngramScore, &m_scoreBreakdown);
// future score
m_futureScore = retFullScore - m_ngramScore;
size_t phraseSize = GetTargetPhrase().GetSize();
m_futureScore += m_scoreBreakdown.InnerProduct(StaticData::Instance()->GetAllWeights()) - phraseSize * StaticData::Instance()->GetWeightWordPenalty();
m_futureScore += m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize * StaticData::Instance().GetWeightWordPenalty();
}
TO_STRING_BODY(TranslationOption);

View File

@ -125,8 +125,7 @@ void TranslationOptionCollection::Prune()
* \param factorCollection input sentence with all factors
*/
void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
, FactorCollection &factorCollection)
void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL)
{
size_t size = m_source.GetSize();
// try to translation for coverage with no trans by expanding table limit
@ -139,7 +138,7 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
size_t numTransOpt = fullList.size();
if (numTransOpt == 0)
{
CreateTranslationOptionsForRange(*decodeStepList, factorCollection
CreateTranslationOptionsForRange(*decodeStepList
, pos, pos, false);
}
}
@ -165,7 +164,7 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
for (size_t currPos = 0 ; currPos < size ; ++currPos)
{
if (process[currPos])
ProcessUnknownWord(currPos, *m_factorCollection);
ProcessUnknownWord(currPos);
}
}
@ -182,54 +181,54 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::li
* \param factorCollection input sentence with all factors
*/
void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
size_t sourcePos
, FactorCollection &factorCollection)
size_t sourcePos)
{
// unknown word, add as trans opt
FactorCollection &factorCollection = FactorCollection::Instance();
size_t isDigit = 0;
if (StaticData::Instance()->GetDropUnknown())
{
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const string &s = f->GetString();
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)
isDigit = 0;
else
isDigit = 1;
// modify the starting bitmap
}
TranslationOption *transOpt;
if (! StaticData::Instance()->GetDropUnknown() || isDigit)
{
// add to dictionary
TargetPhrase targetPhrase(Output);
Word &targetWord = targetPhrase.AddWord();
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *sourceFactor = sourceWord[currFactor];
if (sourceFactor == NULL)
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
else
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
}
targetPhrase.SetScore();
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
}
size_t isDigit = 0;
if (StaticData::Instance().GetDropUnknown())
{
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const string &s = f->GetString();
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)
isDigit = 0;
else
{ // drop source word. create blank trans opt
const TargetPhrase targetPhrase(Output);
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
isDigit = 1;
// modify the starting bitmap
}
TranslationOption *transOpt;
if (! StaticData::Instance().GetDropUnknown() || isDigit)
{
// add to dictionary
TargetPhrase targetPhrase(Output);
Word &targetWord = targetPhrase.AddWord();
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *sourceFactor = sourceWord[currFactor];
if (sourceFactor == NULL)
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
else
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
}
transOpt->CalcScore();
Add(transOpt);
targetPhrase.SetScore();
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
}
else
{ // drop source word. create blank trans opt
const TargetPhrase targetPhrase(Output);
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
}
transOpt->CalcScore();
Add(transOpt);
}
/** compute future score matrix in a dynamic programming fashion.
@ -319,11 +318,8 @@ void TranslationOptionCollection::CalcFutureScore()
* \param decodeStepList list of decoding steps
* \param factorCollection input sentence with all factors
*/
void TranslationOptionCollection::CreateTranslationOptions(const vector <list < DecodeStep* > * > &decodeStepVL
, FactorCollection &factorCollection)
{
m_factorCollection = &factorCollection;
void TranslationOptionCollection::CreateTranslationOptions(const vector <list < DecodeStep* > * > &decodeStepVL)
{
// loop over all substrings of the source sentence, look them up
// in the phraseDictionary (which is the- possibly filtered-- phrase
// table loaded on initialization), generate TranslationOption objects
@ -335,14 +331,14 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <list <
{
for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
{
CreateTranslationOptionsForRange( *decodeStepList, factorCollection, startPos, endPos, true);
CreateTranslationOptionsForRange( *decodeStepList, startPos, endPos, true);
}
}
}
VERBOSE(3,"Translation Option Collection\n " << *this << endl);
ProcessUnknownWord(decodeStepVL, factorCollection);
ProcessUnknownWord(decodeStepVL);
// Prune
Prune();
@ -361,7 +357,6 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <list <
*/
void TranslationOptionCollection::CreateTranslationOptionsForRange(
const list < DecodeStep* > &decodeStepList
, FactorCollection &factorCollection
, size_t startPos
, size_t endPos
, bool adhereTableLimit)
@ -373,8 +368,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
const DecodeStep &decodeStep = **iterStep;
ProcessInitialTranslation(decodeStep, factorCollection
, *oldPtoc
ProcessInitialTranslation(decodeStep, *oldPtoc
, startPos, endPos, adhereTableLimit );
// do rest of decode steps
@ -394,7 +388,6 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
decodeStep.Process(inputPartialTranslOpt
, decodeStep
, *newPtoc
, factorCollection
, this
, adhereTableLimit);
}
@ -427,7 +420,6 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
*/
void TranslationOptionCollection::ProcessInitialTranslation(
const DecodeStep &decodeStep
, FactorCollection &factorCollection
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos
, size_t endPos

View File

@ -63,23 +63,20 @@ protected:
InputType const &m_source; /*< reference to the input */
SquareMatrix m_futureScore; /*< matrix of future costs for contiguous parts (span) of the input */
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span (phrase???) */
FactorCollection *m_factorCollection;
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage);
void CalcFutureScore();
virtual void ProcessInitialTranslation(const DecodeStep &decodeStep
, FactorCollection &factorCollection
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit );
//! Force a creation of a translation option where there are none for a particular source position.
void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL, FactorCollection &factorCollection);
void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL);
//! special handling of ONE unknown words.
virtual void ProcessOneUnknownWord(const Word &sourceWord
, size_t sourcePos
, FactorCollection &factorCollection);
, size_t sourcePos);
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
void Prune();
@ -95,8 +92,7 @@ protected:
void Add(const TranslationOption *translationOption);
//! implemented by inherited class, called by this class
virtual void ProcessUnknownWord(size_t sourcePos
, FactorCollection &factorCollection)=0;
virtual void ProcessUnknownWord(size_t sourcePos)=0;
public:
virtual ~TranslationOptionCollection();
@ -108,11 +104,9 @@ public:
size_t GetSize() const { return m_source.GetSize(); };
//! Create all possible translations from the phrase tables
virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
, FactorCollection &factorCollection);
virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL);
//! Create translation options that exactly cover a specific input span.
virtual void CreateTranslationOptionsForRange(const std::list < DecodeStep* > &decodeStepList
, FactorCollection &factorCollection
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit);

View File

@ -19,14 +19,13 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
* at a particular source position
*/
void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(
size_t sourcePos
, FactorCollection &factorCollection)
size_t sourcePos)
{
ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i)
ProcessOneUnknownWord(i->first,sourcePos,factorCollection);
ProcessOneUnknownWord(i->first ,sourcePos);
}

View File

@ -9,7 +9,6 @@ class TranslationOptionCollectionConfusionNet : public TranslationOptionCollecti
public:
TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage);
void ProcessUnknownWord( size_t sourcePos
, FactorCollection &factorCollection);
void ProcessUnknownWord( size_t sourcePos);
};

View File

@ -37,9 +37,8 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
/* forcibly create translation option for a particular source word.
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
*/
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos
, FactorCollection &factorCollection)
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos)
{
const Word &sourceWord = m_source.GetWord(sourcePos);
ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection);
ProcessOneUnknownWord(sourceWord,sourcePos);
}

View File

@ -28,8 +28,7 @@ class LMList;
class TranslationOptionCollectionText : public TranslationOptionCollection {
public:
void ProcessUnknownWord( size_t sourcePos
, FactorCollection &factorCollection);
void ProcessUnknownWord( size_t sourcePos);
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage);

View File

@ -47,8 +47,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/** verbose macros
* */
#define VERBOSE(level,str) { if (StaticData::Instance()->GetVerboseLevel() >= level) { TRACE_ERR(str); } }
#define IFVERBOSE(level) if (StaticData::Instance()->GetVerboseLevel() >= level)
#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
//! get string representation of any object/variable, as long as it can pipe to a stream
template<typename T>

View File

@ -64,7 +64,7 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
{
stringstream strme;
assert(factorType.size() <= MAX_NUM_FACTORS);
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
bool firstPass = true;
for (unsigned int i = 0 ; i < factorType.size() ; i++)
{
@ -86,7 +86,7 @@ ostream& operator<<(ostream& out, const Word& word)
{
stringstream strme;
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
bool firstPass = true;
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{