Creating branch for multi-factor performance improvements

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/cdyer-multifactor@831 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
redpony 2006-09-25 21:38:04 +00:00
commit a61645851e
35 changed files with 233 additions and 403 deletions

View File

@ -55,7 +55,7 @@ ConfusionNet::ConfusionNet(Sentence const& s)
{
data.resize(s.GetSize());
for(size_t i=0;i<s.GetSize();++i)
data[i].push_back(std::make_pair(Word(s.GetFactorArray(i)),0.0));
data[i].push_back(std::make_pair(s.GetWord(i),0.0));
}
@ -185,7 +185,7 @@ std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint)
return "";
}
#pragma warning(disable:4716)
const FactorArray& ConfusionNet::GetFactorArray(size_t) const {
/**
 * Positional word access is not supported by this class: calling it reports
 * an error on stderr and terminates the process.
 *
 * There is deliberately no return statement because abort() does not return.
 */
const Word& ConfusionNet::GetWord(size_t) const {
	// Name the function that was actually called (it was renamed from GetFactorArray).
	std::cerr<<"ERROR: call to ConfusionNet::GetWord\n";
	abort();
}

View File

@ -41,7 +41,7 @@ class ConfusionNet : public InputType {
Phrase GetSubString(const WordsRange&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
const FactorArray& GetFactorArray(size_t pos) const;
const Word& GetWord(size_t pos) const;
TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const;

View File

@ -107,10 +107,10 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
{
// generatable factors for this word to be put in wordList
WordList &wordList = wordListVector[wordListVectorPos];
const FactorArray &factorArray = targetPhrase.GetFactorArray(currPos);
const Word &word = targetPhrase.GetWord(currPos);
// consult dictionary for possible generations for this word
const OutputWordCollection *wordColl = generationDictionary.FindWord(factorArray);
const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
if (wordColl == NULL)
{ // word not found in generation dictionary

View File

@ -1,62 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "FactorArrayWrapper.h"
#include "Util.h"
#include "Word.h"
using namespace std;
// Out-of-line definition of the destructor; the body is intentionally empty.
FactorArrayWrapper::~FactorArrayWrapper()
{
}
int FactorArrayWrapper::Compare(const FactorArrayWrapper &compare) const
{
return Compare(GetFactorArray(), compare.GetFactorArray());
}
// static functions
int FactorArrayWrapper::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
{
for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
{
const Factor *targetFactor = targetWord[factorType]
,*sourceFactor = sourceWord[factorType];
if (targetFactor == NULL || sourceFactor == NULL)
{
continue;
}
int result = targetFactor->Compare(*sourceFactor);
if ( result )
return result;
}
return 0;
}
TO_STRING_BODY(FactorArrayWrapper);
// friend
// Streams the wrapped factor array using Word::ToString.
// The wrapper's pointer is dereferenced unconditionally, so the wrapper must
// already refer to a factor array.
ostream& operator<<(ostream& out, const FactorArrayWrapper& wrapper)
{
	const FactorArray &wrapped = *wrapper.m_factorArrayPtr;
	return out << Word::ToString(wrapped);
}

View File

@ -1,82 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include "TypeDef.h"
#include "Factor.h"
/**
 * Thin view over a FactorArray that adds ordering and printing.
 *
 * Only a pointer to the array is stored; copying or assigning a wrapper copies
 * that pointer, not the array it points to.
 */
class FactorArrayWrapper
{
	friend std::ostream& operator<<(std::ostream&, const FactorArrayWrapper&);

protected:
	const FactorArray *m_factorArrayPtr; // array being viewed; null until one is assigned

public:
	/**
	 * Creates a wrapper that does not refer to any array yet.
	 * The pointer is explicitly nulled so that a default-constructed wrapper
	 * (e.g. an element of a freshly sized std::vector) never holds an
	 * indeterminate address. Assign a real wrapper before using any accessor.
	 */
	FactorArrayWrapper()
	:m_factorArrayPtr(0) {}

	/** Creates a wrapper referring to factorArray (stores its address). */
	FactorArrayWrapper(const FactorArray &factorArray)
	:m_factorArrayPtr(&factorArray) {}

	virtual ~FactorArrayWrapper();

	/** Makes this wrapper refer to the same array as other. */
	FactorArrayWrapper& operator=(const FactorArrayWrapper &other)
	{
		if(this != &other)
		{
			m_factorArrayPtr = other.m_factorArrayPtr;
		}
		return *this;
	}

	/** Returns the factor stored at position index of the wrapped array. */
	const Factor *operator[](size_t index) const
	{
		return (*m_factorArrayPtr)[index];
	}

	/** Returns the wrapped array itself. */
	virtual const FactorArray &GetFactorArray() const
	{
		return *m_factorArrayPtr;
	}

	/** Returns the factor of the given type from the wrapped array. */
	inline const Factor *GetFactor(FactorType factorType) const
	{
		return (*m_factorArrayPtr)[factorType];
	}

	int Compare(const FactorArrayWrapper &compare) const;
	// -1 = less than
	// +1 = more than
	// 0 = same

	inline bool operator< (const FactorArrayWrapper &compare) const
	{ // needed to store word in GenerationDictionary map
		// uses comparison of FactorKey
		// 'proper' comparison, not address/id comparison
		return Compare(compare) < 0;
	}

	TO_STRING;

	//statics
	static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
};

View File

@ -62,7 +62,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
exit(1);
}
m_filename = filePath;
m_filename = filePath;
string line;
size_t lineNum = 0;
while(getline(inFile, line))
@ -71,7 +71,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
vector<string> token = Tokenize( line );
// add each line in generation file into class
Word *inputWord = new Word();
Word *inputWord = new Word(); // deleted in destructor
Word outputWord;
// create word with certain factors filled out
@ -113,7 +113,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
GenerationDictionary::~GenerationDictionary()
{
std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator iter;
std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator iter;
for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
{
delete iter->first;
@ -130,13 +130,12 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const
return "Generation score, file=" + m_filename;
}
const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
{
const OutputWordCollection *ret;
FactorArrayWrapper wrapper(factorArray);
std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator
iter = m_collection.find(&wrapper);
std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator
iter = m_collection.find(&word);
if (iter == m_collection.end())
{ // can't find source phrase
ret = NULL;

View File

@ -31,10 +31,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
class FactorCollection;
struct FactorArrayWrapperComparer
struct WordComparer
{
//! ordering predicate for the map: returns true if the word *a sorts before the word *b
bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const
bool operator()(const Word *a, const Word *b) const
{
return *a < *b;
}
@ -47,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
class GenerationDictionary : public Dictionary, public ScoreProducer
{
protected:
std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection;
std::map<const Word* , OutputWordCollection, WordComparer> m_collection;
// 1st = source
// 2nd = target
std::string m_filename;
@ -75,6 +75,6 @@ public:
{
return m_collection.size();
}
const OutputWordCollection *FindWord(const FactorArray &factorArray) const;
const OutputWordCollection *FindWord(const Word &word) const;
};

View File

@ -214,14 +214,14 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
(*_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetWordsCount(), 0);
// 1st n-gram
vector<FactorArrayWrapper> contextFactor(nGramOrder);
vector<const Word*> contextFactor(nGramOrder);
size_t index = 0;
for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
{
if (currPos >= 0)
contextFactor[index++] = GetFactorArray(currPos);
contextFactor[index++] = &GetWord(currPos);
else
contextFactor[index++] = languageModel.GetSentenceStartArray();
contextFactor[index++] = &languageModel.GetSentenceStartArray();
}
lmScore = languageModel.GetValue(contextFactor);
if (_lmstats) { languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]); }
@ -237,7 +237,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
contextFactor[i] = contextFactor[i + 1];
// add last factor
contextFactor.back() = GetFactorArray(currPos);
contextFactor.back() = &GetWord(currPos);
lmScore += languageModel.GetValue(contextFactor);
if (_lmstats)
@ -249,15 +249,15 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
if (m_sourceCompleted.IsComplete())
{
const size_t size = GetSize();
contextFactor.back() = languageModel.GetSentenceEndArray();
contextFactor.back() = &languageModel.GetSentenceEndArray();
for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
{
int currPos = size - nGramOrder + i + 1;
if (currPos < 0)
contextFactor[i] = languageModel.GetSentenceStartArray();
contextFactor[i] = &languageModel.GetSentenceStartArray();
else
contextFactor[i] = GetFactorArray((size_t)currPos);
contextFactor[i] = &GetWord((size_t)currPos);
}
if (_lmstats) {
(*_lmstats)[lmIdx].resize((*_lmstats)[lmIdx].size() + 1); // extra space for the last call
@ -268,7 +268,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
contextFactor[i] = contextFactor[i + 1];
contextFactor.back() = GetFactorArray(currPos);
contextFactor.back() = &GetWord(currPos);
if (_lmstats)
languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]);
}
@ -437,7 +437,8 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor
std::string Hypothesis::GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors;
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input);
for(size_t i=0; i < maxSourceFactors; i++)
{
allFactors.push_back(i);
}
@ -446,7 +447,8 @@ std::string Hypothesis::GetSourcePhraseStringRep() const
std::string Hypothesis::GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors;
for(size_t i=0; i < MAX_NUM_FACTORS; i++)
const size_t maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output);
for(size_t i=0; i < maxTargetFactors; i++)
{
allFactors.push_back(i);
}

View File

@ -1,4 +1,5 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -167,19 +168,19 @@ public:
std::string GetSourcePhraseStringRep() const;
std::string GetTargetPhraseStringRep() const;
// curr - pos is relative from CURRENT hypothesis's starting ind ex
// (ie, start of sentence would be some negative number, which is
// not allowed- USE WITH CAUTION)
inline const FactorArray &GetCurrFactorArray(size_t pos) const
/** curr - pos is relative from CURRENT hypothesis's starting index
* (ie, start of sentence would be some negative number, which is
* not allowed- USE WITH CAUTION) */
inline const Word &GetCurrWord(size_t pos) const
{
return m_targetPhrase.GetFactorArray(pos);
return m_targetPhrase.GetWord(pos);
}
inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
{
return m_targetPhrase.GetFactor(pos, factorType);
}
// recursive - pos is relative from start of sentence
inline const FactorArray &GetFactorArray(size_t pos) const
/** recursive - pos is relative from start of sentence */
inline const Word &GetWord(size_t pos) const
{
const Hypothesis *hypo = this;
while (pos < hypo->GetCurrTargetWordsRange().GetStartPos())
@ -187,11 +188,11 @@ public:
hypo = hypo->GetPrevHypo();
assert(hypo != NULL);
}
return hypo->GetCurrFactorArray(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
}
inline const Factor* GetFactor(size_t pos, FactorType factorType) const
{
return GetFactorArray(pos)[factorType];
return GetWord(pos)[factorType];
}
/***

View File

@ -42,7 +42,7 @@ protected:
virtual Phrase GetSubString(const WordsRange&) const =0;
// virtual std::string GetStringRep(const WordsRange&) const=0;
virtual const FactorArray& GetFactorArray(size_t pos) const=0;
virtual const Word& GetWord(size_t pos) const=0;
TO_STRING;

View File

@ -39,9 +39,6 @@ LanguageModel::LanguageModel(bool registerScore)
{
if (registerScore)
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
Word::Initialize(m_sentenceStartArray);
Word::Initialize(m_sentenceEndArray);
}
LanguageModel::~LanguageModel() {}
@ -59,19 +56,19 @@ void LanguageModel::CalcScore(const Phrase &phrase
ngramScore = 0;
size_t phraseSize = phrase.GetSize();
vector<FactorArrayWrapper> contextFactor;
vector<const Word*> contextFactor;
contextFactor.reserve(m_nGramOrder);
// start of sentence
for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++)
{
contextFactor.push_back(phrase.GetFactorArray(currPos));
contextFactor.push_back(&phrase.GetWord(currPos));
fullScore += GetValue(contextFactor);
}
if (phraseSize >= m_nGramOrder)
{
contextFactor.push_back(phrase.GetFactorArray(m_nGramOrder - 1));
contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1));
ngramScore = GetValue(contextFactor);
}
@ -82,14 +79,14 @@ void LanguageModel::CalcScore(const Phrase &phrase
{
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
}
contextFactor[m_nGramOrder - 1] = phrase.GetFactorArray(currPos);
contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos);
float partScore = GetValue(contextFactor);
ngramScore += partScore;
}
fullScore += ngramScore;
}
LanguageModel::State LanguageModel::GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len) const
LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const
{
State state;
unsigned int dummy;

View File

@ -39,7 +39,7 @@ protected:
float m_weight;
std::string m_filename;
size_t m_nGramOrder;
FactorArray m_sentenceStartArray, m_sentenceEndArray;
Word m_sentenceStartArray, m_sentenceEndArray;
LanguageModel(bool registerScore);
@ -59,19 +59,19 @@ public:
void CalcScore(const Phrase &phrase
, float &fullScore
, float &ngramScore) const;
virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
State GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len = 0) const;
State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const;
size_t GetNGramOrder() const
{
return m_nGramOrder;
}
const FactorArray &GetSentenceStartArray() const
const Word &GetSentenceStartArray() const
{
return m_sentenceStartArray;
}
const FactorArray &GetSentenceEndArray() const
const Word &GetSentenceEndArray() const
{
return m_sentenceEndArray;
}

View File

@ -64,7 +64,7 @@ public:
m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder);
}
float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
{
if (contextFactor.size() == 0)
{
@ -76,30 +76,30 @@ public:
TRACE_ERR(std::endl);
*/
// only process context where last word is a word we want
const Factor *factor = contextFactor.back()[m_factorType];
const Factor *factor = (*contextFactor.back())[m_factorType];
std::string strWord = factor->GetString();
if (strWord.find("???") == 0)
return 0;
// add last word
std::vector<FactorArrayWrapper> chunkContext;
Word chunkWord;
chunkWord.SetFactor(m_factorType, factor);
std::vector<const Word*> chunkContext;
Word* chunkWord = new Word;
chunkWord->SetFactor(m_factorType, factor);
chunkContext.push_back(chunkWord);
// create context in reverse 'cos we skip words we don't want
for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos )
{
const FactorArrayWrapper &factorArray = contextFactor[currPos];
factor = factorArray[m_factorType];
const Word &word = *contextFactor[currPos];
factor = word[m_factorType];
std::string strWord = factor->GetString();
bool skip = strWord.find("???") == 0;
if (skip)
continue;
// add word to chunked context
Word chunkWord;
chunkWord.SetFactor(m_factorType, factor);
Word* chunkWord = new Word;
chunkWord->SetFactor(m_factorType, factor);
chunkContext.push_back(chunkWord);
}
@ -112,7 +112,9 @@ public:
*/
// calc score on chunked phrase
float ret = m_lmImpl->GetValue(chunkContext, finalState, len);
RemoveAllInColl(chunkContext);
return ret;
}
};

View File

@ -128,7 +128,7 @@ int LanguageModelIRST::GetLmID( const std::string &str ) const
return m_lmtb->dict->encode( str.c_str() );
}
float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int* len) const
float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
{
unsigned int dummy;
if (!len) { len = &dummy; }
@ -137,24 +137,24 @@ float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFacto
// set up context
size_t count = contextFactor.size();
m_lmtb_ng->size=0;
if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
m_lmtb_ng->size=0;
if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
for (size_t i = 0 ; i < count ; i++)
{
int lmId = GetLmID(contextFactor[i][factorType]);
m_lmtb_ng->pushc(lmId);
int lmId = GetLmID((*contextFactor[i])[factorType]);
m_lmtb_ng->pushc(lmId);
}
if (finalState){
*finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
*finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
// back off stats not currently available
*len = 0;
}
return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
}

View File

@ -67,7 +67,7 @@ public:
, float weight
, size_t nGramOrder);
virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
const void CleanUpAfterSentenceProcessing();
const void InitializeBeforeSentenceProcessing();

View File

@ -79,7 +79,7 @@ public:
m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder);
}
float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
{
if (contextFactor.size() == 0)
{
@ -92,29 +92,29 @@ public:
*/
// joint context for internal LM
std::vector<FactorArrayWrapper> jointContext;
std::vector<const Word*> jointContext;
for (size_t currPos = 0 ; currPos < m_nGramOrder ; ++currPos )
{
const FactorArrayWrapper &factorArray = contextFactor[currPos];
const Word &word = *contextFactor[currPos];
// add word to chunked context
std::stringstream stream("");
const Factor *factor = factorArray[ m_factorTypesOrdered[0] ];
const Factor *factor = word[ m_factorTypesOrdered[0] ];
stream << factor->GetString();
for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index)
{
FactorType factorType = m_factorTypesOrdered[index];
const Factor *factor = factorArray[factorType];
const Factor *factor = word[factorType];
stream << "|" << factor->GetString();
}
factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str());
Word jointWord;
jointWord.SetFactor(m_implFactor, factor);
Word* jointWord = new Word;
jointWord->SetFactor(m_implFactor, factor);
jointContext.push_back(jointWord);
}
@ -125,6 +125,8 @@ public:
*/
// calc score on chunked phrase
float ret = m_lmImpl->GetValue(jointContext, finalState, len);
RemoveAllInColl(jointContext);
return ret;
}

View File

@ -40,10 +40,10 @@ bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
return false;
// whether phrase contains all factors in this LM
const FactorArray &factorArray = phrase.GetFactorArray(0);
const Word &word = phrase.GetWord(0);
for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor)
{
if (m_factorTypes[currFactor] && factorArray[currFactor] == NULL)
if (m_factorTypes[currFactor] && word[currFactor] == NULL)
return false;
}
return true;

View File

@ -132,7 +132,7 @@ float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
return FloorSRIScore(TransformSRIScore(p)); // log10->log
}
float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int *len) const
float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const
{
FactorType factorType = GetFactorType();
size_t count = contextFactor.size();
@ -146,13 +146,13 @@ float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor
VocabIndex context[MAX_NGRAM_SIZE];
for (size_t i = 0 ; i < count - 1 ; i++)
{
context[i] = GetLmID(contextFactor[count-2-i][factorType]);
context[i] = GetLmID((*contextFactor[count-2-i])[factorType]);
}
context[count-1] = Vocab_None;
assert(contextFactor[count-1][factorType] != NULL);
assert((*contextFactor[count-1])[factorType] != NULL);
// call sri lm fn
VocabIndex lmId= GetLmID(contextFactor[count-1][factorType]);
VocabIndex lmId= GetLmID((*contextFactor[count-1])[factorType]);
float ret = GetValue(lmId, context);
if (finalState) {

View File

@ -56,6 +56,6 @@ public:
, float weight
, size_t nGramOrder);
virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
};

View File

@ -9,7 +9,6 @@ libmoses_a_SOURCES_TMP = \
DistortionOrientation.cpp \
DummyScoreProducers.cpp \
Factor.cpp \
FactorArrayWrapper.cpp \
FactorCollection.cpp \
FactorTypeSet.cpp \
GenerationDictionary.cpp \

View File

@ -96,7 +96,7 @@ public:
}
void Factors2String(FactorArray const& w,std::string& s) const
void Factors2String(Word const& w,std::string& s) const
{
for(size_t j=0;j<m_input.size();++j)
{
@ -155,7 +155,7 @@ public:
std::vector<std::string> srcString(src.GetSize());
// convert source Phrase into vector of strings
for(size_t i=0;i<srcString.size();++i)
Factors2String(src.GetFactorArray(i),srcString[i]);
Factors2String(src.GetWord(i),srcString[i]);
// get target phrases in string representation
std::vector<StringTgtCand> cands;
@ -268,9 +268,9 @@ public:
for(size_t k=0;k<factorStrings.size();++k)
{
std::vector<std::string> factors=Tokenize(*factorStrings[k],"|");
FactorArray& fa=targetPhrase.AddWord();
Word& w=targetPhrase.AddWord();
for(size_t l=0;l<m_output.size();++l)
fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
w[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
}
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
targetPhrase.SetSourcePhrase(srcPtr);
@ -360,7 +360,7 @@ public:
{
const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
std::string s;
Factors2String(w.GetFactorArray(),s);
Factors2String(w,s);
bool isEpsilon=(s=="" || s==EPSILON);
// do not start with epsilon (except at first position)

View File

@ -1,4 +1,5 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -27,20 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FactorCollection.h"
#include "Phrase.h"
#include "Util.h" //malloc() replacement
#include "StaticData.h" // GetMaxNumFactors
using namespace std;
std::vector<mempool*> Phrase::s_memPool;
// std::vector<mempool*> Phrase::s_memPool;
Phrase::Phrase(const Phrase &copy)
:m_direction(copy.m_direction)
,m_phraseSize(copy.m_phraseSize)
,m_arraySize(copy.m_arraySize)
,m_memPoolIndex(copy.m_memPoolIndex)
//,m_memPoolIndex(copy.m_memPoolIndex)
,m_words(copy.m_words)
{
assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
}
Phrase& Phrase::operator=(const Phrase& x)
@ -48,19 +48,12 @@ Phrase& Phrase::operator=(const Phrase& x)
if(this!=&x)
{
if(m_factorArray)
{
assert(m_memPoolIndex<s_memPool.size());
s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
}
m_direction=x.m_direction;
m_phraseSize=x.m_phraseSize;
m_arraySize=x.m_arraySize;
m_memPoolIndex=x.m_memPoolIndex;
// m_memPoolIndex=x.m_memPoolIndex;
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));
m_words = x.m_words;
}
return *this;
}
@ -70,55 +63,34 @@ Phrase::Phrase(FactorDirection direction)
: m_direction(direction)
, m_phraseSize(0)
, m_arraySize(ARRAY_SIZE_INCR)
, m_memPoolIndex(0)
// , m_memPoolIndex(0)
, m_words(ARRAY_SIZE_INCR)
{
assert(m_memPoolIndex<s_memPool.size());
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
}
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
:m_direction(direction)
,m_phraseSize(mergeWords.size())
,m_words(mergeWords.size())
{
m_memPoolIndex = (m_phraseSize + ARRAY_SIZE_INCR - 1) / ARRAY_SIZE_INCR - 1;
m_arraySize = (m_memPoolIndex + 1) * ARRAY_SIZE_INCR;
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
{
FactorArray &thisWord = m_factorArray[currPos];
const Word &mergeWord = *mergeWords[currPos];
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
thisWord[currFactor] = mergeWord.GetFactor(factorType);
}
m_words[currPos] = *mergeWords[currPos];
}
}
Phrase::~Phrase()
{
// RZ:
// will segFault if Phrase was default constructed and AddWord was never called
// TODO not sure if this is really the intended behaviour
// assertion failure is better than segFault, but if(m_factorArray) might be more appropriate
//assert(m_factorArray);
if(m_factorArray)
{
assert(m_memPoolIndex<s_memPool.size());
assert((char*)m_factorArray);
s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
}
}
void Phrase::MergeFactors(const Phrase &copy)
{
assert(GetSize() == copy.GetSize());
size_t size = GetSize();
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *factor = copy.GetFactor(currPos, factorType);
@ -153,8 +125,8 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++)
{
FactorArray &newWord = retPhrase.AddWord();
Word::Copy(newWord, GetFactorArray(currPos));
Word &word = retPhrase.AddWord();
word = GetWord(currPos);
}
return retPhrase;
@ -166,31 +138,21 @@ std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
stringstream strme;
for (size_t pos = 0 ; pos < GetSize() ; pos++)
{
strme << Word::ToString(factorsToPrint, GetFactorArray(pos));
strme << GetWord(pos).ToString(factorsToPrint);
}
return strme.str();
}
FactorArray &Phrase::AddWord()
Word &Phrase::AddWord()
{
if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)
{ // need to expand array
FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->allocate();
memcpy(newArray, m_factorArray, m_phraseSize * sizeof(FactorArray));
s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
m_memPoolIndex++;
m_arraySize += ARRAY_SIZE_INCR;
m_factorArray = newArray;
m_words.resize(m_arraySize);
}
FactorArray &factorArray = m_factorArray[m_phraseSize];
Word::Initialize(factorArray);
m_phraseSize++;
return factorArray;
return m_words[m_phraseSize++];
}
vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter)
@ -233,13 +195,13 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
{
// add word this phrase
FactorArray &factorArray = AddWord();
Word &word = AddWord();
for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++)
{
FactorType factorType = factorOrder[currFactorIndex];
const string &factorStr = phraseVector[phrasePos][currFactorIndex];
const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr);
factorArray[factorType] = factor;
word[factorType] = factor;
}
}
}
@ -270,8 +232,9 @@ bool Phrase::operator < (const Phrase &compare) const
{
size_t minSize = std::min( thisSize , compareSize );
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
// taken from word.Compare()
for (size_t i = 0 ; i < MAX_NUM_FACTORS ; i++)
for (size_t i = 0 ; i < maxNumFactors ; i++)
{
FactorType factorType = static_cast<FactorType>(i);
@ -346,9 +309,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
const size_t size = GetSize();
const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *thisFactor = GetFactor(currPos, factorType)
@ -389,6 +353,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
void Phrase::InitializeMemPool()
{
#if 0
s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
@ -399,15 +364,18 @@ void Phrase::InitializeMemPool()
for (size_t i = 8 ; i < 30 ; ++i)
s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
#endif
}
// Teardown counterpart of InitializeMemPool().
// The whole body is compiled out with #if 0, so this function is currently a no-op.
void Phrase::FinalizeMemPool()
{
#if 0
// Disabled code: deletes every mempool pointer stored in s_memPool.
std::vector<mempool*>::iterator iter;
for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
{
delete *iter;
}
#endif
}
TO_STRING_BODY(Phrase);
@ -418,8 +386,8 @@ ostream& operator<<(ostream& out, const Phrase& phrase)
// out << "(size " << phrase.GetSize() << ") ";
for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++)
{
const FactorArray &factorArray = phrase.GetFactorArray(pos);
out << Word::ToString(factorArray);
const Word &word = phrase.GetWord(pos);
out << word;
}
return out;
}

View File

@ -1,4 +1,5 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -35,13 +36,13 @@ class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
private:
static std::vector<mempool*> s_memPool;
// static std::vector<mempool*> s_memPool;
FactorDirection m_direction;
size_t m_phraseSize, //number of words
m_arraySize,
m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
FactorArray *m_factorArray;
size_t m_phraseSize; //number of words
size_t m_arraySize;
// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
std::vector<Word> m_words;
public:
static void InitializeMemPool();
@ -84,35 +85,37 @@ public:
{
return m_phraseSize;
}
inline const FactorArray &GetFactorArray(size_t pos) const
inline const Word &GetWord(size_t pos) const
{
return m_factorArray[pos];
return m_words[pos];
}
inline FactorArray &GetFactorArray(size_t pos)
inline Word &GetWord(size_t pos)
{
return m_factorArray[pos];
return m_words[pos];
}
inline const Factor *GetFactor(size_t pos, FactorType factorType) const
{
FactorArray &ptr = m_factorArray[pos];
const Word &ptr = m_words[pos];
return ptr[factorType];
}
inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor)
{
FactorArray &ptr = m_factorArray[pos];
Word &ptr = m_words[pos];
ptr[factorType] = factor;
}
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
, const std::vector<FactorType> &inputFactor) const;
FactorArray &AddWord();
Word &AddWord();
Phrase GetSubString(const WordsRange &wordsRange) const;
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());}
void push_back(Word const& w) {
AddWord() = w;
}
TO_STRING;

View File

@ -122,7 +122,7 @@ TargetPhraseCollection *PhraseDictionary::CreateTargetPhraseCollection(const Phr
PhraseDictionaryNode *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
Word word(source.GetFactorArray(pos));
const Word& word = source.GetWord(pos);
currNode = currNode->GetOrCreateChild(word);
if (currNode == NULL)
return NULL;
@ -144,7 +144,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
const PhraseDictionaryNode *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
Word word(source.GetFactorArray(pos));
const Word& word = source.GetWord(pos);
currNode = currNode->GetChild(word);
if (currNode == NULL)
return NULL;

View File

@ -50,9 +50,9 @@ class Sentence : public Phrase, public InputType
{
return Phrase::GetStringRep(factorsToPrint);
}
const FactorArray& GetFactorArray(size_t pos) const
const Word& GetWord(size_t pos) const
{
return Phrase::GetFactorArray(pos);
return Phrase::GetWord(pos);
}
size_t GetSize() const
{

View File

@ -1,4 +1,5 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -42,6 +43,22 @@ using namespace std;
extern Timer timer;
// Returns the largest value among x and all elements of y.
// When y is empty the result is x itself.
static size_t CalcMax(size_t x, const vector<size_t>& y) {
  size_t best = x;
  for (size_t idx = 0; idx < y.size(); ++idx) {
    const size_t candidate = y[idx];
    if (best < candidate) {
      best = candidate;
    }
  }
  return best;
}
// Returns the largest value among x, all elements of y and all elements of z.
// When both vectors are empty the result is x itself.
static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z) {
  // Walk both inputs with one nested loop instead of two copies of the same scan.
  const vector<size_t>* const lists[] = { &y, &z };
  size_t best = x;
  for (size_t l = 0; l < 2; ++l) {
    const vector<size_t>& current = *lists[l];
    for (size_t idx = 0; idx < current.size(); ++idx) {
      if (best < current[idx]) {
        best = current[idx];
      }
    }
  }
  return best;
}
StaticData* StaticData::s_instance(0);
StaticData::StaticData()
@ -57,6 +74,9 @@ StaticData::StaticData()
,m_computeLMBackoffStats(false)
,m_factorDelimiter("|") // default delimiter between factors
{
m_maxFactorIdx[0] = 0; // source side
m_maxFactorIdx[1] = 0; // target side
s_instance = this;
// memory pools
@ -300,9 +320,6 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// initialize n-gram order for each factor. populated only by factored lm
for(size_t i=0; i < MAX_NUM_FACTORS ; i++)
m_maxNgramOrderForFactor[i] = 0;
const vector<string> &lmVector = m_parameter.GetParam("lmodel-file");
for(size_t i=0; i<lmVector.size(); i++)
@ -360,6 +377,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
bool oldFormat = (token.size() == 3);
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
,output = Tokenize<FactorType>(token[1], ",");
m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output);
string filePath;
size_t numFeatures = 1;
if (oldFormat)
@ -536,6 +554,9 @@ void StaticData::LoadPhraseTables(bool filter
//characteristics of the phrase table
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
,output = Tokenize<FactorType>(token[1], ",");
m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input);
m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output);
m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1;
string filePath= token[3];
size_t noScoreComponent = Scan<size_t>(token[2]);
// weights for this phrase dictionary

View File

@ -80,7 +80,6 @@ protected:
std::vector<std::string> m_mySQLParam;
InputOutput *m_inputOutput;
bool m_fLMsLoaded, m_labeledNBestList;
size_t m_maxNgramOrderForFactor[MAX_NUM_FACTORS];
/***
* false = treat unknown words as unknowns, and translate them as themselves;
* true = drop (ignore) them
@ -88,6 +87,7 @@ protected:
bool m_dropUnknown;
bool m_wordDeletionEnabled;
int m_inputType;
unsigned m_numInputScores;
@ -102,7 +102,9 @@ protected:
bool m_computeLMBackoffStats;
mutable std::auto_ptr<SentenceStats> m_sentenceStats;
std::string m_factorDelimiter;
std::string m_factorDelimiter; //! by default, |, but it can be changed
size_t m_maxFactorIdx[2]; //! highest factor index seen so far per side: [0] = source, [1] = target (the factor count is this value + 1)
size_t m_maxNumFactors; //! larger of the source-side and target-side factor counts, i.e. max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1
public:
StaticData();
@ -299,4 +301,6 @@ public:
bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;}
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
//! number of factors in use on the given side: the stored highest factor index plus one
size_t GetMaxNumFactors(FactorDirection direction) const
{
	const size_t side = static_cast<size_t>(direction);
	return m_maxFactorIdx[side] + 1;
}
//! value of m_maxNumFactors, the factor count not specific to one side
size_t GetMaxNumFactors() const
{
	return m_maxNumFactors;
}
};

View File

@ -114,9 +114,9 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
const size_t len = GetSize();
for (size_t currPos = 0 ; currPos < len ; currPos++)
{
const FactorArray &inputWord = inputPhrase.GetFactorArray(currPos);
FactorArray &cloneWord = clone->GetFactorArray(currPos);
Word::Merge(cloneWord, inputWord);
const Word &inputWord = inputPhrase.GetWord(currPos);
Word &cloneWord = clone->GetWord(currPos);
cloneWord.Merge(inputWord);
currWord++;
}

View File

@ -332,7 +332,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
/** special handling of unknown words: add special translation (or drop) */
void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourceWord,
void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
size_t sourcePos
, FactorCollection &factorCollection)
{
@ -356,7 +356,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
{
// add to dictionary
TargetPhrase targetPhrase(Output);
FactorArray &targetWord = targetPhrase.AddWord();
Word &targetWord = targetPhrase.AddWord();
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{

View File

@ -36,6 +36,7 @@ class GenerationDictionary;
class InputType;
class LMList;
class FactorMask;
class Word;
typedef std::vector<const TranslationOption*> TranslationOptionList;
@ -70,7 +71,7 @@ protected:
, size_t startPos, size_t endPos, bool observeTableLimit );
void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection);
virtual void ProcessOneUnknownWord(const FactorArray &sourceWord
virtual void ProcessOneUnknownWord(const Word &sourceWord
, size_t sourcePos
, FactorCollection &factorCollection);

View File

@ -19,7 +19,7 @@ ProcessUnknownWord( size_t sourcePos
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i)
ProcessOneUnknownWord(i->first.GetFactorArray(),sourcePos,factorCollection);
ProcessOneUnknownWord(i->first,sourcePos,factorCollection);
}

View File

@ -36,6 +36,6 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos
, FactorCollection &factorCollection)
{
const FactorArray &sourceWord = m_source.GetFactorArray(sourcePos);
const Word &sourceWord = m_source.GetWord(sourcePos);
ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection);
}

View File

@ -81,8 +81,8 @@ const size_t MAX_NUM_FACTORS = 4;
enum FactorDirection
{
Input,
Output
Input, //! Source factors
Output //! Target factors
};
enum DecodeType
@ -144,5 +144,3 @@ enum DictionaryFind
// typedef
typedef size_t FactorType;
class Factor;
typedef const Factor * FactorArray[MAX_NUM_FACTORS];

View File

@ -1,4 +1,5 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@ -28,25 +29,23 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
Word::Word(const Word &copy)
:FactorArrayWrapper()
#ifdef DYNAMIC_FACTOR_ARRAY
: m_factorArray(copy.m_factorArray)
#endif
{ // deep copy
m_factorArrayPtr = &m_factorArray;
Word::Copy(m_factorArray, copy.m_factorArray);
#ifndef DYNAMIC_FACTOR_ARRAY
memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
#endif
}
Word::Word()
#ifdef DYNAMIC_FACTOR_ARRAY
: m_factorArray(MAX_NUM_FACTORS, 0)
#endif
{
m_factorArrayPtr = &m_factorArray;
Word::Initialize(m_factorArray);
}
Word::Word(const FactorArray &factorArray)
{
m_factorArrayPtr = &m_factorArray;
for (size_t factor = 0 ; factor < MAX_NUM_FACTORS ; factor++)
{
m_factorArray[factor] = factorArray[factor];
}
#ifndef DYNAMIC_FACTOR_ARRAY
memset(m_factorArray, 0, sizeof(FactorArray));
#endif
}
Word::~Word()
@ -54,7 +53,7 @@ Word::~Word()
}
// static
int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
int Word::Compare(const Word &targetWord, const Word &sourceWord)
{
for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
{
@ -73,51 +72,20 @@ int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
}
void Word::Copy(FactorArray &target, const FactorArray &source)
{
memcpy(target, source, sizeof(FactorArray));
}
void Word::Initialize(FactorArray &factorArray)
{
memset(factorArray, 0, sizeof(FactorArray));
}
void Word::Merge(FactorArray &targetWord, const FactorArray &sourceWord)
void Word::Merge(const Word &sourceWord)
{
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
const Factor *sourcefactor = sourceWord[currFactor]
,*targetFactor = targetWord[currFactor];
const Factor *sourcefactor = sourceWord.m_factorArray[currFactor]
,*targetFactor = this ->m_factorArray[currFactor];
if (targetFactor == NULL && sourcefactor != NULL)
{
targetWord[currFactor] = sourcefactor;
m_factorArray[currFactor] = sourcefactor;
}
}
}
std::string Word::ToString(const FactorArray &factorArray)
{
stringstream strme;
const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
bool firstPass = true;
// TODO- don't loop over MAX_NUM_FACTORS here, just use the ones that
// actually participate in the xltn process.
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
const Factor *factor = factorArray[currFactor];
if (factor != NULL)
{
if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
strme << *factor;
}
}
strme << " ";
return strme.str();
}
std::string Word::ToString(const vector<FactorType> factorType, const FactorArray &factorArray)
std::string Word::ToString(const vector<FactorType> factorType) const
{
stringstream strme;
assert(factorType.size() <= MAX_NUM_FACTORS);
@ -125,7 +93,7 @@ std::string Word::ToString(const vector<FactorType> factorType, const FactorArra
bool firstPass = true;
for (unsigned int i = 0 ; i < factorType.size() ; i++)
{
const Factor *factor = factorArray[factorType[i]];
const Factor *factor = m_factorArray[factorType[i]];
if (factor != NULL)
{
if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }

View File

@ -27,59 +27,68 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Factor.h"
#include "Util.h"
#include "FactorArrayWrapper.h"
#undef DYNAMIC_FACTOR_ARRAY
class Phrase;
/***
* hold a set of factors for a single word
*
* TODO either replace all uses of FactorArray with Word or vice versa; don't only use the wrapper in half of cases!
*/
class Word : public FactorArrayWrapper
class Word
{
friend std::ostream& operator<<(std::ostream&, const Word&);
protected:
FactorArray m_factorArray;
#ifndef DYNAMIC_FACTOR_ARRAY
typedef const Factor * FactorArray[MAX_NUM_FACTORS];
#else
typedef std::vector<const Factor*> FactorArray;
#endif
FactorArray m_factorArray;
public:
/**
* deep copy
*/
Word(const Word &copy);
Word(const FactorArray &factorArray);
Word();
~Word();
// why is this needed ? it should be inherited
const FactorArray &GetFactorArray() const
{
return m_factorArray;
const Factor*& operator[](FactorType index) {
return m_factorArray[index];
}
inline FactorArray &GetFactorArray()
{
return m_factorArray;
const Factor * const & operator[](FactorType index) const {
return m_factorArray[index];
}
inline const Factor* GetFactor(FactorType factorType) const {
return m_factorArray[factorType];
}
inline void SetFactor(FactorType factorType, const Factor *factor)
{
m_factorArray[factorType] = factor;
}
void Merge(const Word &sourceWord);
std::string ToString(const std::vector<FactorType> factorType) const;
TO_STRING;
/* static functions */
// FactorArray
static void Copy(FactorArray &target, const FactorArray &source);
static void Initialize(FactorArray &factorArray);
/***
* wherever the source word has a given factor that the target word is missing, add it to the target word
*/
static void Merge(FactorArray &targetWord, const FactorArray &sourceWord);
static std::string ToString(const FactorArray &factorArray);
static std::string ToString(const std::vector<FactorType> factorType, const FactorArray &factorArray);
static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
static int Compare(const Word &targetWord, const Word &sourceWord);
inline bool operator< (const Word &compare) const
{ // needed to store word in GenerationDictionary map
// uses comparison of FactorKey
// 'proper' comparison, not address/id comparison
return Compare(*this, compare) < 0;
}
};