Support a variable number of translation component scores per phrase table

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@7 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2006-07-07 16:25:04 +00:00
parent 3431b85a37
commit b9955c5bbd
4 changed files with 21 additions and 16 deletions

View File

@ -92,7 +92,7 @@ bool Parameter::Validate()
// do files exist?
// phrase tables
if (ret)
ret = FilesExist("ttable-file", 2);
ret = FilesExist("ttable-file", 3);
// generation tables
if (ret)
ret = FilesExist("generation-file", 2);

View File

@ -39,12 +39,9 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
, const string &hashFilePath
, const vector<float> &weight
, size_t maxTargetPhrase
, size_t id
, bool filter
, const list< Phrase > &inputPhraseList)
{
m_id = id;
//factors
m_factorsUsed[Source] = new FactorTypeSet(input);
m_factorsUsed[Target] = new FactorTypeSet(output);
@ -91,6 +88,8 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
if (addPhrase)
{
vector<float> scoreVector = Tokenize<float>(token[2]);
assert(scoreVector.size() == m_noScoreComponent);
// source
Phrase sourcePhrase(Source);
sourcePhrase.CreateFromString( input, phraseVector, factorCollection);

View File

@ -36,7 +36,7 @@ class PhraseDictionary
friend std::ostream& operator<<(std::ostream&, const PhraseDictionary&);
protected:
size_t m_id;
const size_t m_id, m_noScoreComponent;
std::map<Phrase , TargetPhraseCollection > m_collection;
// 1st = source
// 2nd = target
@ -51,8 +51,10 @@ protected:
, const std::list<Phrase> &inputPhraseList
, const std::vector<FactorType> &inputFactorType);
public:
PhraseDictionary()
:m_factorsUsed(2)
/**
 * Construct an empty phrase dictionary.
 *
 * @param id               stored in the const member m_id
 * @param noScoreComponent stored in the const member m_noScoreComponent and
 *                         returned by GetNoScoreComponent()
 *
 * m_factorsUsed is created with two slots (indexed by Source and Target
 * when the dictionary is loaded).
 */
PhraseDictionary(size_t id, size_t noScoreComponent)
    // m_id and m_noScoreComponent come from a single member declaration, so
    // they are initialized back-to-back. Listing them adjacently keeps the
    // initializer list in step with the real initialization order and avoids
    // a -Wreorder warning.
    // NOTE(review): assumes m_factorsUsed is declared after them - confirm
    // against the full class definition.
    :m_id(id)
    ,m_noScoreComponent(noScoreComponent)
    ,m_factorsUsed(2)
{
}
~PhraseDictionary();
@ -64,7 +66,6 @@ public:
, const std::string &hashFilePath
, const std::vector<float> &weight
, size_t maxTargetPhrase
, size_t id
, bool filter
, const std::list< Phrase > &inputPhraseList);
@ -76,7 +77,10 @@ public:
{
return m_collection.size();
}
/** @return the score-component count this dictionary was constructed with. */
size_t GetNoScoreComponent() const { return m_noScoreComponent; }
const TargetPhraseCollection *FindEquivPhrase(const Phrase &source) const;
// for mert

View File

@ -299,19 +299,22 @@ void StaticData::LoadPhraseTables(bool filter
const vector<string> &translationVector = m_parameter.GetParam("ttable-file");
size_t maxTargetPhrase = Scan<size_t>(m_parameter.GetParam("ttable-limit")[0]);
size_t totalPrevNoScoreComponent = 0;
for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++)
{
vector<string> token = Tokenize(translationVector[currDict]);
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
,output = Tokenize<FactorType>(token[1], ",");
string filePath= token[2];
string filePath= token[3];
size_t noScoreComponent = Scan<size_t>(token[2]);
// weights for this phrase dictionary
vector<float> weight(NUM_PHRASE_SCORES);
for (size_t currScore = 0 ; currScore < NUM_PHRASE_SCORES ; currScore++)
vector<float> weight(noScoreComponent);
for (size_t currScore = 0 ; currScore < noScoreComponent ; currScore++)
{
weight[currScore] = weightAll[currDict * NUM_PHRASE_SCORES + currScore];
weight[currScore] = weightAll[totalPrevNoScoreComponent + currScore];
}
totalPrevNoScoreComponent += noScoreComponent;
string phraseTableHash = GetMD5Hash(filePath);
string hashFilePath = GetCachePath()
@ -337,7 +340,7 @@ void StaticData::LoadPhraseTables(bool filter
}
TRACE_ERR(filePath << endl);
m_phraseDictionary.push_back(new PhraseDictionary());
m_phraseDictionary.push_back(new PhraseDictionary(currDict, noScoreComponent));
timer.check("Start loading");
m_phraseDictionary[currDict]->Load(input
, output
@ -346,7 +349,6 @@ void StaticData::LoadPhraseTables(bool filter
, hashFilePath
, weight
, maxTargetPhrase
, currDict
, filterPhrase
, inputPhraseList);
timer.check("Finished loading");