mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
variable number of translation component scores
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@7 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
3431b85a37
commit
b9955c5bbd
@ -92,7 +92,7 @@ bool Parameter::Validate()
|
||||
// do files exist?
|
||||
// phrase tables
|
||||
if (ret)
|
||||
ret = FilesExist("ttable-file", 2);
|
||||
ret = FilesExist("ttable-file", 3);
|
||||
// generation tables
|
||||
if (ret)
|
||||
ret = FilesExist("generation-file", 2);
|
||||
|
@ -39,12 +39,9 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
|
||||
, const string &hashFilePath
|
||||
, const vector<float> &weight
|
||||
, size_t maxTargetPhrase
|
||||
, size_t id
|
||||
, bool filter
|
||||
, const list< Phrase > &inputPhraseList)
|
||||
{
|
||||
m_id = id;
|
||||
|
||||
//factors
|
||||
m_factorsUsed[Source] = new FactorTypeSet(input);
|
||||
m_factorsUsed[Target] = new FactorTypeSet(output);
|
||||
@ -91,6 +88,8 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
|
||||
if (addPhrase)
|
||||
{
|
||||
vector<float> scoreVector = Tokenize<float>(token[2]);
|
||||
assert(scoreVector.size() == m_noScoreComponent);
|
||||
|
||||
// source
|
||||
Phrase sourcePhrase(Source);
|
||||
sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
|
||||
|
@ -36,7 +36,7 @@ class PhraseDictionary
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionary&);
|
||||
|
||||
protected:
|
||||
size_t m_id;
|
||||
const size_t m_id, m_noScoreComponent;
|
||||
std::map<Phrase , TargetPhraseCollection > m_collection;
|
||||
// 1st = source
|
||||
// 2nd = target
|
||||
@ -51,8 +51,10 @@ protected:
|
||||
, const std::list<Phrase> &inputPhraseList
|
||||
, const std::vector<FactorType> &inputFactorType);
|
||||
public:
|
||||
PhraseDictionary()
|
||||
:m_factorsUsed(2)
|
||||
PhraseDictionary(size_t id, size_t noScoreComponent)
|
||||
:m_id(id)
|
||||
,m_factorsUsed(2)
|
||||
,m_noScoreComponent(noScoreComponent)
|
||||
{
|
||||
}
|
||||
~PhraseDictionary();
|
||||
@ -64,7 +66,6 @@ public:
|
||||
, const std::string &hashFilePath
|
||||
, const std::vector<float> &weight
|
||||
, size_t maxTargetPhrase
|
||||
, size_t id
|
||||
, bool filter
|
||||
, const std::list< Phrase > &inputPhraseList);
|
||||
|
||||
@ -76,7 +77,10 @@ public:
|
||||
{
|
||||
return m_collection.size();
|
||||
}
|
||||
|
||||
size_t GetNoScoreComponent() const
|
||||
{
|
||||
return m_noScoreComponent;
|
||||
}
|
||||
const TargetPhraseCollection *FindEquivPhrase(const Phrase &source) const;
|
||||
|
||||
// for mert
|
||||
|
@ -299,19 +299,22 @@ void StaticData::LoadPhraseTables(bool filter
|
||||
|
||||
const vector<string> &translationVector = m_parameter.GetParam("ttable-file");
|
||||
size_t maxTargetPhrase = Scan<size_t>(m_parameter.GetParam("ttable-limit")[0]);
|
||||
|
||||
|
||||
size_t totalPrevNoScoreComponent = 0;
|
||||
for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++)
|
||||
{
|
||||
vector<string> token = Tokenize(translationVector[currDict]);
|
||||
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
|
||||
,output = Tokenize<FactorType>(token[1], ",");
|
||||
string filePath= token[2];
|
||||
string filePath= token[3];
|
||||
size_t noScoreComponent = Scan<size_t>(token[2]);
|
||||
// weights for this phrase dictionary
|
||||
vector<float> weight(NUM_PHRASE_SCORES);
|
||||
for (size_t currScore = 0 ; currScore < NUM_PHRASE_SCORES ; currScore++)
|
||||
vector<float> weight(noScoreComponent);
|
||||
for (size_t currScore = 0 ; currScore < noScoreComponent ; currScore++)
|
||||
{
|
||||
weight[currScore] = weightAll[currDict * NUM_PHRASE_SCORES + currScore];
|
||||
weight[currScore] = weightAll[totalPrevNoScoreComponent + currScore];
|
||||
}
|
||||
totalPrevNoScoreComponent += noScoreComponent;
|
||||
|
||||
string phraseTableHash = GetMD5Hash(filePath);
|
||||
string hashFilePath = GetCachePath()
|
||||
@ -337,7 +340,7 @@ void StaticData::LoadPhraseTables(bool filter
|
||||
}
|
||||
TRACE_ERR(filePath << endl);
|
||||
|
||||
m_phraseDictionary.push_back(new PhraseDictionary());
|
||||
m_phraseDictionary.push_back(new PhraseDictionary(currDict, noScoreComponent));
|
||||
timer.check("Start loading");
|
||||
m_phraseDictionary[currDict]->Load(input
|
||||
, output
|
||||
@ -346,7 +349,6 @@ void StaticData::LoadPhraseTables(bool filter
|
||||
, hashFilePath
|
||||
, weight
|
||||
, maxTargetPhrase
|
||||
, currDict
|
||||
, filterPhrase
|
||||
, inputPhraseList);
|
||||
timer.check("Finished loading");
|
||||
|
Loading…
Reference in New Issue
Block a user