variable number of translation component scores

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@7 1f5c12ca-751b-0410-a591-d2e778427230
2024-12-26 05:14:36 +03:00 · 2006-07-07 16:25:04 +00:00 · 2006-07-07 16:25:04 +00:00 · b9955c5bbd
commit b9955c5bbd
parent 3431b85a37
4 changed files with 21 additions and 16 deletions
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -92,7 +92,7 @@ bool Parameter::Validate()
  // do files exist?
 	// phrase tables
 	if (ret)
-		ret = FilesExist("ttable-file", 2);
+		ret = FilesExist("ttable-file", 3);
 	// generation tables
 	if (ret)
 		ret = FilesExist("generation-file", 2);
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -39,12 +39,9 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
 																			, const string &hashFilePath
 																			, const vector<float> &weight
 																			, size_t maxTargetPhrase
-																			, size_t id
 																			, bool filter
 																			, const list< Phrase > &inputPhraseList)
 {
-	m_id = id;
-
 	//factors	
 	m_factorsUsed[Source]				= new FactorTypeSet(input);
 	m_factorsUsed[Target]	= new FactorTypeSet(output);
@@ -91,6 +88,8 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
 		if (addPhrase)
 		{
 			vector<float> scoreVector = Tokenize<float>(token[2]);
+			assert(scoreVector.size() == m_noScoreComponent);
+			
 			// source
 			Phrase sourcePhrase(Source);
 			sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
--- a/moses/src/PhraseDictionary.h
+++ b/moses/src/PhraseDictionary.h
@@ -36,7 +36,7 @@ class PhraseDictionary
 	friend std::ostream& operator<<(std::ostream&, const PhraseDictionary&);

 protected:
-	size_t m_id;
+	const size_t m_id, m_noScoreComponent;
 	std::map<Phrase , TargetPhraseCollection > m_collection;
 	// 1st = source
 	// 2nd = target
@@ -51,8 +51,10 @@ protected:
 							, const std::list<Phrase>					&inputPhraseList
 							, const std::vector<FactorType>		&inputFactorType);
 public:
-	PhraseDictionary()
-		:m_factorsUsed(2)
+	PhraseDictionary(size_t id, size_t noScoreComponent)
+		:m_id(id)
+		,m_factorsUsed(2)
+		,m_noScoreComponent(noScoreComponent)
 	{
 	}
 	~PhraseDictionary();
@@ -64,7 +66,6 @@ public:
 								, const std::string &hashFilePath
 								, const std::vector<float> &weight
 								, size_t maxTargetPhrase
-								, size_t id
 								, bool filter
 								, const std::list< Phrase > &inputPhraseList);
 	
@@ -76,7 +77,10 @@ public:
 	{
 		return m_collection.size();
 	}
-
+	size_t GetNoScoreComponent() const
+	{
+		return m_noScoreComponent;
+	}
 	const TargetPhraseCollection *FindEquivPhrase(const Phrase &source) const;

 	// for mert
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -299,19 +299,22 @@ void StaticData::LoadPhraseTables(bool filter
 		
 		const vector<string> &translationVector = m_parameter.GetParam("ttable-file");
 		size_t	maxTargetPhrase										= Scan<size_t>(m_parameter.GetParam("ttable-limit")[0]);
-		
+
+		size_t totalPrevNoScoreComponent = 0;		
 		for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) 
 		{
 			vector<string>			token		= Tokenize(translationVector[currDict]);
 			vector<FactorType> 	input		= Tokenize<FactorType>(token[0], ",")
 													,output	= Tokenize<FactorType>(token[1], ",");
-			string							filePath= token[2];
+			string							filePath= token[3];
+			size_t							noScoreComponent	= Scan<size_t>(token[2]);
 			// weights for this phrase dictionary
-			vector<float> weight(NUM_PHRASE_SCORES);
-			for (size_t currScore = 0 ; currScore < NUM_PHRASE_SCORES ; currScore++)
+			vector<float> weight(noScoreComponent);
+			for (size_t currScore = 0 ; currScore < noScoreComponent ; currScore++)
 			{
-				weight[currScore] = weightAll[currDict * NUM_PHRASE_SCORES + currScore];
+				weight[currScore] = weightAll[totalPrevNoScoreComponent + currScore]; 
 			}
+			totalPrevNoScoreComponent += noScoreComponent;

 			string phraseTableHash	= GetMD5Hash(filePath);
 			string hashFilePath			= GetCachePath() 
@@ -337,7 +340,7 @@ void StaticData::LoadPhraseTables(bool filter
 			}
 			TRACE_ERR(filePath << endl);

-			m_phraseDictionary.push_back(new PhraseDictionary());
+			m_phraseDictionary.push_back(new PhraseDictionary(currDict, noScoreComponent));
 			timer.check("Start loading");
 			m_phraseDictionary[currDict]->Load(input
 																				, output
@@ -346,7 +349,6 @@ void StaticData::LoadPhraseTables(bool filter
 																				, hashFilePath
 																				, weight
 																				, maxTargetPhrase
-																				, currDict
 																				, filterPhrase
 																				, inputPhraseList);
 			timer.check("Finished loading");