2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
#include <string>
|
|
|
|
#include "GenerationDictionary.h"
|
|
|
|
#include "FactorCollection.h"
|
|
|
|
#include "Word.h"
|
|
|
|
#include "Util.h"
|
|
|
|
#include "InputFileStream.h"
|
|
|
|
#include "StaticData.h"
|
2013-06-05 16:42:56 +04:00
|
|
|
#include "util/exception.hh"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-11-15 21:43:41 +04:00
|
|
|
// Class-wide list of GenerationDictionary instances: the constructor appends
// `this` to it. Nothing in this file removes entries from the list.
std::vector<GenerationDictionary*> GenerationDictionary::s_staticColl;
|
2013-01-18 21:57:26 +04:00
|
|
|
|
|
|
|
// Construct a generation dictionary from its configuration line.
//
// `line` is forwarded to the DecodeFeature base constructor. The new object
// is then appended to s_staticColl, and finally ReadParameters() is invoked.
GenerationDictionary::GenerationDictionary(const std::string &line)
  : DecodeFeature(line, true)
{
  // Registration happens before the parameters are read.
  GenerationDictionary::s_staticColl.push_back(this);

  this->ReadParameters();
}
|
|
|
|
|
2013-06-05 16:42:56 +04:00
|
|
|
void GenerationDictionary::Load()
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
|
|
|
FactorCollection &factorCollection = FactorCollection::Instance();
|
|
|
|
|
|
|
|
const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
|
|
|
|
|
|
|
|
|
|
|
|
// data from file
|
2013-06-05 16:42:56 +04:00
|
|
|
InputFileStream inFile(m_filePath);
|
2013-11-23 00:27:46 +04:00
|
|
|
UTIL_THROW_IF2(!inFile.good(), "Couldn't read " << m_filePath);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
string line;
|
|
|
|
size_t lineNum = 0;
|
|
|
|
while(getline(inFile, line)) {
|
|
|
|
++lineNum;
|
|
|
|
vector<string> token = Tokenize( line );
|
|
|
|
|
|
|
|
// add each line in generation file into class
|
|
|
|
Word *inputWord = new Word(); // deleted in destructor
|
|
|
|
Word outputWord;
|
|
|
|
|
|
|
|
// create word with certain factors filled out
|
|
|
|
|
|
|
|
// inputs
|
|
|
|
vector<string> factorString = Tokenize( token[0], "|" );
|
|
|
|
for (size_t i = 0 ; i < GetInput().size() ; i++) {
|
|
|
|
FactorType factorType = GetInput()[i];
|
2013-06-05 16:42:56 +04:00
|
|
|
const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
|
2011-02-24 16:14:42 +03:00
|
|
|
inputWord->SetFactor(factorType, factor);
|
|
|
|
}
|
|
|
|
|
|
|
|
factorString = Tokenize( token[1], "|" );
|
|
|
|
for (size_t i = 0 ; i < GetOutput().size() ; i++) {
|
|
|
|
FactorType factorType = GetOutput()[i];
|
|
|
|
|
2013-06-05 16:42:56 +04:00
|
|
|
const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
|
2011-02-24 16:14:42 +03:00
|
|
|
outputWord.SetFactor(factorType, factor);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t numFeaturesInFile = token.size() - 2;
|
|
|
|
if (numFeaturesInFile < numFeatureValuesInConfig) {
|
|
|
|
stringstream strme;
|
2013-06-05 16:42:56 +04:00
|
|
|
strme << m_filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
|
2011-02-24 16:14:42 +03:00
|
|
|
<< " feature values, but found " << numFeaturesInFile << std::endl;
|
2013-06-05 16:42:56 +04:00
|
|
|
throw strme.str();
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
std::vector<float> scores(numFeatureValuesInConfig, 0.0f);
|
|
|
|
for (size_t i = 0; i < numFeatureValuesInConfig; i++)
|
|
|
|
scores[i] = FloorScore(TransformScore(Scan<float>(token[2+i])));
|
|
|
|
|
|
|
|
Collection::iterator iterWord = m_collection.find(inputWord);
|
|
|
|
if (iterWord == m_collection.end()) {
|
|
|
|
m_collection[inputWord][outputWord].Assign(this, scores);
|
|
|
|
} else {
|
|
|
|
// source word already in there. delete input word to avoid mem leak
|
|
|
|
(iterWord->second)[outputWord].Assign(this, scores);
|
|
|
|
delete inputWord;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
inFile.Close();
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Destructor: deletes the key pointer of every entry in m_collection.
// Load() allocates those keys with `new`.
GenerationDictionary::~GenerationDictionary()
{
  Collection::const_iterator entry = m_collection.begin();
  const Collection::const_iterator last = m_collection.end();
  while (entry != last) {
    delete entry->first;
    ++entry;
  }
}
|
|
|
|
|
|
|
|
// Look up `word` in m_collection.
//
// Returns a pointer to the OutputWordCollection stored for that word, or NULL
// when the word is not a key of m_collection. The pointer refers to data held
// inside m_collection.
const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
{
  const Collection::const_iterator hit = m_collection.find(&word);
  if (hit != m_collection.end()) {
    return &hit->second;
  }

  // can't find source phrase
  return NULL;
}
|
|
|
|
|
2013-06-20 16:25:02 +04:00
|
|
|
void GenerationDictionary::SetParameter(const std::string& key, const std::string& value)
|
2013-06-20 15:50:41 +04:00
|
|
|
{
|
|
|
|
if (key == "path") {
|
|
|
|
m_filePath = value;
|
|
|
|
} else {
|
2013-06-20 16:25:02 +04:00
|
|
|
DecodeFeature::SetParameter(key, value);
|
2013-06-20 15:50:41 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|