This commit is contained in:
Nicola Bertoldi 2014-05-19 15:34:27 +02:00
parent d85bd05e18
commit 2f3cd5e2fe
54 changed files with 2874 additions and 2851 deletions

View File

@ -45,9 +45,9 @@ public:
typedef MapType::iterator iterator;
ChartCellLabelSet(const WordsRange &coverage)
: m_coverage(coverage)
, m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
, m_size(0) { }
: m_coverage(coverage)
, m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
, m_size(0) { }
~ChartCellLabelSet() {
RemoveAllInColl(m_map);
@ -82,8 +82,7 @@ public:
if (ChartCellExists(idx)) {
ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
s.cube = stack;
}
else {
} else {
ChartCellLabel::Stack s;
s.cube = stack;
m_size++;
@ -97,8 +96,7 @@ public:
if (m_map.at(idx) != NULL) {
return true;
}
}
catch (const std::out_of_range& oor) {
} catch (const std::out_of_range& oor) {
m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
}
return false;
@ -116,8 +114,7 @@ public:
size_t idx = w[0]->GetId();
try {
return m_map.at(idx);
}
catch (const std::out_of_range& oor) {
} catch (const std::out_of_range& oor) {
return NULL;
}
}

View File

@ -32,8 +32,8 @@ namespace Moses
// Extract the k-best list from the search graph.
void ChartKBestExtractor::Extract(
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
KBestVec &kBestList)
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
KBestVec &kBestList)
{
kBestList.clear();
if (topLevelHypos.empty()) {
@ -45,7 +45,7 @@ void ChartKBestExtractor::Extract(
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
const ChartHypothesis &bestTopLevelHypo = **p;
boost::scoped_ptr<ChartHypothesis> supremeHypo(
new ChartHypothesis(bestTopLevelHypo, *this));
new ChartHypothesis(bestTopLevelHypo, *this));
// Do the same for each alternative top-level hypothesis, but add the new
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
@ -68,8 +68,8 @@ void ChartKBestExtractor::Extract(
// each derivation.
kBestList.reserve(targetVertex->kBestList.size());
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
q = targetVertex->kBestList.begin();
q != targetVertex->kBestList.end(); ++q) {
q = targetVertex->kBestList.begin();
q != targetVertex->kBestList.end(); ++q) {
const boost::shared_ptr<Derivation> d(*q);
assert(d);
assert(d->subderivations.size() == 1);
@ -124,7 +124,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
const ChartHypothesis &h)
const ChartHypothesis &h)
{
UnweightedHyperarc edge;
edge.head = FindOrCreateVertex(h);

View File

@ -70,8 +70,8 @@ public:
struct Vertex {
typedef std::priority_queue<boost::weak_ptr<Derivation>,
std::vector<boost::weak_ptr<Derivation> >,
DerivationOrderer> DerivationQueue;
std::vector<boost::weak_ptr<Derivation> >,
DerivationOrderer> DerivationQueue;
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
@ -92,7 +92,7 @@ public:
private:
typedef boost::unordered_map<const ChartHypothesis *,
boost::shared_ptr<Vertex> > VertexMap;
boost::shared_ptr<Vertex> > VertexMap;
struct DerivationHasher {
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
@ -114,7 +114,7 @@ private:
};
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
DerivationEqualityPred> DerivationSet;
DerivationEqualityPred> DerivationSet;
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);

View File

@ -269,9 +269,9 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
*/
void ChartManager::CalcNBest(
std::size_t n,
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
bool onlyDistinct) const
std::size_t n,
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
bool onlyDistinct) const
{
nBestList.clear();
if (n == 0 || m_source.GetSize() == 0) {
@ -282,7 +282,7 @@ void ChartManager::CalcNBest(
WordsRange range(0, m_source.GetSize()-1);
const ChartCell &lastCell = m_hypoStackColl.Get(range);
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
lastCell.GetAllSortedHypotheses());
lastCell.GetAllSortedHypotheses());
if (!topLevelHypos) {
return;
}

View File

@ -108,7 +108,9 @@ public:
return m_hypothesisId++;
}
const ChartParser &GetParser() const { return m_parser; }
const ChartParser &GetParser() const {
return m_parser;
}
};
}

View File

@ -183,7 +183,7 @@ void ChartParser::Create(const WordsRange &wordsRange, ChartParserCallback &to)
size_t maxSpan = decodeGraph.GetMaxChartSpan();
size_t last = m_source.GetSize()-1;
if (maxSpan != 0) {
last = min(last, wordsRange.GetStartPos()+maxSpan);
last = min(last, wordsRange.GetStartPos()+maxSpan);
}
if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) {
ruleLookupManager.GetChartRuleCollection(wordsRange, last, to);

View File

@ -48,7 +48,9 @@ public:
void Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to);
const std::vector<Phrase*> &GetUnknownSources() const { return m_unksrcs; }
const std::vector<Phrase*> &GetUnknownSources() const {
return m_unksrcs;
}
private:
std::vector<Phrase*> m_unksrcs;
@ -69,7 +71,9 @@ public:
size_t GetSize() const;
const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
const InputPath &GetInputPath(WordsRange &range) const;
const std::vector<Phrase*> &GetUnknownSources() const { return m_unknown.GetUnknownSources(); }
const std::vector<Phrase*> &GetUnknownSources() const {
return m_unknown.GetUnknownSources();
}
private:
ChartParserUnknown m_unknown;

View File

@ -161,11 +161,11 @@ void ChartTranslationOptionList::ApplyThreshold()
float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const
{
const HypoList *stack = chartCell->GetStack().cube;
assert(stack);
assert(!stack->empty());
const ChartHypothesis &bestHypo = **(stack->begin());
return bestHypo.GetTotalScore();
const HypoList *stack = chartCell->GetStack().cube;
assert(stack);
assert(!stack->empty());
const ChartHypothesis &bestHypo = **(stack->begin());
return bestHypo.GetTotalScore();
}
void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath)

View File

@ -14,299 +14,299 @@
namespace Moses
{
struct CNStats {
size_t created,destr,read,colls,words;
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
~CNStats() {
print(std::cerr);
}
struct CNStats {
size_t created,destr,read,colls,words;
void createOne() {
++created;
}
void destroyOne() {
++destr;
}
void collect(const ConfusionNet& cn) {
++read;
colls+=cn.GetSize();
for(size_t i=0; i<cn.GetSize(); ++i)
words+=cn[i].size();
}
void print(std::ostream& out) const {
if(created>0) {
out<<"confusion net statistics:\n"
" created:\t"<<created<<"\n"
" destroyed:\t"<<destr<<"\n"
" succ. read:\t"<<read<<"\n"
" columns:\t"<<colls<<"\n"
" words:\t"<<words<<"\n"
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
"\n\n";
}
}
};
CNStats stats;
size_t
ConfusionNet::
GetColumnIncrement(size_t i, size_t j) const
{
(void) i;
(void) j;
return 1;
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
~CNStats() {
print(std::cerr);
}
ConfusionNet::
ConfusionNet()
: InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsChart()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
void createOne() {
++created;
}
void destroyOne() {
++destr;
}
ConfusionNet::
~ConfusionNet()
{
stats.destroyOne();
void collect(const ConfusionNet& cn) {
++read;
colls+=cn.GetSize();
for(size_t i=0; i<cn.GetSize(); ++i)
words+=cn[i].size();
}
ConfusionNet::
ConfusionNet(Sentence const& s)
{
data.resize(s.GetSize());
for(size_t i=0; i<s.GetSize(); ++i) {
ScorePair scorePair;
std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
data[i].push_back(temp);
void print(std::ostream& out) const {
if(created>0) {
out<<"confusion net statistics:\n"
" created:\t"<<created<<"\n"
" destroyed:\t"<<destr<<"\n"
" succ. read:\t"<<read<<"\n"
" columns:\t"<<colls<<"\n"
" words:\t"<<words<<"\n"
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
"\n\n";
}
}
};
bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
{
VERBOSE(2, "read confusion net with format "<<format<<"\n");
switch(format) {
case 0:
return ReadFormat0(in,factorOrder);
case 1:
return ReadFormat1(in,factorOrder);
default:
std::stringstream strme;
strme << "ERROR: unknown format '"<<format
<<"' in ConfusionNet::Read";
UserMessage::Add(strme.str());
}
return false;
}
CNStats stats;
int
ConfusionNet::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder)
{
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
size_t
ConfusionNet::
GetColumnIncrement(size_t i, size_t j) const
{
(void) i;
(void) j;
return 1;
}
ConfusionNet::
ConfusionNet()
: InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsChart()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}
ConfusionNet::
~ConfusionNet()
{
stats.destroyOne();
}
ConfusionNet::
ConfusionNet(Sentence const& s)
{
data.resize(s.GetSize());
for(size_t i=0; i<s.GetSize(); ++i) {
ScorePair scorePair;
std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
data[i].push_back(temp);
}
}
bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
{
VERBOSE(2, "read confusion net with format "<<format<<"\n");
switch(format) {
case 0:
return ReadFormat0(in,factorOrder);
case 1:
return ReadFormat1(in,factorOrder);
default:
std::stringstream strme;
strme << "ERROR: unknown format '"<<format
<<"' in ConfusionNet::Read";
UserMessage::Add(strme.str());
}
return false;
}
int
ConfusionNet::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder)
{
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
}
#if 0
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
const std::vector<FactorType>& factorOrder)
{
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0; i<factorOrder.size(); ++i)
w.SetFactor(factorOrder[i],
FactorCollection::Instance().AddFactor
(Input,factorOrder[i], factorStrVector[i]));
}
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
const std::vector<FactorType>& factorOrder)
{
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0; i<factorOrder.size(); ++i)
w.SetFactor(factorOrder[i],
FactorCollection::Instance().AddFactor
(Input,factorOrder[i], factorStrVector[i]));
}
#endif
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
const StaticData &staticData = StaticData::Instance();
const InputFeature &inputFeature = InputFeature::Instance();
size_t numInputScores = inputFeature.GetNumInputScores();
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
const StaticData &staticData = StaticData::Instance();
const InputFeature &inputFeature = InputFeature::Instance();
size_t numInputScores = inputFeature.GetNumInputScores();
size_t numRealWordCount = inputFeature.GetNumRealWordsInInput();
size_t totalCount = numInputScores + numRealWordCount;
bool addRealWordCount = (numRealWordCount > 0);
size_t totalCount = numInputScores + numRealWordCount;
bool addRealWordCount = (numRealWordCount > 0);
std::string line;
while(getline(in,line)) {
std::istringstream is(line);
std::string word;
std::string line;
while(getline(in,line)) {
std::istringstream is(line);
std::string word;
Column col;
while(is>>word) {
Word w;
// String2Word(word,w,factorOrder);
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
std::vector<float> probs(totalCount, 0.0);
for(size_t i=0; i < numInputScores; i++) {
double prob;
if (!(is>>prob)) {
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
return false;
}
if(prob<0.0) {
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
prob=0.0;
} else if (prob>1.0) {
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
prob=1.0;
}
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
Column col;
while(is>>word) {
Word w;
// String2Word(word,w,factorOrder);
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
std::vector<float> probs(totalCount, 0.0);
for(size_t i=0; i < numInputScores; i++) {
double prob;
if (!(is>>prob)) {
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
return false;
}
if(prob<0.0) {
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
prob=0.0;
} else if (prob>1.0) {
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
prob=1.0;
}
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
}
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
if (addRealWordCount && word!=EPSILON && word!="")
probs.back() = -1.0;
ScorePair scorePair(probs);
col.push_back(std::make_pair(w,scorePair));
}
if(col.size()) {
data.push_back(col);
ShrinkToFit(data.back());
} else break;
}
return !data.empty();
}
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
if (addRealWordCount && word!=EPSILON && word!="")
probs.back() = -1.0;
bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
std::string line;
ScorePair scorePair(probs);
col.push_back(std::make_pair(w,scorePair));
}
if(col.size()) {
data.push_back(col);
ShrinkToFit(data.back());
} else break;
}
return !data.empty();
}
bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
{
Clear();
std::string line;
if(!getline(in,line)) return 0;
size_t s;
if(getline(in,line)) s=atoi(line.c_str());
else return 0;
data.resize(s);
for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
size_t s;
if(getline(in,line)) s=atoi(line.c_str());
else return 0;
data.resize(s);
for(size_t i=0; i<data.size(); ++i) {
if(!getline(in,line)) return 0;
std::istringstream is(line);
if(!(is>>s)) return 0;
std::string word;
double prob;
data[i].resize(s);
for(size_t j=0; j<s; ++j)
if(is>>word>>prob) {
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
data[i][j].second.denseScores = std::vector<float> (1);
data[i][j].second.denseScores.push_back((float) log(prob));
if(data[i][j].second.denseScores[0]<0) {
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
data[i][j].second.denseScores[0]=0.0;
}
// String2Word(word,data[i][j].first,factorOrder);
Word& w = data[i][j].first;
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
} else return 0;
}
return !data.empty();
std::istringstream is(line);
if(!(is>>s)) return 0;
std::string word;
double prob;
data[i].resize(s);
for(size_t j=0; j<s; ++j)
if(is>>word>>prob) {
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
data[i][j].second.denseScores = std::vector<float> (1);
data[i][j].second.denseScores.push_back((float) log(prob));
if(data[i][j].second.denseScores[0]<0) {
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
data[i][j].second.denseScores[0]=0.0;
}
// String2Word(word,data[i][j].first,factorOrder);
Word& w = data[i][j].first;
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
} else return 0;
}
return !data.empty();
}
void ConfusionNet::Print(std::ostream& out) const
{
out<<"conf net: "<<data.size()<<"\n";
for(size_t i=0; i<data.size(); ++i) {
out<<i<<" -- ";
for(size_t j=0; j<data[i].size(); ++j) {
out<<"("<<data[i][j].first.ToString()<<", ";
void ConfusionNet::Print(std::ostream& out) const
{
out<<"conf net: "<<data.size()<<"\n";
for(size_t i=0; i<data.size(); ++i) {
out<<i<<" -- ";
for(size_t j=0; j<data[i].size(); ++j) {
out<<"("<<data[i][j].first.ToString()<<", ";
// dense
std::vector<float>::const_iterator iterDense;
for(iterDense = data[i][j].second.denseScores.begin();
iterDense < data[i][j].second.denseScores.end();
++iterDense) {
out<<", "<<*iterDense;
}
// sparse
std::map<StringPiece, float>::const_iterator iterSparse;
for(iterSparse = data[i][j].second.sparseScores.begin();
iterSparse != data[i][j].second.sparseScores.end();
++iterSparse) {
out << ", " << iterSparse->first << "=" << iterSparse->second;
}
out<<") ";
// dense
std::vector<float>::const_iterator iterDense;
for(iterDense = data[i][j].second.denseScores.begin();
iterDense < data[i][j].second.denseScores.end();
++iterDense) {
out<<", "<<*iterDense;
}
out<<"\n";
// sparse
std::map<StringPiece, float>::const_iterator iterSparse;
for(iterSparse = data[i][j].second.sparseScores.begin();
iterSparse != data[i][j].second.sparseScores.end();
++iterSparse) {
out << ", " << iterSparse->first << "=" << iterSparse->second;
}
out<<") ";
}
out<<"\n\n";
out<<"\n";
}
out<<"\n\n";
}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
Phrase
ConfusionNet::
GetSubString(const WordsRange&) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
//return Phrase(Input);
}
Phrase
ConfusionNet::
GetSubString(const WordsRange&) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
//return Phrase(Input);
}
std::string
ConfusionNet::
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
{
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}
std::string
ConfusionNet::
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
{
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
return "";
}
#ifdef _WIN32
#pragma warning(disable:4716)
#endif
const Word& ConfusionNet::GetWord(size_t) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
}
const Word& ConfusionNet::GetWord(size_t) const
{
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
}
#ifdef _WIN32
#pragma warning(default:4716)
#endif
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
{
cn.Print(out);
return out;
}
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
{
cn.Print(out);
return out;
}
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection() const
{
size_t maxNoTransOptPerCoverage
= StaticData::Instance().GetMaxNoTransOptPerCoverage();
float translationOptionThreshold
= StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet
(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv);
return rv;
}
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection() const
{
size_t maxNoTransOptPerCoverage
= StaticData::Instance().GetMaxNoTransOptPerCoverage();
float translationOptionThreshold
= StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet
(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv);
return rv;
}
}

View File

@ -8,18 +8,18 @@ using namespace std;
namespace Moses
{
CountNonTerms::CountNonTerms(const std::string &line)
:StatelessFeatureFunction(line)
,m_all(true)
,m_sourceSyntax(false)
,m_targetSyntax(false)
:StatelessFeatureFunction(line)
,m_all(true)
,m_sourceSyntax(false)
,m_targetSyntax(false)
{
ReadParameters();
}
void CountNonTerms::Evaluate(const Phrase &sourcePhrase
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
const StaticData &staticData = StaticData::Instance();
@ -27,33 +27,33 @@ void CountNonTerms::Evaluate(const Phrase &sourcePhrase
size_t indScore = 0;
if (m_all) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
if (m_targetSyntax) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
if (m_sourceSyntax) {
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
++scores[indScore];
}
}
++indScore;
}
scoreBreakdown.PlusEquals(this, scores);
@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
if (key == "all") {
m_all = Scan<bool>(value);
} else if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value);
m_sourceSyntax = Scan<bool>(value);
} else if (key == "target-syntax") {
m_targetSyntax = Scan<bool>(value);
m_targetSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}

View File

@ -9,8 +9,9 @@ class CountNonTerms : public StatelessFeatureFunction
{
public:
CountNonTerms(const std::string &line);
bool IsUseable(const FactorMask &mask) const
{ return true; }
bool IsUseable(const FactorMask &mask) const {
return true;
}
void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase

View File

@ -5,439 +5,440 @@
namespace Moses
{
std::map< const std::string, DynamicCacheBasedLanguageModel * > DynamicCacheBasedLanguageModel::s_instance_map;
DynamicCacheBasedLanguageModel *DynamicCacheBasedLanguageModel::s_instance = NULL;
DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line)
std::map< const std::string, DynamicCacheBasedLanguageModel * > DynamicCacheBasedLanguageModel::s_instance_map;
DynamicCacheBasedLanguageModel *DynamicCacheBasedLanguageModel::s_instance = NULL;
DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line)
: StatelessFeatureFunction(1, line)
{
VERBOSE(2,"Initializing DynamicCacheBasedLanguageModel feature..." << std::endl);
m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
m_maxAge = 1000;
m_name = "default";
ReadParameters();
UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 DynamicCacheBasedLanguageModel feature named " + m_name + " is allowed");
s_instance_map[m_name] = this;
s_instance = this; //for back compatibility
}
DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {};
void DynamicCacheBasedLanguageModel::SetPreComputedScores()
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
precomputedScores.clear();
for (unsigned int i=0; i<m_maxAge; i++) {
precomputedScores.push_back(decaying_score(i));
}
if ( m_score_type == CBLM_SCORE_TYPE_HYPERBOLA
|| m_score_type == CBLM_SCORE_TYPE_POWER
|| m_score_type == CBLM_SCORE_TYPE_EXPONENTIAL
|| m_score_type == CBLM_SCORE_TYPE_COSINE ) {
precomputedScores.push_back(decaying_score(m_maxAge));
} else { // m_score_type = CBLM_SCORE_TYPE_XXXXXXXXX_REWARD
precomputedScores.push_back(0.0);
}
m_lower_score = precomputedScores[m_maxAge];
std::cerr << "SetPreComputedScores(): lower_score:" << m_lower_score << std::endl;
}
float DynamicCacheBasedLanguageModel::GetPreComputedScores(const unsigned int age)
{
if (age < precomputedScores.size()) {
return precomputedScores.at(age);
} else {
return precomputedScores.at(m_maxAge);
}
}
void DynamicCacheBasedLanguageModel::SetParameter(const std::string& key, const std::string& value)
{
VERBOSE(2, "DynamicCacheBasedLanguageModel::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
if (key == "cblm-query-type") {
SetQueryType(Scan<size_t>(value));
} else if (key == "cblm-score-type") {
SetScoreType(Scan<size_t>(value));
} else if (key == "cblm-max-age") {
SetMaxAge(Scan<unsigned int>(value));
} else if (key == "cblm-file") {
m_initfiles = Scan<std::string>(value);
} else if (key == "cblm-name") {
m_name = Scan<std::string>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp
, const TargetPhrase &tp
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
float score = m_lower_score;
switch(m_query_type) {
case CBLM_QUERY_TYPE_WHOLESTRING:
score = Evaluate_Whole_String(tp);
break;
case CBLM_QUERY_TYPE_ALLSUBSTRINGS:
score = Evaluate_All_Substrings(tp);
break;
default:
UTIL_THROW_IF2(false, "This score type (" << m_query_type << ") is unknown.");
}
scoreBreakdown.Assign(this, score);
}
float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const
{
//consider all words in the TargetPhrase as one n-gram
// and compute the decaying_score for the whole n-gram
// and return this value
decaying_cache_t::const_iterator it;
float score = m_lower_score;
std::string w = "";
size_t endpos = tp.GetSize();
for (size_t pos = 0 ; pos < endpos ; ++pos) {
w += tp.GetWord(pos).GetFactor(0)->GetString().as_string();
if ((pos == 0) && (endpos > 1)) {
w += " ";
}
}
it = m_cache.find(w);
VERBOSE(4,"cblm::Evaluate_Whole_String: searching w:|" << w << "|" << std::endl);
if (it != m_cache.end()) { //found!
score = ((*it).second).second;
VERBOSE(4,"cblm::Evaluate_Whole_String: found w:|" << w << "|" << std::endl);
}
VERBOSE(4,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl);
return score;
}
float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const
{
//loop over all n-grams in the TargetPhrase (no matter of n)
//and compute the decaying_score for all words
//and return their sum
decaying_cache_t::const_iterator it;
float score = 0.0;
for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) {
std::string w = "";
for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) {
w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string();
it = m_cache.find(w);
if (it != m_cache.end()) { //found!
score += ((*it).second).second;
VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl);
} else {
score += m_lower_score;
}
if (endpos == startpos) {
w += " ";
}
}
}
VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl);
return score;
}
void DynamicCacheBasedLanguageModel::Print() const
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
decaying_cache_t::const_iterator it;
std::cout << "Content of the cache of Cache-Based Language Model" << std::endl;
std::cout << "Size of the cache of Cache-Based Language Model:|" << m_cache.size() << "|" << std::endl;
for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) {
std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl;
}
}
void DynamicCacheBasedLanguageModel::Decay()
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
decaying_cache_t::iterator it;
unsigned int age;
float score;
for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) {
age=((*it).second).first + 1;
if (age > m_maxAge) {
m_cache.erase(it);
it--;
} else {
score = decaying_score(age);
decaying_cache_value_t p (age, score);
(*it).second = p;
}
}
}
void DynamicCacheBasedLanguageModel::Update(std::vector<std::string> words, int age)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
for (size_t j=0; j<words.size(); j++) {
words[j] = Trim(words[j]);
VERBOSE(3,"CacheBasedLanguageModel::Update word[" << j << "]:"<< words[j] << " age:" << age << " decaying_score(age):" << decaying_score(age) << std::endl);
decaying_cache_value_t p (age,decaying_score(age));
std::pair<std::string, decaying_cache_value_t> e (words[j],p);
m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
m_cache.insert(e); //insert the entry
}
}
void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries)
{
if (entries != "") {
VERBOSE(3,"entries:|" << entries << "|" << std::endl);
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
ClearEntries(elements);
}
}
void DynamicCacheBasedLanguageModel::ClearEntries(std::vector<std::string> words)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
for (size_t j=0; j<words.size(); j++) { words[j] = Trim(words[j]);
VERBOSE(3,"CacheBasedLanguageModel::ClearEntries word[" << j << "]:"<< words[j] << std::endl);
m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
}
}
void DynamicCacheBasedLanguageModel::Insert(std::string &entries)
{
if (entries != "") {
VERBOSE(3,"entries:|" << entries << "|" << std::endl);
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
Insert(elements);
}
}
void DynamicCacheBasedLanguageModel::Insert(std::vector<std::string> ngrams)
{
VERBOSE(3,"DynamicCacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl);
Decay();
Update(ngrams,1);
// Print();
IFVERBOSE(2) Print();
}
void DynamicCacheBasedLanguageModel::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
{
if (dlt_meta.find("cblm") != dlt_meta.end()) {
Insert(dlt_meta["cblm"]);
}
if (dlt_meta.find("cblm-command") != dlt_meta.end()) {
Execute(dlt_meta["cblm-command"]);
}
if (dlt_meta.find("cblm-file") != dlt_meta.end()) {
Load(dlt_meta["cblm-file"]);
}
if (dlt_meta.find("cblm-clear-entries") != dlt_meta.end()) {
ClearEntries(dlt_meta["cblm-clear-entries"]);
}
if (dlt_meta.find("cblm-clear-all") != dlt_meta.end()) {
Clear();
}
}
void DynamicCacheBasedLanguageModel::Execute(std::string command)
{
VERBOSE(2,"DynamicCacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl);
std::vector<std::string> commands = Tokenize(command, "||");
Execute(commands);
}
void DynamicCacheBasedLanguageModel::Execute(std::vector<std::string> commands)
{
for (size_t j=0; j<commands.size(); j++) {
Execute_Single_Command(commands[j]);
}
IFVERBOSE(2) Print();
}
// Execute one cache command.  Recognized commands:
//   "clear"                  - wipe the cache
//   "settype_wholestring"    - query the cache with the whole phrase only
//   "settype_allsubstrings"  - query the cache with every substring
// Unknown commands are logged and skipped.
void DynamicCacheBasedLanguageModel::Execute_Single_Command(std::string command)
{
  VERBOSE(2,"CacheBasedLanguageModel::Execute_Single_Command(std::string command:|" << command << "|" << std::endl);
  if (command == "clear") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Cache cleared." << std::endl);
    Clear();
  } else if (command == "settype_wholestring") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_WHOLESTRING << " (CBLM_QUERY_TYPE_WHOLESTRING)." << std::endl);
    SetQueryType(CBLM_QUERY_TYPE_WHOLESTRING);
  } else if (command == "settype_allsubstrings") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << " (CBLM_QUERY_TYPE_ALLSUBSTRINGS)." << std::endl);
    SetQueryType(CBLM_QUERY_TYPE_ALLSUBSTRINGS);
  } else {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
  }
}
// Drop every entry from the cache.
void DynamicCacheBasedLanguageModel::Clear()
{
#ifdef WITH_THREADS
  // Exclusive lock: this method mutates m_cache; the previous shared
  // (read) lock did not exclude concurrent access.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_cache.clear();
}
// Initialize the feature: precompute the age-dependent score table,
// then load the files given at construction time (m_initfiles).
void DynamicCacheBasedLanguageModel::Load()
{
  SetPreComputedScores();
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl);
  Load(m_initfiles);
}
// Load cache entries from a "||"-separated list of file names.
// Bug fix: the old code tokenized m_initfiles, silently ignoring the
// 'file' argument — so e.g. a "cblm-file" DLT instruction reloaded the
// init files instead of the requested one.
void DynamicCacheBasedLanguageModel::Load(const std::string file)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load(const std::string file)" << std::endl);
  std::vector<std::string> files = Tokenize(file, "||");
  Load_Multiple_Files(files);
}
// Load each file in turn via Load_Single_File.
void DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
  for (std::vector<std::string>::const_iterator f = files.begin(); f != files.end(); ++f) {
    Load_Single_File(*f);
  }
}
// Load cache entries from a single file.
//
// File format (one entry per line):
//   age || n-gram
//   age || n-gram || n-gram || n-gram || ...
// Each n-gram is a sequence of words of arbitrary length.  Entries may
// be repeated; the last occurrence overwrites earlier ones.
void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)" << std::endl);
  VERBOSE(2,"Loading data from the cache file " << file << std::endl);
  InputFileStream cacheFile(file);
  std::string line;
  int age;
  while (getline(cacheFile, line)) {
    if (line.empty()) {
      continue; // tolerate blank lines instead of treating them as errors
    }
    std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||" );
    if (vecStr.size() >= 2) {
      age = Scan<int>(vecStr[0]);
      vecStr.erase(vecStr.begin());
      Update(vecStr,age);
    } else {
      // Bug fix: the old UTIL_THROW_IF2(false, ...) could never fire
      // (it throws only when its condition is true), so malformed lines
      // were silently accepted.  Throw unconditionally here.
      UTIL_THROW_IF2(true, "The format of the loaded file is wrong: " << line);
    }
  }
  IFVERBOSE(2) Print();
}
// Select how the cache is queried (whole string vs. all substrings).
// Unknown values fall back to CBLM_QUERY_TYPE_ALLSUBSTRINGS.
void DynamicCacheBasedLanguageModel::SetQueryType(size_t type)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_query_type is written here; a shared (read) lock
  // did not exclude concurrent access.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_query_type = type;
  if ( m_query_type != CBLM_QUERY_TYPE_WHOLESTRING
       && m_query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) {
    VERBOSE(2, "This query type " << m_query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl);
    m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
  }
  VERBOSE(2, "CacheBasedLanguageModel QueryType: " << m_query_type << std::endl);
}
// Select the age->score decay function.  Unknown values fall back to
// CBLM_SCORE_TYPE_HYPERBOLA.
void DynamicCacheBasedLanguageModel::SetScoreType(size_t type)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_score_type is written here.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_score_type = type;
  if ( m_score_type != CBLM_SCORE_TYPE_HYPERBOLA
       && m_score_type != CBLM_SCORE_TYPE_POWER
       && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL
       && m_score_type != CBLM_SCORE_TYPE_COSINE
       && m_score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD
       && m_score_type != CBLM_SCORE_TYPE_POWER_REWARD
       && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
    VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
    m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
  }
  VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << m_score_type << std::endl);
}
// Set the maximum age after which cache entries are evicted by Decay().
// NOTE(review): this does not rebuild precomputedScores; call
// SetPreComputedScores() afterwards to keep the score table consistent.
void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_maxAge is written here.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_maxAge = age;
  VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << m_maxAge << std::endl);
}
// Map an entry's age to a score according to m_score_type.
// The plain variants are penalties in [-1, 0] (0 at age 1, approaching
// -1 as age grows); the *_REWARD variants are bonuses in (0, 1]
// (1 at age 1, approaching 0 as age grows).
// NOTE(review): age 0 would divide by zero in the hyperbola variants
// (yielding +inf); callers in this file pass age >= 1 — confirm.
float DynamicCacheBasedLanguageModel::decaying_score(const unsigned int age)
{
  float sc;
  switch(m_score_type) {
  case CBLM_SCORE_TYPE_HYPERBOLA:
    sc = (float) 1.0/age - 1.0;
    break;
  case CBLM_SCORE_TYPE_POWER:
    sc = (float) pow(age, -0.25) - 1.0;
    break;
  case CBLM_SCORE_TYPE_EXPONENTIAL:
    sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
    break;
  case CBLM_SCORE_TYPE_COSINE:
    sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
    break;
  case CBLM_SCORE_TYPE_HYPERBOLA_REWARD:
    sc = (float) 1.0/age;
    break;
  case CBLM_SCORE_TYPE_POWER_REWARD:
    sc = (float) pow(age, -0.25);
    break;
  case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD:
    sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
    break;
  default:
    // unreachable if SetScoreType() sanitized m_score_type
    sc = -1.0;
  }
  return sc;
}
{
VERBOSE(2,"Initializing DynamicCacheBasedLanguageModel feature..." << std::endl);
m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
m_maxAge = 1000;
m_name = "default";
ReadParameters();
UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 DynamicCacheBasedLanguageModel feature named " + m_name + " is allowed");
s_instance_map[m_name] = this;
s_instance = this; //for back compatibility
}
// Nothing to release explicitly; members clean up via their own
// destructors.  (Removed the extraneous ';' after the function body.)
DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {}
// Rebuild the age -> score lookup table for ages 0..m_maxAge.
// Slot i holds decaying_score(i); the final slot (index m_maxAge) also
// provides m_lower_score, the value returned for cache misses: for the
// penalty score types that is decaying_score(m_maxAge), for the
// *_REWARD types it is 0.0.
void DynamicCacheBasedLanguageModel::SetPreComputedScores()
{
#ifdef WITH_THREADS
  // Exclusive lock: precomputedScores and m_lower_score are rewritten
  // here; the previous shared (read) lock did not exclude readers.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  precomputedScores.clear();
  for (unsigned int i=0; i<m_maxAge; i++) {
    // NOTE(review): slot 0 is decaying_score(0), which is +inf for the
    // hyperbola variants; it appears unused since ages start at 1 — confirm.
    precomputedScores.push_back(decaying_score(i));
  }
  if ( m_score_type == CBLM_SCORE_TYPE_HYPERBOLA
       || m_score_type == CBLM_SCORE_TYPE_POWER
       || m_score_type == CBLM_SCORE_TYPE_EXPONENTIAL
       || m_score_type == CBLM_SCORE_TYPE_COSINE ) {
    precomputedScores.push_back(decaying_score(m_maxAge));
  } else { // m_score_type = CBLM_SCORE_TYPE_XXXXXXXXX_REWARD
    precomputedScores.push_back(0.0);
  }
  m_lower_score = precomputedScores[m_maxAge];
  std::cerr << "SetPreComputedScores(): lower_score:" << m_lower_score << std::endl;
}
// Return the precomputed score for the given age; ages beyond the table
// fall back to the value stored for m_maxAge.
float DynamicCacheBasedLanguageModel::GetPreComputedScores(const unsigned int age)
{
  if (age < precomputedScores.size()) {
    return precomputedScores.at(age);
  } else {
    // NOTE(review): assumes the table has at least m_maxAge+1 entries
    // (as built by SetPreComputedScores); if SetMaxAge() enlarged
    // m_maxAge afterwards, .at() throws std::out_of_range — confirm.
    return precomputedScores.at(m_maxAge);
  }
}
// Handle this feature's configuration keys; anything unrecognized is
// delegated to StatelessFeatureFunction::SetParameter.
//   cblm-query-type : how the cache is queried (whole string / substrings)
//   cblm-score-type : which decay function maps age to score
//   cblm-max-age    : eviction threshold for Decay()
//   cblm-file       : "||"-separated init files loaded in Load()
//   cblm-name       : instance name used for the s_instance_map registry
void DynamicCacheBasedLanguageModel::SetParameter(const std::string& key, const std::string& value)
{
  VERBOSE(2, "DynamicCacheBasedLanguageModel::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
  if (key == "cblm-query-type") {
    SetQueryType(Scan<size_t>(value));
  } else if (key == "cblm-score-type") {
    SetScoreType(Scan<size_t>(value));
  } else if (key == "cblm-max-age") {
    SetMaxAge(Scan<unsigned int>(value));
  } else if (key == "cblm-file") {
    m_initfiles = Scan<std::string>(value);
  } else if (key == "cblm-name") {
    m_name = Scan<std::string>(value);
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
}
// Score a target phrase against the cache according to m_query_type and
// assign the result to this feature's component of the score breakdown.
void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp
    , const TargetPhrase &tp
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  float score = m_lower_score;
  switch(m_query_type) {
  case CBLM_QUERY_TYPE_WHOLESTRING:
    score = Evaluate_Whole_String(tp);
    break;
  case CBLM_QUERY_TYPE_ALLSUBSTRINGS:
    score = Evaluate_All_Substrings(tp);
    break;
  default:
    // Bug fix: UTIL_THROW_IF2(false, ...) could never fire (it throws
    // only when its condition is true), so an unknown query type fell
    // through silently; the message also wrongly said "score type".
    UTIL_THROW_IF2(true, "This query type (" << m_query_type << ") is unknown.");
  }
  scoreBreakdown.Assign(this, score);
}
// Treat the whole target phrase as one n-gram: build its surface string,
// look it up in the cache, and return the cached score on a hit or
// m_lower_score on a miss.
float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const
{
  //consider all words in the TargetPhrase as one n-gram
  // and compute the decaying_score for the whole n-gram
  // and return this value
  decaying_cache_t::const_iterator it;
  float score = m_lower_score;
  std::string w = "";
  size_t endpos = tp.GetSize();
  for (size_t pos = 0 ; pos < endpos ; ++pos) {
    w += tp.GetWord(pos).GetFactor(0)->GetString().as_string();
    // NOTE(review): a separator is appended only after the FIRST word,
    // so phrases of 3+ words are keyed as "w1 w2w3..." — verify this
    // matches the key format produced by Update(), otherwise lookups
    // for longer phrases can never hit.
    if ((pos == 0) && (endpos > 1)) {
      w += " ";
    }
  }
  it = m_cache.find(w);
  VERBOSE(4,"cblm::Evaluate_Whole_String: searching w:|" << w << "|" << std::endl);
  if (it != m_cache.end()) { //found!
    score = ((*it).second).second;
    VERBOSE(4,"cblm::Evaluate_Whole_String: found w:|" << w << "|" << std::endl);
  }
  VERBOSE(4,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl);
  return score;
}
// Score every contiguous substring (n-gram) of the target phrase against
// the cache and return the sum of the individual scores; misses
// contribute m_lower_score each.
float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const
{
  //loop over all n-grams in the TargetPhrase (no matter of n)
  //and compute the decaying_score for all words
  //and return their sum
  decaying_cache_t::const_iterator it;
  float score = 0.0;
  for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) {
    std::string w = "";
    for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) {
      w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string();
      it = m_cache.find(w);
      if (it != m_cache.end()) { //found!
        score += ((*it).second).second;
        VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl);
      } else {
        score += m_lower_score;
      }
      // NOTE(review): a separator is appended only after the first word
      // of each substring, so 3+ word n-grams are keyed as "w1 w2w3..."
      // — verify this matches the key format produced by Update().
      if (endpos == startpos) {
        w += " ";
      }
    }
  }
  VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl);
  return score;
}
void DynamicCacheBasedLanguageModel::Print() const
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
decaying_cache_t::const_iterator it;
std::cout << "Content of the cache of Cache-Based Language Model" << std::endl;
std::cout << "Size of the cache of Cache-Based Language Model:|" << m_cache.size() << "|" << std::endl;
for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) {
std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl;
}
}
// Age every cache entry by one step: entries older than m_maxAge are
// evicted, the rest get their score recomputed for the new age.
void DynamicCacheBasedLanguageModel::Decay()
{
#ifdef WITH_THREADS
  // Exclusive lock: this method mutates m_cache; the previous shared
  // (read) lock did not exclude concurrent access.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  decaying_cache_t::iterator it = m_cache.begin();
  while (it != m_cache.end()) {
    unsigned int age = it->second.first + 1;
    if (age > m_maxAge) {
      // Bug fix: the old code did "m_cache.erase(it); it--;", which
      // decrements an iterator that erase() has just invalidated —
      // undefined behaviour.  Post-incrementing before the erase keeps
      // a valid iterator to the next element.
      m_cache.erase(it++);
    } else {
      it->second = decaying_cache_value_t(age, decaying_score(age));
      ++it;
    }
  }
}
// (Re)insert each word sequence with the given age and the corresponding
// decayed score; existing entries for the same key are overwritten.
void DynamicCacheBasedLanguageModel::Update(std::vector<std::string> words, int age)
{
#ifdef WITH_THREADS
  // Exclusive lock: this method mutates m_cache; the previous shared
  // (read) lock did not exclude concurrent access.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
  for (size_t j=0; j<words.size(); j++) {
    words[j] = Trim(words[j]);
    VERBOSE(3,"CacheBasedLanguageModel::Update word[" << j << "]:"<< words[j] << " age:" << age << " decaying_score(age):" << decaying_score(age) << std::endl);
    decaying_cache_value_t p (age,decaying_score(age));
    // operator[] overwrites any existing entry in a single lookup
    // (the old code did a separate erase() followed by insert()).
    m_cache[words[j]] = p;
  }
}
// Parse a "||"-separated list of entries and remove them from the cache.
// An empty string is a no-op.
void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries)
{
  if (entries == "") {
    return;
  }
  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
  VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
  ClearEntries(elements);
}
// Remove the given entries from the cache.
// Each word is trimmed before lookup; erasing a missing key is a no-op.
void DynamicCacheBasedLanguageModel::ClearEntries(std::vector<std::string> words)
{
#ifdef WITH_THREADS
  // Exclusive lock: this method mutates m_cache, so a shared (read) lock
  // would not exclude concurrent readers or writers.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
  for (size_t j=0; j<words.size(); j++) {
    words[j] = Trim(words[j]);
    VERBOSE(3,"CacheBasedLanguageModel::ClearEntries word[" << j << "]:"<< words[j] << std::endl);
    m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
  }
}
// Parse a "||"-separated list of n-grams and insert them into the cache;
// an empty string is a no-op.
void DynamicCacheBasedLanguageModel::Insert(std::string &entries)
{
  if (entries != "") {
    VERBOSE(3,"entries:|" << entries << "|" << std::endl);
    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||");
    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
    Insert(elements);
  }
}
// Age every cached entry by one step, then (re)insert the given n-grams
// with the freshest age (1).  Dumps the cache at verbosity >= 2.
void DynamicCacheBasedLanguageModel::Insert(std::vector<std::string> ngrams)
{
  VERBOSE(3,"DynamicCacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl);
  Decay();
  Update(ngrams,1);
  // Print();
  IFVERBOSE(2) Print();
}
// Dispatch DLT meta-instructions addressed to this feature: insert
// n-grams ("cblm"), run commands ("cblm-command"), load files
// ("cblm-file"), remove entries ("cblm-clear-entries"), or wipe the
// cache ("cblm-clear-all").
void DynamicCacheBasedLanguageModel::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
{
  if (dlt_meta.find("cblm") != dlt_meta.end()) {
    Insert(dlt_meta["cblm"]);
  }
  if (dlt_meta.find("cblm-command") != dlt_meta.end()) {
    Execute(dlt_meta["cblm-command"]);
  }
  if (dlt_meta.find("cblm-file") != dlt_meta.end()) {
    Load(dlt_meta["cblm-file"]);
  }
  if (dlt_meta.find("cblm-clear-entries") != dlt_meta.end()) {
    ClearEntries(dlt_meta["cblm-clear-entries"]);
  }
  if (dlt_meta.find("cblm-clear-all") != dlt_meta.end()) {
    Clear();
  }
}
// Split a command string and execute each piece.
void DynamicCacheBasedLanguageModel::Execute(std::string command)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl);
  // NOTE(review): Tokenize treats "||" as a set of single-character
  // delimiters (splits on '|'), unlike the multi-character "||"
  // separator used by Insert/ClearEntries — confirm this is intended.
  std::vector<std::string> commands = Tokenize(command, "||");
  Execute(commands);
}
// Run every command in order, then dump the cache at verbosity >= 2.
void DynamicCacheBasedLanguageModel::Execute(std::vector<std::string> commands)
{
  for (size_t j=0; j<commands.size(); j++) {
    Execute_Single_Command(commands[j]);
  }
  IFVERBOSE(2) Print();
}
// Execute one cache command: "clear", "settype_wholestring" or
// "settype_allsubstrings".  Unknown commands are logged and skipped.
void DynamicCacheBasedLanguageModel::Execute_Single_Command(std::string command)
{
  VERBOSE(2,"CacheBasedLanguageModel::Execute_Single_Command(std::string command:|" << command << "|" << std::endl);
  if (command == "clear") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Cache cleared." << std::endl);
    Clear();
  } else if (command == "settype_wholestring") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_WHOLESTRING << " (CBLM_QUERY_TYPE_WHOLESTRING)." << std::endl);
    SetQueryType(CBLM_QUERY_TYPE_WHOLESTRING);
  } else if (command == "settype_allsubstrings") {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "|. Query type set to " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << " (CBLM_QUERY_TYPE_ALLSUBSTRINGS)." << std::endl);
    SetQueryType(CBLM_QUERY_TYPE_ALLSUBSTRINGS);
  } else {
    VERBOSE(2,"CacheBasedLanguageModel Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
  }
}
// Drop every entry from the cache.
void DynamicCacheBasedLanguageModel::Clear()
{
#ifdef WITH_THREADS
  // Exclusive lock: this method mutates m_cache; the previous shared
  // (read) lock did not exclude concurrent access.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_cache.clear();
}
// Initialize the feature: precompute the age-dependent score table,
// then load the files given at construction time (m_initfiles).
void DynamicCacheBasedLanguageModel::Load()
{
  SetPreComputedScores();
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl);
  Load(m_initfiles);
}
// Load cache entries from a "||"-separated list of file names.
// Bug fix: the old code tokenized m_initfiles, silently ignoring the
// 'file' argument — so e.g. a "cblm-file" DLT instruction reloaded the
// init files instead of the requested one.
void DynamicCacheBasedLanguageModel::Load(const std::string file)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load(const std::string file)" << std::endl);
  std::vector<std::string> files = Tokenize(file, "||");
  Load_Multiple_Files(files);
}
// Load each file in turn via Load_Single_File.
void DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
  for(size_t j = 0; j < files.size(); ++j) {
    Load_Single_File(files[j]);
  }
}
// Load cache entries from a single file.
void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)
{
  VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)" << std::endl);
  //file format
  //age || n-gram
  //age || n-gram || n-gram || n-gram || ...
  //....
  //each n-gram is a sequence of n words (no matter of n)
  //
  //there is no limit on the size of n
  //
  //entries can be repeated, but the last entry overwrites the previous
  VERBOSE(2,"Loading data from the cache file " << file << std::endl);
  InputFileStream cacheFile(file);
  std::string line;
  int age;
  std::vector<std::string> words;
  while (getline(cacheFile, line)) {
    std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||" );
    if (vecStr.size() >= 2) {
      age = Scan<int>(vecStr[0]);
      vecStr.erase(vecStr.begin());
      Update(vecStr,age);
    } else {
      // NOTE(review): UTIL_THROW_IF2 throws only when its condition is
      // true, so this call with 'false' can never fire — malformed lines
      // are silently accepted.  Confirm whether it should be 'true'.
      UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line);
    }
  }
  IFVERBOSE(2) Print();
}
// Select how the cache is queried (whole string vs. all substrings).
// Unknown values fall back to CBLM_QUERY_TYPE_ALLSUBSTRINGS.
void DynamicCacheBasedLanguageModel::SetQueryType(size_t type)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_query_type is written here.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_query_type = type;
  if ( m_query_type != CBLM_QUERY_TYPE_WHOLESTRING
       && m_query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) {
    VERBOSE(2, "This query type " << m_query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl);
    m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS;
  }
  VERBOSE(2, "CacheBasedLanguageModel QueryType: " << m_query_type << std::endl);
}
// Select the age->score decay function.  Unknown values fall back to
// CBLM_SCORE_TYPE_HYPERBOLA.
void DynamicCacheBasedLanguageModel::SetScoreType(size_t type)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_score_type is written here.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_score_type = type;
  if ( m_score_type != CBLM_SCORE_TYPE_HYPERBOLA
       && m_score_type != CBLM_SCORE_TYPE_POWER
       && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL
       && m_score_type != CBLM_SCORE_TYPE_COSINE
       && m_score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD
       && m_score_type != CBLM_SCORE_TYPE_POWER_REWARD
       && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
    VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
    m_score_type = CBLM_SCORE_TYPE_HYPERBOLA;
  }
  VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << m_score_type << std::endl);
}
// Set the maximum age after which cache entries are evicted by Decay().
// NOTE(review): this does not rebuild precomputedScores; call
// SetPreComputedScores() afterwards to keep the score table consistent.
void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age)
{
#ifdef WITH_THREADS
  // Exclusive lock: m_maxAge is written here.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  m_maxAge = age;
  VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << m_maxAge << std::endl);
}
// Map an entry's age to a score according to m_score_type.
// The plain variants are penalties in [-1, 0] (0 at age 1); the
// *_REWARD variants are bonuses in (0, 1] (1 at age 1).
// NOTE(review): age 0 would divide by zero in the hyperbola variants
// (yielding +inf); callers in this file pass age >= 1 — confirm.
float DynamicCacheBasedLanguageModel::decaying_score(const unsigned int age)
{
  float sc;
  switch(m_score_type) {
  case CBLM_SCORE_TYPE_HYPERBOLA:
    sc = (float) 1.0/age - 1.0;
    break;
  case CBLM_SCORE_TYPE_POWER:
    sc = (float) pow(age, -0.25) - 1.0;
    break;
  case CBLM_SCORE_TYPE_EXPONENTIAL:
    sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
    break;
  case CBLM_SCORE_TYPE_COSINE:
    sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
    break;
  case CBLM_SCORE_TYPE_HYPERBOLA_REWARD:
    sc = (float) 1.0/age;
    break;
  case CBLM_SCORE_TYPE_POWER_REWARD:
    sc = (float) pow(age, -0.25);
    break;
  case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD:
    sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
    break;
  default:
    // unreachable if SetScoreType() sanitized m_score_type
    sc = -1.0;
  }
  return sc;
}
}

View File

@ -43,7 +43,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
size_t m_query_type; //way of querying the cache
size_t m_score_type; //way of scoring entries of the cache
std::string m_initfiles; // vector of files loaded in the initialization phase
std::string m_name; // internal name to identify this instance of the Cache-based pseudo LM
std::string m_name; // internal name to identify this instance of the Cache-based pseudo LM
float m_lower_score; //lower_bound_score for no match
std::vector<float> precomputedScores;
unsigned int m_maxAge;
@ -64,7 +64,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
void Update(std::vector<std::string> words, int age);
void ClearEntries(std::vector<std::string> entries);
void Execute(std::vector<std::string> commands);
void Execute_Single_Command(std::string command);
@ -80,24 +80,28 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
protected:
static DynamicCacheBasedLanguageModel *s_instance;
static std::map< const std::string, DynamicCacheBasedLanguageModel * > s_instance_map;
static std::map< const std::string, DynamicCacheBasedLanguageModel * > s_instance_map;
public:
DynamicCacheBasedLanguageModel(const std::string &line);
~DynamicCacheBasedLanguageModel();
inline const std::string GetName() { return m_name; };
inline void SetName(const std::string name){ m_name = name; }
static const DynamicCacheBasedLanguageModel& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static DynamicCacheBasedLanguageModel& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
inline const std::string GetName() {
return m_name;
};
inline void SetName(const std::string name) {
m_name = name;
}
static const DynamicCacheBasedLanguageModel& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static DynamicCacheBasedLanguageModel& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static const DynamicCacheBasedLanguageModel& Instance() {
return *s_instance;
}
@ -113,7 +117,7 @@ public:
void Load(const std::string file);
void Execute(std::string command);
void SetParameter(const std::string& key, const std::string& value);
void ExecuteDlt(std::map<std::string, std::string> dlt_meta);
void ExecuteDlt(std::map<std::string, std::string> dlt_meta);
void ClearEntries(std::string &entries);
void Insert(std::string &entries);

View File

@ -250,13 +250,13 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
}
} // namespace Moses

View File

@ -7,7 +7,7 @@ namespace Moses
{
HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
:StatelessFeatureFunction(2, line)
:StatelessFeatureFunction(2, line)
{
ReadParameters();

View File

@ -14,8 +14,9 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
public:
HyperParameterAsWeight(const std::string &line);
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
virtual bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase

View File

@ -18,14 +18,14 @@ InputFeature::InputFeature(const std::string &line)
{
m_numInputScores = this->m_numScoreComponents;
ReadParameters();
UTIL_THROW_IF2(s_instance, "Can only have 1 input feature");
s_instance = this;
}
void InputFeature::Load()
{
const PhraseDictionary *pt = PhraseDictionary::GetColl()[0];
const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast<const PhraseDictionaryTreeAdaptor*>(pt);

View File

@ -3,7 +3,7 @@
namespace Moses
{
ReferenceComparison::ReferenceComparison(const std::string &line)
:StatelessFeatureFunction(0, line)
:StatelessFeatureFunction(0, line)
{
}

View File

@ -10,34 +10,36 @@ namespace Moses
class ReferenceComparison : public StatelessFeatureFunction
{
public:
ReferenceComparison(const std::string &line);
ReferenceComparison(const std::string &line);
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
virtual bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
virtual void Evaluate(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void Evaluate(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void EvaluateChart(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void EvaluateChart(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
std::vector<float> DefaultWeights() const
{ return std::vector<float>(); }
std::vector<float> DefaultWeights() const {
return std::vector<float>();
}
protected:

View File

@ -5,8 +5,8 @@
namespace Moses
{
RuleAmbiguity::RuleAmbiguity(const std::string &line)
:StatelessFeatureFunction(1, line)
,m_sourceSyntax(true)
:StatelessFeatureFunction(1, line)
,m_sourceSyntax(true)
{
}
@ -17,32 +17,31 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax)
}
void RuleAmbiguity::Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
// source can't be empty, right?
float score = 0;
int count = 0;
for (size_t i = 0; i < source.GetSize() - 0; ++i) {
const Word &word = source.GetWord(i);
bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
if (ambiguous) {
++count;
}
else {
if (count > 0) {
score += count;
}
count = -1;
}
const Word &word = source.GetWord(i);
bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
if (ambiguous) {
++count;
} else {
if (count > 0) {
score += count;
}
count = -1;
}
}
// 1st & last always adjacent to ambiguity
++count;
if (count > 0) {
score += count;
score += count;
}
scoreBreakdown.PlusEquals(this, score);
@ -51,7 +50,7 @@ void RuleAmbiguity::Evaluate(const Phrase &source
void RuleAmbiguity::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value);
m_sourceSyntax = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}

View File

@ -9,32 +9,33 @@ namespace Moses
class RuleAmbiguity : public StatelessFeatureFunction
{
public:
RuleAmbiguity(const std::string &line);
RuleAmbiguity(const std::string &line);
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
virtual bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
virtual void Evaluate(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void Evaluate(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void EvaluateChart(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
virtual void EvaluateChart(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
void SetParameter(const std::string& key, const std::string& value);
void SetParameter(const std::string& key, const std::string& value);
protected:
bool m_sourceSyntax;

View File

@ -4,18 +4,18 @@
namespace Moses
{
SetSourcePhrase::SetSourcePhrase(const std::string &line)
:StatelessFeatureFunction(1, line)
:StatelessFeatureFunction(1, line)
{
m_tuneable = false;
ReadParameters();
}
void SetSourcePhrase::Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
targetPhrase.SetRuleSource(source);
}
}

View File

@ -11,19 +11,20 @@ class SetSourcePhrase : public StatelessFeatureFunction
public:
SetSourcePhrase(const std::string &line);
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
virtual bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
virtual void Evaluate(const Hypothesis& hypo,
@ -34,8 +35,9 @@ public:
ScoreComponentCollection* accumulator) const
{}
std::vector<float> DefaultWeights() const
{ return std::vector<float>(); }
std::vector<float> DefaultWeights() const {
return std::vector<float>();
}
};

View File

@ -24,8 +24,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore
} else if (key == "path") {
const std::string filePath = value;
Load(filePath);
const std::string filePath = value;
Load(filePath);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@ -35,34 +35,34 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
bool SoftMatchingFeature::Load(const std::string& filePath)
{
StaticData &staticData = StaticData::InstanceNonConst();
StaticData &staticData = StaticData::InstanceNonConst();
InputFileStream inStream(filePath);
std::string line;
while(getline(inStream, line)) {
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
InputFileStream inStream(filePath);
std::string line;
while(getline(inStream, line)) {
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
// no soft matching necessary if LHS and RHS are the same
if (tokens[0] == tokens[1]) {
continue;
}
Word LHS, RHS;
LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
m_softMatches[RHS[0]->GetId()].push_back(LHS);
GetOrSetFeatureName(RHS, LHS);
// no soft matching necessary if LHS and RHS are the same
if (tokens[0] == tokens[1]) {
continue;
}
staticData.SetSoftMatches(m_softMatches);
Word LHS, RHS;
LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);
return true;
m_softMatches[RHS[0]->GetId()].push_back(LHS);
GetOrSetFeatureName(RHS, LHS);
}
staticData.SetSoftMatches(m_softMatches);
return true;
}
void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
ScoreComponentCollection* accumulator) const
{
const TargetPhrase& target = hypo.GetCurrTargetPhrase();
@ -87,7 +87,8 @@ void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo,
}
// when loading, or when we notice that non-terminals have been added after loading, we resize vectors
void SoftMatchingFeature::ResizeCache() const {
void SoftMatchingFeature::ResizeCache() const
{
FactorCollection& fc = FactorCollection::Instance();
size_t numNonTerminals = fc.GetNumNonTerminals();
@ -98,7 +99,8 @@ void SoftMatchingFeature::ResizeCache() const {
}
const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const {
const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const
{
try {
#ifdef WITH_THREADS //try read-only lock
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
@ -107,23 +109,22 @@ const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, con
if (!name.empty()) {
return name;
}
}
catch (const std::out_of_range& oor) {
} catch (const std::out_of_range& oor) {
#ifdef WITH_THREADS //need to resize cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
ResizeCache();
}
#ifdef WITH_THREADS //need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
std::string LHS_string = LHS.GetString(outputFactorOrder, false);
std::string RHS_string = RHS.GetString(outputFactorOrder, false);
name = LHS_string + "->" + RHS_string;
return name;
}
std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
std::string LHS_string = LHS.GetString(outputFactorOrder, false);
std::string RHS_string = RHS.GetString(outputFactorOrder, false);
name = LHS_string + "->" + RHS_string;
return name;
}
}

View File

@ -13,230 +13,237 @@ namespace Moses
{
InternalTree::InternalTree(const std::string & line, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
{
m_value_nt(0),
m_isTerminal(terminal)
{
size_t found = line.find_first_of("[] ");
size_t found = line.find_first_of("[] ");
if (found == line.npos) {
m_value = line;
}
if (found == line.npos) {
m_value = line;
}
else {
AddSubTree(line, 0);
}
else {
AddSubTree(line, 0);
}
}
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
std::string value = "";
char token = 0;
std::string value = "";
char token = 0;
while (token != ']' && pos != std::string::npos)
{
size_t oldpos = pos;
pos = line.find_first_of("[] ", pos);
if (pos == std::string::npos) break;
token = line[pos];
value = line.substr(oldpos,pos-oldpos);
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
pos = line.find_first_of("[] ", pos);
if (pos == std::string::npos) break;
token = line[pos];
value = line.substr(oldpos,pos-oldpos);
if (token == '[') {
if (m_value.size() > 0) {
TreePointer child(new InternalTree(value, false));
m_children.push_back(child);
pos = child->AddSubTree(line, pos+1);
}
else {
if (value.size() > 0) {
m_value = value;
}
pos = AddSubTree(line, pos+1);
}
}
else if (token == ' ' || token == ']') {
if (value.size() > 0 && ! m_value.size() > 0) {
m_value = value;
}
else if (value.size() > 0) {
m_isTerminal = false;
TreePointer child(new InternalTree(value, true));
m_children.push_back(child);
}
if (token == ' ') {
pos++;
}
}
if (m_children.size() > 0) {
m_isTerminal = false;
if (token == '[') {
if (m_value.size() > 0) {
TreePointer child(new InternalTree(value, false));
m_children.push_back(child);
pos = child->AddSubTree(line, pos+1);
} else {
if (value.size() > 0) {
m_value = value;
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (value.size() > 0 && ! m_value.size() > 0) {
m_value = value;
} else if (value.size() > 0) {
m_isTerminal = false;
TreePointer child(new InternalTree(value, true));
m_children.push_back(child);
}
if (token == ' ') {
pos++;
}
}
if (pos == std::string::npos) {
return line.size();
if (m_children.size() > 0) {
m_isTerminal = false;
}
return min(line.size(),pos+1);
}
if (pos == std::string::npos) {
return line.size();
}
return min(line.size(),pos+1);
}
std::string InternalTree::GetString() const {
std::string InternalTree::GetString() const
{
std::string ret = " ";
std::string ret = " ";
if (!m_isTerminal) {
ret += "[";
}
if (!m_isTerminal) {
ret += "[";
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it)
{
ret += (*it)->GetString();
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString();
}
if (!m_isTerminal) {
ret += "]";
}
return ret;
if (!m_isTerminal) {
ret += "]";
}
return ret;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
}
else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
} else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
return false;
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
return false;
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
return false;
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
return false;
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
void TreeStructureFeature::Load() {
void TreeStructureFeature::Load()
{
// syntactic constraints can be hooked in here.
m_constraints = NULL;
@ -248,27 +255,28 @@ void TreeStructureFeature::Load() {
// define NT labels (ints) that are mapped from strings for quicker comparison.
void TreeStructureFeature::AddNTLabels(TreePointer root) const {
std::string label = root->GetLabel();
void TreeStructureFeature::AddNTLabels(TreePointer root) const
{
std::string label = root->GetLabel();
if (root->IsTerminal()) {
return;
}
if (root->IsTerminal()) {
return;
}
std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
if (it != m_labelset->string_to_label.end()) {
root->SetNTLabel(it->second);
}
std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
if (it != m_labelset->string_to_label.end()) {
root->SetNTLabel(it->second);
}
std::vector<TreePointer> children = root->GetChildren();
for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
AddNTLabels(*it2);
}
std::vector<TreePointer> children = root->GetChildren();
for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
AddNTLabels(*it2);
}
}
FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
{
std::string tree;
bool found = 0;
@ -277,7 +285,7 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
TreePointer mytree (new InternalTree(tree));
if (m_labelset) {
AddNTLabels(mytree);
AddNTLabels(mytree);
}
//get subtrees (in target order)
@ -304,8 +312,7 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
accumulator->PlusEquals(this, *feature, 1);
}
return new TreeState(mytree);
}
else {
} else {
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
}

View File

@ -17,92 +17,91 @@ typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
std::vector<TreePointer> m_children;
bool m_isTerminal;
std::string m_value;
NTLabel m_value_nt;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
TreePointer child (new InternalTree(**it));
m_children.push_back(child);
}
}
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString() const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
TreePointer child (new InternalTree(**it));
m_children.push_back(child);
}
}
size_t AddSubTree(const std::string & line, size_t start);
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
std::string GetString() const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
void AddChild(TreePointer child) {
m_children.push_back(child);
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
bool IsTerminal() const {
return m_isTerminal;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
void AddChild(TreePointer child) {
m_children.push_back(child);
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
bool IsTerminal() const {
return m_isTerminal;
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
// mapping from string nonterminal label to int representation.
// allows abstraction if multiple nonterminal strings should map to same label.
struct LabelSet
{
struct LabelSet {
public:
std::map<std::string, NTLabel> string_to_label;
std::map<std::string, NTLabel> string_to_label;
};
@ -111,8 +110,8 @@ public:
class SyntaxConstraints
{
public:
virtual std::vector<std::string> SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous) = 0;
virtual ~SyntaxConstraints() {};
virtual std::vector<std::string> SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous) = 0;
virtual ~SyntaxConstraints() {};
};
@ -125,10 +124,12 @@ public:
{}
TreePointer GetTree() const {
return m_tree;
return m_tree;
}
int Compare(const FFState& other) const {return 0;};
int Compare(const FFState& other) const {
return 0;
};
};
class TreeStructureFeature : public StatefulFeatureFunction
@ -138,9 +139,11 @@ class TreeStructureFeature : public StatefulFeatureFunction
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line) {
ReadParameters();
}
~TreeStructureFeature() {delete m_constraints;};
ReadParameters();
}
~TreeStructureFeature() {
delete m_constraints;
};
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new TreeState(TreePointer());
@ -164,7 +167,9 @@ public:
FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
ScoreComponentCollection* accumulator) const {
UTIL_THROW(util::Exception, "Not implemented");
};
FFState* EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
@ -174,42 +179,42 @@ public:
};
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
}
else if ((*it)->GetLength() > 0) {
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
$generator(leafNT)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
} else if ((*it)->GetLength() > 0) {
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
$stop;
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
}
else if ((*it)->GetLength() > 0) {
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
$generator(leafNTParent)
{
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
} else if ((*it)->GetLength() > 0) {
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
$stop;
}
$stop;
};

View File

@ -59,8 +59,7 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool
if (isNonTerminal) {
m_factorIdNonTerminal++;
UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
}
else {
} else {
m_factorId++;
}
}

View File

@ -182,9 +182,9 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std:
// for pruning
template <class Model> float Fill<Model>::GetBestScore(const ChartCellLabel *chartCell) const
{
search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate();
UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack");
return vertex.Bound();
search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate();
UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack");
return vertex.Bound();
}
// TODO: factors (but chart doesn't seem to support factors anyway).

View File

@ -33,14 +33,14 @@ InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms,
InputPath::~InputPath()
{
// Since there is no way for the Phrase Dictionaries to tell in
// which (sentence) context phrases were looked up, we tell them
// Since there is no way for the Phrase Dictionaries to tell in
// which (sentence) context phrases were looked up, we tell them
// now that the phrase isn't needed any more by this inputPath
typedef std::pair<const TargetPhraseCollection*, const void* > entry;
std::map<const PhraseDictionary*, entry>::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
iter->first->Release(iter->second.first);
delete m_inputScore;
}

View File

@ -59,22 +59,22 @@ public:
delete state;
}
void reset(const DALMState &from){
delete state;
state = new DALM::State(*from.state);
}
void reset(const DALMState &from) {
delete state;
state = new DALM::State(*from.state);
}
void reset(DALM::State *s){
delete state;
state = s;
}
void reset(DALM::State *s) {
delete state;
state = s;
}
virtual int Compare(const FFState& other) const{
const DALMState &o = static_cast<const DALMState &>(other);
if(state->get_count() < o.state->get_count()) return -1;
else if(state->get_count() > o.state->get_count()) return 1;
else return state->compare(o.state);
}
virtual int Compare(const FFState& other) const {
const DALMState &o = static_cast<const DALMState &>(other);
if(state->get_count() < o.state->get_count()) return -1;
else if(state->get_count() > o.state->get_count()) return 1;
else return state->compare(o.state);
}
DALM::State *get_state() const {
return state;
@ -88,78 +88,78 @@ public:
class DALMChartState : public FFState
{
private:
const ChartHypothesis &hypo;
DALM::Fragment *prefixFragments;
unsigned short prefixLength;
float prefixScore;
DALMState *rightContext;
bool isLarge;
const ChartHypothesis &hypo;
DALM::Fragment *prefixFragments;
unsigned short prefixLength;
float prefixScore;
DALMState *rightContext;
bool isLarge;
public:
DALMChartState(
const ChartHypothesis &hypo,
DALM::Fragment *prefixFragments,
unsigned short prefixLength,
float prefixScore,
DALMState *rightContext,
bool isLarge)
: hypo(hypo),
prefixFragments(prefixFragments),
prefixLength(prefixLength),
prefixScore(prefixScore),
rightContext(rightContext),
isLarge(isLarge)
{}
DALMChartState(
const ChartHypothesis &hypo,
DALM::Fragment *prefixFragments,
unsigned short prefixLength,
float prefixScore,
DALMState *rightContext,
bool isLarge)
: hypo(hypo),
prefixFragments(prefixFragments),
prefixLength(prefixLength),
prefixScore(prefixScore),
rightContext(rightContext),
isLarge(isLarge)
{}
virtual ~DALMChartState(){
delete [] prefixFragments;
delete rightContext;
}
virtual ~DALMChartState() {
delete [] prefixFragments;
delete rightContext;
}
unsigned short GetPrefixLength() const{
return prefixLength;
}
unsigned short GetPrefixLength() const {
return prefixLength;
}
const DALM::Fragment *GetPrefixFragments() const{
return prefixFragments;
}
const DALM::Fragment *GetPrefixFragments() const {
return prefixFragments;
}
float GetPrefixScore() const{
return prefixScore;
}
float GetPrefixScore() const {
return prefixScore;
}
const DALMState *GetRightContext() const{
return rightContext;
}
const DALMState *GetRightContext() const {
return rightContext;
}
bool LargeEnough() const{
return isLarge;
}
bool LargeEnough() const {
return isLarge;
}
virtual int Compare(const FFState& other) const{
const DALMChartState &o = static_cast<const DALMChartState &>(other);
// prefix
virtual int Compare(const FFState& other) const {
const DALMChartState &o = static_cast<const DALMChartState &>(other);
// prefix
if (hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for "<s> ..."
if (prefixLength != o.prefixLength){
return (prefixLength < o.prefixLength)?-1:1;
} else {
if(prefixLength > 0){
DALM::Fragment &f = prefixFragments[prefixLength-1];
DALM::Fragment &of = o.prefixFragments[prefixLength-1];
int ret = DALM::compare_fragments(f, of);
if(ret != 0) return ret;
}
}
if (prefixLength != o.prefixLength) {
return (prefixLength < o.prefixLength)?-1:1;
} else {
if(prefixLength > 0) {
DALM::Fragment &f = prefixFragments[prefixLength-1];
DALM::Fragment &of = o.prefixFragments[prefixLength-1];
int ret = DALM::compare_fragments(f, of);
if(ret != 0) return ret;
}
}
}
// suffix
size_t inputSize = hypo.GetManager().GetSource().GetSize();
size_t inputSize = hypo.GetManager().GetSource().GetSize();
if (hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... </s>"
int ret = o.rightContext->Compare(*rightContext);
int ret = o.rightContext->Compare(*rightContext);
if (ret != 0) return ret;
}
return 0;
}
return 0;
}
};
LanguageModelDALM::LanguageModelDALM(const std::string &line)
@ -181,18 +181,18 @@ LanguageModelDALM::~LanguageModelDALM()
void LanguageModelDALM::Load()
{
/////////////////////
// READING INIFILE //
/////////////////////
string inifile= m_filePath + "/dalm.ini";
/////////////////////
// READING INIFILE //
/////////////////////
string inifile= m_filePath + "/dalm.ini";
UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
util::FileOpenException,
"Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
model = m_filePath + "/" + model;
words = m_filePath + "/" + words;
wordstxt = m_filePath + "/" + wordstxt;
model = m_filePath + "/" + model;
words = m_filePath + "/" + words;
wordstxt = m_filePath + "/" + wordstxt;
// Preparing a logger object.
m_logger = new DALM::Logger(stderr);
@ -233,14 +233,14 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
size_t currPos = 0;
size_t hist_count = 0;
DALMState *dalm_state = new DALMState(m_nGramOrder);
DALM::State *state = dalm_state->get_state();
DALM::State *state = dalm_state->get_state();
if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor) {
m_lm->init_state(*state);
currPos++;
hist_count++;
}
if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor){
m_lm->init_state(*state);
currPos++;
hist_count++;
}
while (currPos < phraseSize) {
const Word &word = phrase.GetWord(currPos);
hist_count++;
@ -249,9 +249,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
state->refresh();
hist_count = 0;
} else {
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
float score = m_lm->query(wid, *state);
fullScore += score;
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
float score = m_lm->query(wid, *state);
fullScore += score;
if (hist_count >= m_nGramOrder) ngramScore += score;
if (wid==m_vocab->unk()) ++oovCount;
}
@ -259,9 +259,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
currPos++;
}
fullScore = TransformLMScore(fullScore);
ngramScore = TransformLMScore(ngramScore);
delete dalm_state;
fullScore = TransformLMScore(fullScore);
ngramScore = TransformLMScore(ngramScore);
delete dalm_state;
}
FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
@ -283,11 +283,11 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
const std::size_t adjust_end = std::min(end, begin + m_nGramOrder - 1);
DALMState *dalm_state = new DALMState(*dalm_ps);
DALM::State *state = dalm_state->get_state();
DALM::State *state = dalm_state->get_state();
float score = 0.0;
for(std::size_t position=begin; position < adjust_end; position++){
score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), *state);
for(std::size_t position=begin; position < adjust_end; position++) {
score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), *state);
}
if (hypo.IsSourceCompleted()) {
@ -295,8 +295,8 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
const DALM::VocabId *last = LastIDs(hypo, &indices.front());
m_lm->set_state(&indices.front(), (last-&indices.front()), *state);
score += m_lm->query(wid_end, *state);
score += m_lm->query(wid_end, *state);
} else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<DALM::VocabId> indices(m_nGramOrder-1);
@ -304,7 +304,7 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
m_lm->set_state(&indices.front(), (last-&indices.front()), *state);
}
score = TransformLMScore(score);
score = TransformLMScore(score);
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
@ -317,73 +317,74 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
return dalm_state;
}
FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const
{
// initialize language model context state
DALMState *dalm_state = new DALMState(m_nGramOrder);
DALM::State *state = dalm_state->get_state();
DALMState *dalm_state = new DALMState(m_nGramOrder);
DALM::State *state = dalm_state->get_state();
size_t contextSize = m_nGramOrder-1;
DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize];
unsigned short prefixLength = 0;
bool isLarge = false;
size_t contextSize = m_nGramOrder-1;
DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize];
unsigned short prefixLength = 0;
bool isLarge = false;
// initial language model scores
float prefixScore = 0.0; // not yet final for initial words (lack context)
float hypoScore = 0.0; // total hypothesis score.
const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
size_t hypoSize = targetPhrase.GetSize();
const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
size_t hypoSize = targetPhrase.GetSize();
// get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
targetPhrase.GetAlignNonTerm().GetNonTermIndexMap();
size_t phrasePos = 0;
// begginig of sentence.
if(hypoSize > 0){
const Word &word = targetPhrase.GetWord(0);
if(!word.IsNonTerminal()){
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
if(word.GetFactor(m_factorType) == m_beginSentenceFactor){
m_lm->init_state(*state);
// state is finalized.
isLarge = true;
}else{
if(isLarge){
float score = m_lm->query(wid, *state);
hypoScore += score;
}else{
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
size_t phrasePos = 0;
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
}
}else{
// begginig of sentence.
if(hypoSize > 0) {
const Word &word = targetPhrase.GetWord(0);
if(!word.IsNonTerminal()) {
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
if(word.GetFactor(m_factorType) == m_beginSentenceFactor) {
m_lm->init_state(*state);
// state is finalized.
isLarge = true;
} else {
if(isLarge) {
float score = m_lm->query(wid, *state);
hypoScore += score;
} else {
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
}
} else {
// special case: rule starts with non-terminal -> copy everything
size_t nonTermIndex = nonTermIndexMap[0];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
const DALMChartState* prevState =
static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
// get prefixScore and hypoScore
prefixScore = prevState->GetPrefixScore();
hypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
hypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
// get language model state
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
prefixLength = prevState->GetPrefixLength();
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength);
isLarge = prevState->LargeEnough();
}
phrasePos++;
prefixLength = prevState->GetPrefixLength();
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength);
isLarge = prevState->LargeEnough();
}
phrasePos++;
}
// loop over rule
@ -393,16 +394,16 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
// regular word
if (!word.IsNonTerminal()) {
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
if (isLarge) {
hypoScore += m_lm->query(wid, *state);
}else{
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));
if (isLarge) {
hypoScore += m_lm->query(wid, *state);
} else {
float score = m_lm->query(wid, *state, prefixFragments[prefixLength]);
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
}
// non-terminal, add phrase from underlying hypothesis
@ -414,40 +415,40 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
const DALMChartState* prevState =
static_cast<const DALMChartState*>(prevHypo->GetFFState(featureID));
size_t prevPrefixLength = prevState->GetPrefixLength();
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
DALM::Gap gap(*state);
const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments();
DALM::Gap gap(*state);
// score its prefix
for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) {
const DALM::Fragment &f = prevPrefixFragments[prefixPos];
const DALM::Fragment &f = prevPrefixFragments[prefixPos];
if (isLarge) {
hypoScore += m_lm->query(f, *state, gap);
} else {
float score = m_lm->query(f, *state, gap, prefixFragments[prefixLength]);
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
gap.succ();
if (isLarge) {
hypoScore += m_lm->query(f, *state, gap);
} else {
float score = m_lm->query(f, *state, gap, prefixFragments[prefixLength]);
prefixScore += score;
hypoScore += score;
prefixLength++;
if(prefixLength >= contextSize) isLarge = true;
}
gap.succ();
}
// check if we are dealing with a large sub-phrase
if (prevState->LargeEnough()) {
// add its language model score
hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
hypoScore -= prevState->GetPrefixScore(); // remove overwrapped score.
// copy language model state
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
// copy language model state
dalm_state->reset(*prevState->GetRightContext());
state = dalm_state->get_state();
} else {
DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state());
m_lm->set_state(*state_new, *state, gap);
dalm_state->reset(state_new);
state = dalm_state->get_state();
}
DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state());
m_lm->set_state(*state_new, *state, gap);
dalm_state->reset(state_new);
state = dalm_state->get_state();
}
}
}
@ -466,36 +467,36 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)
{
InputFileStream vocabStrm(wordstxt);
std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
std::vector< std::pair<std::size_t, DALM::VocabId> > vlist;
string line;
std::size_t max_fid = 0;
std::size_t max_fid = 0;
while(getline(vocabStrm, line)) {
const Factor *factor = FactorCollection::Instance().AddFactor(line);
std::size_t fid = factor->GetId();
DALM::VocabId wid = m_vocab->lookup(line.c_str());
const Factor *factor = FactorCollection::Instance().AddFactor(line);
std::size_t fid = factor->GetId();
DALM::VocabId wid = m_vocab->lookup(line.c_str());
vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
if(max_fid < fid) max_fid = fid;
vlist.push_back(std::pair<std::size_t, DALM::VocabId>(fid, wid));
if(max_fid < fid) max_fid = fid;
}
for(std::size_t i = 0; i < m_vocabMap.size(); i++){
m_vocabMap[i] = m_vocab->unk();
}
for(std::size_t i = 0; i < m_vocabMap.size(); i++) {
m_vocabMap[i] = m_vocab->unk();
}
m_vocabMap.resize(max_fid+1, m_vocab->unk());
std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
while(it != vlist.end()){
std::pair<std::size_t, DALM::VocabId> &entry = *it;
m_vocabMap[entry.first] = entry.second;
m_vocabMap.resize(max_fid+1, m_vocab->unk());
std::vector< std::pair<std::size_t, DALM::VocabId> >::iterator it = vlist.begin();
while(it != vlist.end()) {
std::pair<std::size_t, DALM::VocabId> &entry = *it;
m_vocabMap[entry.first] = entry.second;
++it;
}
++it;
}
}
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
std::size_t fid = factor->GetId();
return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
std::size_t fid = factor->GetId();
return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk();
}
void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)

View File

@ -182,11 +182,11 @@ void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hyp
}
void
void
Manager::
printThisHypothesis(long translationId, const Hypothesis* hypo,
const vector <const TargetPhrase*> & remainingPhrases,
float remainingScore, ostream& outputStream) const
printThisHypothesis(long translationId, const Hypothesis* hypo,
const vector <const TargetPhrase*> & remainingPhrases,
float remainingScore, ostream& outputStream) const
{
outputStream << translationId << " ||| ";

View File

@ -140,23 +140,23 @@ public:
std::pair<MapSrc2Tgt::iterator,bool> piter;
if(useCache) {
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
if(!piter.second){
if (piter.first->second){
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl);
}else{
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl);
}
if(!piter.second) {
if (piter.first->second) {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl);
} else {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl);
}
return piter.first->second;
}
} else if (m_cache.size()) {
MapSrc2Tgt::const_iterator i=m_cache.find(src);
if (i!=m_cache.end()){
if (i->second){
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl);
}else{
if (i!=m_cache.end()) {
if (i->second) {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl);
} else {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl);
}
}else{
} else {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl);
}
return (i!=m_cache.end() ? i->second : 0);

View File

@ -117,7 +117,7 @@ public:
std::vector<float> GetWeights(const std::string &name);
std::map<std::string, std::vector<float> > GetAllWeights() const {
return m_weights;
return m_weights;
}
std::set<std::string> GetWeightNames() const;

View File

@ -381,7 +381,7 @@ void Phrase::InitStartEndWord()
size_t Phrase::Find(const Phrase &sought, int maxUnknown) const
{
if (GetSize() < sought.GetSize()) {
// sought phrase too big
// sought phrase too big
return NOT_FOUND;
}

View File

@ -65,7 +65,8 @@ void Candidates::readBin(FILE* f)
const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;
//////////////////////////////////////////////////////////////////
PrefixTreeMap::~PrefixTreeMap() {
PrefixTreeMap::~PrefixTreeMap()
{
if(m_FileSrc) {
fClose(m_FileSrc);
}
@ -99,8 +100,7 @@ WordVoc &ReadVoc(std::map<std::string,WordVoc> &vocs, const std::string& filenam
WordVoc &voc = vocs[filename];
voc.Read(filename);
return voc;
}
else {
} else {
return vi->second;
}
}

View File

@ -251,8 +251,8 @@ bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const Wor
}
if (StaticData::Instance().AdjacentOnly() &&
!hypoBitmap.IsAdjacent(range.GetStartPos(), range.GetEndPos())) {
return false;
!hypoBitmap.IsAdjacent(range.GetStartPos(), range.GetEndPos())) {
return false;
}
bool leftMostEdge = (hypoFirstGapPos == startPos);

View File

@ -254,8 +254,8 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star
}
if (StaticData::Instance().AdjacentOnly() &&
!hypothesis.GetWordsBitmap().IsAdjacent(startPos, endPos)) {
return;
!hypothesis.GetWordsBitmap().IsAdjacent(startPos, endPos)) {
return;
}
// loop through all translation options

View File

@ -130,30 +130,30 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
std::vector< std::map<std::string, std::string> > dlt_meta = ProcessAndStripDLT(line);
PhraseDictionaryDynamicCacheBased* cbtm = NULL;
DynamicCacheBasedLanguageModel* cblm = NULL;
PhraseDictionaryDynamicCacheBased* cbtm = NULL;
DynamicCacheBasedLanguageModel* cblm = NULL;
std::vector< std::map<std::string, std::string> >::iterator dlt_meta_it = dlt_meta.begin();
for (dlt_meta_it = dlt_meta.begin(); dlt_meta_it != dlt_meta.end(); ++dlt_meta_it) {
if ((*dlt_meta_it).find("type") != (*dlt_meta_it).end()) {
if ((*dlt_meta_it)["type"] == "cbtm") {
std::string id = "default";
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cbtm = &PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it);
}
if ((*dlt_meta_it)["type"] == "cblm") {
std::string id = "default";
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cblm = &DynamicCacheBasedLanguageModel::InstanceNonConst(id);
if (cblm) cblm->ExecuteDlt(*dlt_meta_it);
}
if ((*dlt_meta_it).find("type") != (*dlt_meta_it).end()) {
if ((*dlt_meta_it)["type"] == "cbtm") {
std::string id = "default";
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cbtm = &PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it);
}
if ((*dlt_meta_it)["type"] == "cblm") {
std::string id = "default";
if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) {
id = (*dlt_meta_it)["id"];
}
cblm = &DynamicCacheBasedLanguageModel::InstanceNonConst(id);
if (cblm) cblm->ExecuteDlt(*dlt_meta_it);
}
}
}
}
// parse XML markup in translation line
std::vector< size_t > xmlWalls;

View File

@ -537,21 +537,21 @@ bool StaticData::LoadData(Parameter *parameter)
NoCache();
OverrideFeatures();
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
std::cerr <<"After StaticData::LoadDataStatic" << std::endl;
/*
std::cerr <<"Before ShowWeights" << std::endl;
// setting "-show-weights" -> just dump out weights and exit
if (m_parameter->isParamSpecified("show-weights")) {
MosesCmd::ShowWeights();
exit(0);
}
std::cerr <<"After ShowWeights" << std::endl;
*/
/*
std::cerr <<"Before ShowWeights" << std::endl;
// setting "-show-weights" -> just dump out weights and exit
if (m_parameter->isParamSpecified("show-weights")) {
MosesCmd::ShowWeights();
exit(0);
}
std::cerr <<"After ShowWeights" << std::endl;
*/
std::cerr <<"Before LoadFeatureFunctions" << std::endl;
std::cerr <<"Before LoadFeatureFunctions" << std::endl;
LoadFeatureFunctions();
std::cerr <<"After LoadFeatureFunctions" << std::endl;
std::cerr <<"After LoadFeatureFunctions" << std::endl;
if (!LoadDecodeGraphs()) return false;
@ -982,8 +982,7 @@ bool StaticData::CheckWeights() const
cerr << fname << "\n";
if (featureNames.find(fname) != featureNames.end()) {
weightNames.erase(iter++);
}
else {
} else {
++iter;
}
}
@ -1002,7 +1001,8 @@ bool StaticData::CheckWeights() const
}
void StaticData::LoadSparseWeightsFromConfig() {
void StaticData::LoadSparseWeightsFromConfig()
{
set<string> featureNames;
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < ffs.size(); ++i) {
@ -1017,7 +1017,7 @@ void StaticData::LoadSparseWeightsFromConfig() {
// this indicates that it is sparse feature
if (featureNames.find(iter->first) == featureNames.end()) {
UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
m_allWeights.Assign(iter->first, iter->second[0]);
m_allWeights.Assign(iter->first, iter->second[0]);
}
}
@ -1211,24 +1211,24 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
vector<float> weights;
vector<string> toks = Tokenize(denseWeights);
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
const string &tok = toks[i];
if (tok.substr(tok.size() - 1, 1) == "=") {
// start of new feature
if (tok.substr(tok.size() - 1, 1) == "=") {
// start of new feature
if (name != "") {
// save previous ff
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
m_allWeights.Assign(&ff, weights);
weights.clear();
}
if (name != "") {
// save previous ff
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
m_allWeights.Assign(&ff, weights);
weights.clear();
}
name = tok.substr(0, tok.size() - 1);
} else {
// a weight for curr ff
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
}
name = tok.substr(0, tok.size() - 1);
} else {
// a weight for curr ff
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
}
}
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
@ -1238,14 +1238,14 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
InputFileStream sparseStrme(sparseFile);
string line;
while (getline(sparseStrme, line)) {
vector<string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
vector<string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
vector<string> names = Tokenize(toks[0], "_");
UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
vector<string> names = Tokenize(toks[0], "_");
UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
}
}

View File

@ -760,8 +760,9 @@ public:
}
bool AdjacentOnly() const
{ return m_adjacentOnly; }
bool AdjacentOnly() const {
return m_adjacentOnly;
}
void ResetWeights(const std::string &denseWeights, const std::string &sparseFile);
@ -769,11 +770,11 @@ public:
// need global access for output of tree structure
const StatefulFeatureFunction* GetTreeStructure() const {
return m_treeStructure;
return m_treeStructure;
}
void SetTreeStructure(const StatefulFeatureFunction* treeStructure) {
m_treeStructure = treeStructure;
m_treeStructure = treeStructure;
}
};

View File

@ -3,17 +3,17 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@ -38,7 +38,7 @@ using namespace std;
namespace Moses
{
TargetPhrase::TargetPhrase( std::string out_string)
TargetPhrase::TargetPhrase( std::string out_string)
:Phrase(0)
, m_fullScore(0.0)
, m_futureScore(0.0)
@ -46,14 +46,14 @@ namespace Moses
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
, m_lhsTarget(NULL)
, m_ruleSource(NULL)
{
//ACAT
const StaticData &staticData = StaticData::Instance();
CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter(), NULL);
}
TargetPhrase::TargetPhrase()
{
//ACAT
const StaticData &staticData = StaticData::Instance();
CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter(), NULL);
}
TargetPhrase::TargetPhrase()
:Phrase()
, m_fullScore(0.0)
, m_futureScore(0.0)
@ -61,10 +61,10 @@ namespace Moses
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
, m_lhsTarget(NULL)
, m_ruleSource(NULL)
{
}
TargetPhrase::TargetPhrase(const Phrase &phrase)
{
}
TargetPhrase::TargetPhrase(const Phrase &phrase)
: Phrase(phrase)
, m_fullScore(0.0)
, m_futureScore(0.0)
@ -72,223 +72,223 @@ namespace Moses
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
, m_lhsTarget(NULL)
, m_ruleSource(NULL)
{
}
TargetPhrase::TargetPhrase(const TargetPhrase &copy)
{
}
TargetPhrase::TargetPhrase(const TargetPhrase &copy)
: Phrase(copy)
, m_fullScore(copy.m_fullScore)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)
, m_alignTerm(copy.m_alignTerm)
, m_alignNonTerm(copy.m_alignNonTerm)
{
if (copy.m_lhsTarget) {
m_lhsTarget = new Word(*copy.m_lhsTarget);
} else {
m_lhsTarget = NULL;
}
if (copy.m_ruleSource) {
m_ruleSource = new Phrase(*copy.m_ruleSource);
} else {
m_ruleSource = NULL;
}
}
TargetPhrase::~TargetPhrase()
{
//cerr << "m_lhsTarget=" << m_lhsTarget << endl;
delete m_lhsTarget;
delete m_ruleSource;
}
#ifdef HAVE_PROTOBUF
void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const
{
pb->add_trg_words("[X,1]");
for (size_t pos = 0 ; pos < GetSize() ; pos++)
pb->add_trg_words(GetWord(pos)[0]->GetString());
}
#endif
void TargetPhrase::Evaluate(const Phrase &source)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
Evaluate(source, ffs);
}
void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
{
if (ffs.size()) {
const StaticData &staticData = StaticData::Instance();
ScoreComponentCollection futureScoreBreakdown;
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown);
}
}
float weightedScore = m_scoreBreakdown.GetWeightedScore();
m_futureScore += futureScoreBreakdown.GetWeightedScore();
m_fullScore = weightedScore + m_futureScore;
}
}
void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
const StaticData &staticData = StaticData::Instance();
ScoreComponentCollection futureScoreBreakdown;
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
ff.Evaluate(input, inputPath, *this, m_scoreBreakdown, &futureScoreBreakdown);
}
}
float weightedScore = m_scoreBreakdown.GetWeightedScore();
m_futureScore += futureScoreBreakdown.GetWeightedScore();
m_fullScore = weightedScore + m_futureScore;
}
void TargetPhrase::SetXMLScore(float score)
{
const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
size_t numScores = prod->GetNumScoreComponents();
vector <float> scoreVector(numScores,score/numScores);
m_scoreBreakdown.Assign(prod, scoreVector);
}
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
AlignmentInfo::CollType alignTerm, alignNonTerm;
for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-'));
char *endptr;
size_t sourcePos = strtoul(dash->data(), &endptr, 10);
UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
++dash;
size_t targetPos = strtoul(dash->data(), &endptr, 10);
UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token);
if (GetWord(targetPos).IsNonTerminal()) {
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
} else {
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
}
}
SetAlignTerm(alignTerm);
SetAlignNonTerm(alignNonTerm);
// cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
}
void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll)
{
const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll);
m_alignTerm = alignmentInfo;
}
void TargetPhrase::SetAlignNonTerm(const AlignmentInfo::CollType &coll)
{
const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll);
m_alignNonTerm = alignmentInfo;
}
void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString)
{
m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string());
}
void TargetPhrase::Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
{
Phrase::MergeFactors(copy, factorVec);
m_scoreBreakdown.Merge(copy.GetScoreBreakdown());
m_futureScore += copy.m_futureScore;
m_fullScore += copy.m_fullScore;
}
void TargetPhrase::SetProperties(const StringPiece &str)
{
if (str.size() == 0) {
return;
}
vector<string> toks;
TokenizeMultiCharSeparator(toks, str.as_string(), "{{");
for (size_t i = 0; i < toks.size(); ++i) {
string &tok = toks[i];
if (tok.empty()) {
continue;
}
size_t endPos = tok.rfind("}");
tok = tok.substr(0, endPos - 1);
vector<string> keyValue = TokenizeFirstOnly(tok, " ");
UTIL_THROW_IF2(keyValue.size() != 2,
"Incorrect format of property: " << str);
SetProperty(keyValue[0], keyValue[1]);
}
}
void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const
{
std::map<std::string, std::string>::const_iterator iter;
iter = m_properties.find(key);
if (iter == m_properties.end()) {
found = false;
} else {
found = true;
value = iter->second;
}
}
void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const
{
if (m_ruleSource == NULL) {
m_ruleSource = new Phrase(ruleSource);
}
}
void swap(TargetPhrase &first, TargetPhrase &second)
{
first.SwapWords(second);
std::swap(first.m_fullScore, second.m_fullScore);
std::swap(first.m_futureScore, second.m_futureScore);
swap(first.m_scoreBreakdown, second.m_scoreBreakdown);
std::swap(first.m_alignTerm, second.m_alignTerm);
std::swap(first.m_alignNonTerm, second.m_alignNonTerm);
std::swap(first.m_lhsTarget, second.m_lhsTarget);
}
TO_STRING_BODY(TargetPhrase);
std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
{
if (tp.m_lhsTarget) {
os << *tp.m_lhsTarget<< " -> ";
}
{
if (copy.m_lhsTarget) {
m_lhsTarget = new Word(*copy.m_lhsTarget);
} else {
m_lhsTarget = NULL;
}
if (copy.m_ruleSource) {
m_ruleSource = new Phrase(*copy.m_ruleSource);
} else {
m_ruleSource = NULL;
}
}
TargetPhrase::~TargetPhrase()
{
//cerr << "m_lhsTarget=" << m_lhsTarget << endl;
delete m_lhsTarget;
delete m_ruleSource;
}
#ifdef HAVE_PROTOBUF
void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const
{
pb->add_trg_words("[X,1]");
for (size_t pos = 0 ; pos < GetSize() ; pos++)
pb->add_trg_words(GetWord(pos)[0]->GetString());
}
#endif
void TargetPhrase::Evaluate(const Phrase &source)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
Evaluate(source, ffs);
}
void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
{
if (ffs.size()) {
const StaticData &staticData = StaticData::Instance();
ScoreComponentCollection futureScoreBreakdown;
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown);
}
}
float weightedScore = m_scoreBreakdown.GetWeightedScore();
m_futureScore += futureScoreBreakdown.GetWeightedScore();
m_fullScore = weightedScore + m_futureScore;
}
}
void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
{
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
const StaticData &staticData = StaticData::Instance();
ScoreComponentCollection futureScoreBreakdown;
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
if (! staticData.IsFeatureFunctionIgnored( ff )) {
ff.Evaluate(input, inputPath, *this, m_scoreBreakdown, &futureScoreBreakdown);
}
}
float weightedScore = m_scoreBreakdown.GetWeightedScore();
m_futureScore += futureScoreBreakdown.GetWeightedScore();
m_fullScore = weightedScore + m_futureScore;
}
void TargetPhrase::SetXMLScore(float score)
{
const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
size_t numScores = prod->GetNumScoreComponents();
vector <float> scoreVector(numScores,score/numScores);
m_scoreBreakdown.Assign(prod, scoreVector);
}
// Parse a Moses-style word-alignment string (whitespace-separated
// "source-target" index pairs, e.g. "0-1 1-0") and record each point in
// either the terminal or non-terminal alignment set, depending on whether
// the target word at that position is a non-terminal.
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
  AlignmentInfo::CollType alignTerm, alignNonTerm;
  // Outer iterator walks whitespace-separated tokens; SkipEmpty=true.
  for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
    // Inner iterator splits one token on '-' into source/target fields.
    util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-'));
    char *endptr;
    // First field: 0-based source position; must consume the whole field.
    size_t sourcePos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
    ++dash;
    // Second field: 0-based target position.
    size_t targetPos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
    // A third '-'-separated field means the token is malformed.
    UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token);
    // Route the point by the kind of the *target* word it touches.
    if (GetWord(targetPos).IsNonTerminal()) {
      alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    } else {
      alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    }
  }
  SetAlignTerm(alignTerm);
  SetAlignNonTerm(alignNonTerm);
  // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
}
// Intern the terminal alignment in the global AlignmentInfoCollection and
// keep only the shared pointer it returns.
void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll)
{
  m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
}
// Intern the non-terminal alignment in the global AlignmentInfoCollection
// and keep only the shared pointer it returns.
void TargetPhrase::SetAlignNonTerm(const AlignmentInfo::CollType &coll)
{
  m_alignNonTerm = AlignmentInfoCollection::Instance().Add(coll);
}
// Record sparse feature scores (serialized in /sparseString/) for the
// given producer in this phrase's score breakdown.
void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString)
{
  m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string());
}
// Merge another target phrase into this one: copy over the requested
// factors, merge the score breakdowns, and accumulate both cached score
// totals.
void TargetPhrase::Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
{
  Phrase::MergeFactors(copy, factorVec);
  m_scoreBreakdown.Merge(copy.GetScoreBreakdown());
  m_futureScore += copy.m_futureScore;
  m_fullScore += copy.m_fullScore;
}
// Parse a property string of the form "{{key1 value1}} {{key2 value2}}"
// and store each key/value pair via SetProperty().
void TargetPhrase::SetProperties(const StringPiece &str)
{
  if (str.size() == 0) {
    return;
  }
  // Split on the opening "{{"; each resulting token ends with "}}".
  vector<string> toks;
  TokenizeMultiCharSeparator(toks, str.as_string(), "{{");
  for (size_t i = 0; i < toks.size(); ++i) {
    string &tok = toks[i];
    if (tok.empty()) {
      continue;
    }
    // Strip the trailing "}}": rfind locates the second closing brace and
    // cutting at endPos - 1 drops both (plus anything after them).
    // NOTE(review): if "}}" is absent, endPos is npos and the token passes
    // through almost unchanged -- confirm malformed input cannot reach here.
    size_t endPos = tok.rfind("}");
    tok = tok.substr(0, endPos - 1);
    // First space separates the key from the (space-containing) value.
    vector<string> keyValue = TokenizeFirstOnly(tok, " ");
    UTIL_THROW_IF2(keyValue.size() != 2,
                   "Incorrect format of property: " << str);
    SetProperty(keyValue[0], keyValue[1]);
  }
}
// Look up /key/ in the property map. /found/ reports whether it exists;
// /value/ is assigned only on a hit and left untouched otherwise.
void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const
{
  std::map<std::string, std::string>::const_iterator hit = m_properties.find(key);
  found = (hit != m_properties.end());
  if (found) {
    value = hit->second;
  }
}
// Lazily cache the source side of the rule; the first caller wins and
// later calls are no-ops.
void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const
{
  if (!m_ruleSource) {
    m_ruleSource = new Phrase(ruleSource);
  }
}
// Member-by-member swap of two TargetPhrases (words, cached scores,
// breakdown, alignments, and LHS).
void swap(TargetPhrase &first, TargetPhrase &second)
{
  first.SwapWords(second);
  std::swap(first.m_fullScore, second.m_fullScore);
  std::swap(first.m_futureScore, second.m_futureScore);
  // NOTE(review): deliberately unqualified -- resolves via ADL if
  // ScoreComponentCollection provides its own swap; confirm.
  swap(first.m_scoreBreakdown, second.m_scoreBreakdown);
  std::swap(first.m_alignTerm, second.m_alignTerm);
  std::swap(first.m_alignNonTerm, second.m_alignNonTerm);
  std::swap(first.m_lhsTarget, second.m_lhsTarget);
}
TO_STRING_BODY(TargetPhrase);
// Debug stream output: optional target LHS, the surface phrase, both
// alignment sets, the cached full score, the score breakdown, and (when
// cached) the rule's source side. Flushed after every field.
std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
{
  if (tp.m_lhsTarget) {
    os << *tp.m_lhsTarget<< " -> ";
  }
  os << static_cast<const Phrase&>(tp) << ":" << flush;
  // os << tp.GetAlignNonTerm() << flush;
  os << ": term=" << tp.GetAlignTerm() << flush;
  os << ": nonterm=" << tp.GetAlignNonTerm() << flush;
  os << ": c=" << tp.m_fullScore << flush;
  os << " " << tp.m_scoreBreakdown << flush;
  const Phrase *sourcePhrase = tp.GetRuleSource();
  if (sourcePhrase) {
    os << " sourcePhrase=" << *sourcePhrase << flush;
  }
  return os;
}
os << static_cast<const Phrase&>(tp) << ":" << flush;
// os << tp.GetAlignNonTerm() << flush;
os << ": term=" << tp.GetAlignTerm() << flush;
os << ": nonterm=" << tp.GetAlignNonTerm() << flush;
os << ": c=" << tp.m_fullScore << flush;
os << " " << tp.m_scoreBreakdown << flush;
const Phrase *sourcePhrase = tp.GetRuleSource();
if (sourcePhrase) {
os << " sourcePhrase=" << *sourcePhrase << flush;
}
return os;
}
}

View File

@ -136,22 +136,22 @@ SetFeaturesToApply()
}
}
// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
void
PhraseDictionary::
Release(TargetPhraseCollection const* tpc) const
{
// do nothing by default
return;
}
bool
PhraseDictionary::
PrefixExists(Phrase const& phrase) const
{
return true;
}
// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
// Base-class default: no-op. Presumably overridden by dictionaries that
// allocate per-query collections -- confirm against subclasses.
void
PhraseDictionary::
Release(TargetPhraseCollection const* tpc) const
{
  // do nothing by default
  return;
}
// Base-class default: conservatively report that entries starting with
// this prefix exist (i.e. no prefix-based pruning is possible here).
bool
PhraseDictionary::
PrefixExists(Phrase const& phrase) const
{
  return true;
}
void
PhraseDictionary::

View File

@ -91,7 +91,7 @@ public:
void
Release(TargetPhraseCollection const* tpc) const;
/// return true if phrase table entries starting with /phrase/
/// return true if phrase table entries starting with /phrase/
// exist in the table.
virtual
bool

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@ -43,126 +43,130 @@
namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
/** Implementation of a Cache-based phrase table.
*/
class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{
typedef std::vector<unsigned int> AgeCollection;
typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
// data structure for the cache
cacheMap m_cacheTM;
std::vector<Scores> precomputedScores;
unsigned int m_maxAge;
size_t m_score_type; //scoring type of the match
size_t m_entries; //total number of entries in the cache
float m_lower_score; //lower_bound_score for no match
std::string m_initfiles; // vector of files loaded in the initialization phase
std::string m_name; // internal name to identify this instance of the Cache-based phrase table
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
/** Implementation of a Cache-based phrase table.
*/
class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{
typedef std::vector<unsigned int> AgeCollection;
typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
// data structure for the cache
cacheMap m_cacheTM;
std::vector<Scores> precomputedScores;
unsigned int m_maxAge;
size_t m_score_type; //scoring type of the match
size_t m_entries; //total number of entries in the cache
float m_lower_score; //lower_bound_score for no match
std::string m_initfiles; // vector of files loaded in the initialization phase
std::string m_name; // internal name to identify this instance of the Cache-based phrase table
#ifdef WITH_THREADS
//multiple readers - single writer lock
mutable boost::shared_mutex m_cacheLock;
//multiple readers - single writer lock
mutable boost::shared_mutex m_cacheLock;
#endif
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryDynamicCacheBased&);
public:
PhraseDictionaryDynamicCacheBased(const std::string &line);
~PhraseDictionaryDynamicCacheBased();
inline const std::string GetName() { return m_name; };
inline void SetName(const std::string name){ m_name = name; }
static const PhraseDictionaryDynamicCacheBased& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static PhraseDictionaryDynamicCacheBased& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static const PhraseDictionaryDynamicCacheBased& Instance() {
return *s_instance;
}
static PhraseDictionaryDynamicCacheBased& InstanceNonConst() {
return *s_instance;
}
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryDynamicCacheBased&);
public:
PhraseDictionaryDynamicCacheBased(const std::string &line);
~PhraseDictionaryDynamicCacheBased();
inline const std::string GetName() {
return m_name;
};
inline void SetName(const std::string name) {
m_name = name;
}
static const PhraseDictionaryDynamicCacheBased& Instance(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static PhraseDictionaryDynamicCacheBased& InstanceNonConst(const std::string name) {
UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!");
return *(s_instance_map[name]);
}
static const PhraseDictionaryDynamicCacheBased& Instance() {
return *s_instance;
}
static PhraseDictionaryDynamicCacheBased& InstanceNonConst() {
return *s_instance;
}
void Load();
void Load(const std::string file);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
// for phrase-based model
// void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
// for syntax/hiero model (CKY+ decoding)
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
void SetParameter(const std::string& key, const std::string& value);
void InitializeForInput(InputType const& source);
// virtual void InitializeForInput(InputType const&) {
// /* Don't do anything source specific here as this object is shared between threads.*/
// }
void Print() const; // prints the cache
void Clear(); // clears the cache
void ClearEntries(std::string &entries);
void ClearSource(std::string &entries);
void Insert(std::string &entries);
void Execute(std::string command);
void ExecuteDlt(std::map<std::string, std::string> dlt_meta);
void SetScoreType(size_t type);
void SetMaxAge(unsigned int age);
protected:
static PhraseDictionaryDynamicCacheBased *s_instance;
static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map;
float decaying_score(const int age); // calculates the decay score given the age
void Insert(std::vector<std::string> entries);
void Decay(); // traverse through the cache and decay each entry
void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase
void Update(std::vector<std::string> entries, std::string ageString);
void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString="");
void Update(Phrase p, Phrase tp, int age, std::string waString="");
void ClearEntries(std::vector<std::string> entries);
void ClearEntries(std::string sourceString, std::string targetString);
void ClearEntries(Phrase p, Phrase tp);
void ClearSource(std::vector<std::string> entries);
void ClearSource(Phrase sp);
void Execute(std::vector<std::string> commands);
void Execute_Single_Command(std::string command);
void SetPreComputedScores(const unsigned int numScoreComponent);
Scores GetPreComputedScores(const unsigned int age);
void Load_Multiple_Files(std::vector<std::string> files);
void Load_Single_File(const std::string file);
TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
};
void Load();
void Load(const std::string file);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
// for phrase-based model
// void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
// for syntax/hiero model (CKY+ decoding)
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
void SetParameter(const std::string& key, const std::string& value);
void InitializeForInput(InputType const& source);
// virtual void InitializeForInput(InputType const&) {
// /* Don't do anything source specific here as this object is shared between threads.*/
// }
void Print() const; // prints the cache
void Clear(); // clears the cache
void ClearEntries(std::string &entries);
void ClearSource(std::string &entries);
void Insert(std::string &entries);
void Execute(std::string command);
void ExecuteDlt(std::map<std::string, std::string> dlt_meta);
void SetScoreType(size_t type);
void SetMaxAge(unsigned int age);
protected:
static PhraseDictionaryDynamicCacheBased *s_instance;
static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map;
float decaying_score(const int age); // calculates the decay score given the age
void Insert(std::vector<std::string> entries);
void Decay(); // traverse through the cache and decay each entry
void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase
void Update(std::vector<std::string> entries, std::string ageString);
void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString="");
void Update(Phrase p, Phrase tp, int age, std::string waString="");
void ClearEntries(std::vector<std::string> entries);
void ClearEntries(std::string sourceString, std::string targetString);
void ClearEntries(Phrase p, Phrase tp);
void ClearSource(std::vector<std::string> entries);
void ClearSource(Phrase sp);
void Execute(std::vector<std::string> commands);
void Execute_Single_Command(std::string command);
void SetPreComputedScores(const unsigned int numScoreComponent);
Scores GetPreComputedScores(const unsigned int age);
void Load_Multiple_Files(std::vector<std::string> files);
void Load_Single_File(const std::string file);
TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
};
} // namespace Moses
#endif /* moses_PhraseDictionaryDynamicCacheBased_H_ */

View File

@ -65,7 +65,7 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm);
"Not a non-terminal: " << targetNonTerm);
return &m_nonTermMap[targetNonTerm];
}
@ -95,7 +95,7 @@ const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Wor
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetNonTerminalChild(const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm);
"Not a non-terminal: " << targetNonTerm);
NonTerminalMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second;

View File

@ -59,8 +59,8 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
CacheColl &cache = GetCache();
CacheColl::iterator iter;
iter = cache.find(hash);
CacheColl::iterator iter;
iter = cache.find(hash);
if (iter != cache.end()) {
// already in cache

View File

@ -20,11 +20,11 @@ namespace Moses
/** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet::
TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
size_t maxNoTransOptPerCoverage,
float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage,
translationOptionThreshold)
TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
size_t maxNoTransOptPerCoverage,
float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage,
translationOptionThreshold)
{
// Prefix checkers are phrase dictionaries that provide a prefix check
// to indicate that a phrase table entry with a given prefix exists.
@ -32,8 +32,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
// expanding it further.
vector<PhraseDictionary*> prefixCheckers;
BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl())
if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
const InputFeature &inputFeature = InputFeature::Instance();
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
@ -103,10 +103,10 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
Phrase subphrase(prevPhrase);
subphrase.AddWord(word);
bool OK = prefixCheckers.size() == 0;
for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
OK = prefixCheckers[k]->PrefixExists(subphrase);
if (!OK) continue;
bool OK = prefixCheckers.size() == 0;
for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
OK = prefixCheckers[k]->PrefixExists(subphrase);
if (!OK) continue;
const ScorePair &scores = col[i].second;
ScorePair *inputScore = new ScorePair(*prevInputScore);
@ -122,8 +122,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
} // for (iterPath = prevPaths.begin(); iterPath != prevPaths.end(); ++iterPath) {
}
}
// cerr << "HAVE " << m_inputPathQueue.size()
// << " input paths of max. length "
// cerr << "HAVE " << m_inputPathQueue.size()
// << " input paths of max. length "
// << maxSizePhrase << "." << endl;
}
@ -249,9 +249,9 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLE
// go thru each intermediate trans opt just created
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
for (iterPartialTranslOpt = partTransOptList.begin();
iterPartialTranslOpt != partTransOptList.end();
++iterPartialTranslOpt) {
for (iterPartialTranslOpt = partTransOptList.begin();
iterPartialTranslOpt != partTransOptList.end();
++iterPartialTranslOpt) {
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
if (transStep) {

View File

@ -136,12 +136,12 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
const WordsRange &range = path.GetWordsRange();
if (tpColl && tpColl->GetSize()) {
TargetPhraseCollection::const_iterator iter;
for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
const TargetPhrase &tp = **iter;
TranslationOption *transOpt = new TranslationOption(range, tp);
transOpt->SetInputPath(path);
transOpt->Evaluate(m_source);
TargetPhraseCollection::const_iterator iter;
for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
const TargetPhrase &tp = **iter;
TranslationOption *transOpt = new TranslationOption(range, tp);
transOpt->SetInputPath(path);
transOpt->Evaluate(m_source);
Add(transOpt);
}

View File

@ -3,17 +3,17 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@ -42,305 +42,301 @@ using namespace std;
namespace Moses
{
//global variable
Timer g_timer;
string GetTempFolder()
{
#ifdef _WIN32
char *tmpPath = getenv("TMP");
string str(tmpPath);
if (str.substr(str.size() - 1, 1) != "\\")
str += "\\";
return str;
#else
return "/tmp/";
#endif
}
const std::string ToLower(const std::string& str)
{
std::string lc(str);
std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower);
return lc;
}
class BoolValueException : public util::Exception {};
template<>
bool Scan<bool>(const std::string &input)
{
std::string lc = ToLower(input);
if (lc == "yes" || lc == "y" || lc == "true" || lc == "1")
return true;
if (lc == "no" || lc == "n" || lc =="false" || lc == "0")
return false;
UTIL_THROW(BoolValueException, "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
}
bool FileExists(const std::string& filePath)
{
ifstream ifs(filePath.c_str());
return !ifs.fail();
}
const std::string Trim(const std::string& str, const std::string dropChars)
{
std::string res = str;
res.erase(str.find_last_not_of(dropChars)+1);
return res.erase(0, res.find_first_not_of(dropChars));
}
void ResetUserTime()
{
g_timer.start();
};
void PrintUserTime(const std::string &message)
{
g_timer.check(message.c_str());
}
double GetUserTime()
{
return g_timer.get_elapsed_time();
}
std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line)
{
std::vector< std::map<std::string, std::string> > meta;
std::string lline = ToLower(line);
bool check_dlt = true;
//allowed format of dlt tag
//<dlt type="name" id="name" attr="value"/>
//the type attribute is mandatory; the name should not contain any double quotation mark
//the id attribute is optional; if present, the name should not contain any double quotation mark
//only one additional attribute is possible; value can contain double quotation marks
//both name and value must be surrounded by double quotation mark
// std::cerr << "GLOBAL START" << endl;
while (check_dlt) {
size_t start = lline.find("<dlt");
if (start == std::string::npos) {
//no more dlt tags
check_dlt = false;
continue;
}
size_t close = lline.find("/>");
if (close == std::string::npos) {
// error: dlt tag is not ended
check_dlt = false;
continue;
}
//std::string dlt = Trim(lline.substr(start+4, close-start-4));
std::string dlt = Trim(line.substr(start+4, close-start-4));
line.erase(start,close-start+2);
lline.erase(start,close-start+2);
if (dlt != "") {
std::map<std::string, std::string> tmp_meta;
//check if type is present and store it
size_t start_type = dlt.find("type=");
size_t len_type=4;
if (start_type != std::string::npos) {
//type is present
//required format type="value"
//double quotation mark is required
std::string val_type;
std::string label_type = dlt.substr(start_type, len_type);
if (dlt[start_type+len_type+1] == '"'){
val_type = dlt.substr(start_type+len_type+2);
size_t close_type = val_type.find('"');
val_type = val_type.substr(0, close_type);
dlt.erase(start_type,start_type+len_type+2+close_type+1);
}
else{
TRACE_ERR("DLT parse error: missing character \" for type \n");
}
label_type = Trim(label_type);
dlt = Trim(dlt);
tmp_meta[label_type] = val_type;
}
else{
//type is not present
UTIL_THROW(util::Exception, "ProcessAndStripDLT(std::string &line): Attribute type for dlt tag is mandatory.");
}
//check if id is present and store it
size_t start_id = dlt.find("id=");
size_t len_id=2;
if (start_id != std::string::npos) {
//id is present
//required format id="name"
//double quotation mark is required
std::string val_id;
std::string label_id = dlt.substr(start_id, len_id);
if (dlt[start_id+len_id+1] == '"'){
val_id = dlt.substr(start_id+len_id+2);
size_t close_id = val_id.find('"');
val_id = val_id.substr(0, close_id);
dlt.erase(start_id,start_id+len_id+2+close_id+1);
}
else{
TRACE_ERR("DLT parse error: missing character \" for id \n");
}
label_id = Trim(label_id);
dlt = Trim(dlt);
tmp_meta[label_id] = val_id;
}
else{
//id is not present
//do nothing
}
for (size_t i = 1; i < dlt.size(); i++) {
if (dlt[i] == '=') {
std::string label = dlt.substr(0, i);
std::string val = dlt.substr(i+1);
if (val[0] == '"') {
val = val.substr(1);
// it admits any double quotation mark (but is attribute) in the value of the attribute
// it assumes that just one attribute (besides id attribute) is present in the tag,
// it assumes that the value starts and ends with double quotation mark
size_t close = val.rfind('"');
if (close == std::string::npos) {
TRACE_ERR("SGML parse error: missing \"\n");
dlt = "";
i = 0;
} else {
dlt = val.substr(close+1);
val = val.substr(0, close);
i = 0;
}
} else {
size_t close = val.find(' ');
if (close == std::string::npos) {
dlt = "";
i = 0;
} else {
dlt = val.substr(close+1);
val = val.substr(0, close);
}
}
label = Trim(label);
dlt = Trim(dlt);
tmp_meta[label] = val;
}
}
meta.push_back(tmp_meta);
}
}
//global variable
Timer g_timer;
// Return the platform temp directory with a trailing path separator.
// On Windows this is %TMP%; elsewhere it is always "/tmp/".
std::string GetTempFolder()
{
#ifdef _WIN32
  // %TMP% may be unset: getenv() then returns NULL, and constructing a
  // std::string from NULL is undefined behaviour. Fall back to ".".
  const char *tmpPath = std::getenv("TMP");
  std::string str(tmpPath ? tmpPath : ".");
  // Append the separator unless already present; the empty() check also
  // protects against an empty %TMP% (the old substr(size()-1, 1) threw).
  if (str.empty() || str[str.size() - 1] != '\\')
    str += "\\";
  return str;
#else
  return "/tmp/";
#endif
}
// Return a copy of /str/ with every character lowercased.
// Each char is cast through unsigned char before std::tolower: passing a
// negative plain char (bytes >= 0x80 where char is signed) to tolower is
// undefined behaviour, which the old transform-with-function-pointer did.
const std::string ToLower(const std::string& str)
{
  std::string lc(str);
  for (std::string::size_type i = 0; i < lc.size(); ++i) {
    lc[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(lc[i])));
  }
  return lc;
}
class BoolValueException : public util::Exception {};
// Specialization of Scan<> for booleans: accepts yes/y/true/1 and
// no/n/false/0 case-insensitively; anything else throws BoolValueException.
template<>
bool Scan<bool>(const std::string &input)
{
  std::string lc = ToLower(input);
  if (lc == "yes" || lc == "y" || lc == "true" || lc == "1")
    return true;
  if (lc == "no" || lc == "n" || lc =="false" || lc == "0")
    return false;
  UTIL_THROW(BoolValueException, "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0.");
}
// A file counts as existing iff it can be opened for reading.
bool FileExists(const std::string& filePath)
{
  std::ifstream probe(filePath.c_str());
  return !probe.fail();
}
// Remove every leading and trailing character contained in /dropChars/.
// A string consisting only of drop characters trims to "".
const std::string Trim(const std::string& str, const std::string dropChars)
{
  std::string out = str;
  out.erase(out.find_last_not_of(dropChars) + 1);   // right trim
  out.erase(0, out.find_first_not_of(dropChars));   // left trim
  return out;
}
// Restart the global timer backing the user-time reporting helpers.
void ResetUserTime()
{
  g_timer.start();
};
// Report the elapsed time on the global timer, labelled with /message/.
void PrintUserTime(const std::string &message)
{
  g_timer.check(message.c_str());
}
// Elapsed time on the global timer (as reported by
// Timer::get_elapsed_time) since the last ResetUserTime().
double GetUserTime()
{
  return g_timer.get_elapsed_time();
}
std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line)
{
std::vector< std::map<std::string, std::string> > meta;
std::string lline = ToLower(line);
bool check_dlt = true;
//allowed format of dlt tag
//<dlt type="name" id="name" attr="value"/>
//the type attribute is mandatory; the name should not contain any double quotation mark
//the id attribute is optional; if present, the name should not contain any double quotation mark
//only one additional attribute is possible; value can contain double quotation marks
//both name and value must be surrounded by double quotation mark
// std::cerr << "GLOBAL START" << endl;
while (check_dlt) {
size_t start = lline.find("<dlt");
if (start == std::string::npos) {
//no more dlt tags
check_dlt = false;
continue;
}
size_t close = lline.find("/>");
if (close == std::string::npos) {
// error: dlt tag is not ended
check_dlt = false;
continue;
}
//std::string dlt = Trim(lline.substr(start+4, close-start-4));
std::string dlt = Trim(line.substr(start+4, close-start-4));
line.erase(start,close-start+2);
lline.erase(start,close-start+2);
if (dlt != "") {
std::map<std::string, std::string> tmp_meta;
//check if type is present and store it
size_t start_type = dlt.find("type=");
size_t len_type=4;
if (start_type != std::string::npos) {
//type is present
//required format type="value"
//double quotation mark is required
std::string val_type;
std::string label_type = dlt.substr(start_type, len_type);
if (dlt[start_type+len_type+1] == '"') {
val_type = dlt.substr(start_type+len_type+2);
size_t close_type = val_type.find('"');
val_type = val_type.substr(0, close_type);
dlt.erase(start_type,start_type+len_type+2+close_type+1);
} else {
TRACE_ERR("DLT parse error: missing character \" for type \n");
}
label_type = Trim(label_type);
dlt = Trim(dlt);
tmp_meta[label_type] = val_type;
} else {
//type is not present
UTIL_THROW(util::Exception, "ProcessAndStripDLT(std::string &line): Attribute type for dlt tag is mandatory.");
}
//check if id is present and store it
size_t start_id = dlt.find("id=");
size_t len_id=2;
if (start_id != std::string::npos) {
//id is present
//required format id="name"
//double quotation mark is required
std::string val_id;
std::string label_id = dlt.substr(start_id, len_id);
if (dlt[start_id+len_id+1] == '"') {
val_id = dlt.substr(start_id+len_id+2);
size_t close_id = val_id.find('"');
val_id = val_id.substr(0, close_id);
dlt.erase(start_id,start_id+len_id+2+close_id+1);
} else {
TRACE_ERR("DLT parse error: missing character \" for id \n");
}
label_id = Trim(label_id);
dlt = Trim(dlt);
tmp_meta[label_id] = val_id;
} else {
//id is not present
//do nothing
}
for (size_t i = 1; i < dlt.size(); i++) {
if (dlt[i] == '=') {
std::string label = dlt.substr(0, i);
std::string val = dlt.substr(i+1);
if (val[0] == '"') {
val = val.substr(1);
// it admits any double quotation mark (but is attribute) in the value of the attribute
// it assumes that just one attribute (besides id attribute) is present in the tag,
// it assumes that the value starts and ends with double quotation mark
size_t close = val.rfind('"');
if (close == std::string::npos) {
TRACE_ERR("SGML parse error: missing \"\n");
dlt = "";
i = 0;
} else {
dlt = val.substr(close+1);
val = val.substr(0, close);
i = 0;
}
} else {
size_t close = val.find(' ');
if (close == std::string::npos) {
dlt = "";
i = 0;
} else {
dlt = val.substr(close+1);
val = val.substr(0, close);
}
}
label = Trim(label);
dlt = Trim(dlt);
tmp_meta[label] = val;
}
}
meta.push_back(tmp_meta);
}
}
// std::cerr << "GLOBAL END" << endl;
return meta;
}
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
{
std::map<std::string, std::string> meta;
std::string lline = ToLower(line);
if (lline.find("<seg")!=0) return meta;
size_t close = lline.find(">");
if (close == std::string::npos) return meta; // error
size_t end = lline.find("</seg>");
std::string seg = Trim(lline.substr(4, close-4));
std::string text = line.substr(close+1, end - close - 1);
for (size_t i = 1; i < seg.size(); i++) {
if (seg[i] == '=' && seg[i-1] == ' ') {
std::string less = seg.substr(0, i-1) + seg.substr(i);
seg = less;
i = 0;
continue;
}
if (seg[i] == '=' && seg[i+1] == ' ') {
std::string less = seg.substr(0, i+1);
if (i+2 < seg.size()) less += seg.substr(i+2);
seg = less;
i = 0;
continue;
}
}
line = Trim(text);
if (seg == "") return meta;
for (size_t i = 1; i < seg.size(); i++) {
if (seg[i] == '=') {
std::string label = seg.substr(0, i);
std::string val = seg.substr(i+1);
if (val[0] == '"') {
val = val.substr(1);
size_t close = val.find('"');
if (close == std::string::npos) {
TRACE_ERR("SGML parse error: missing \"\n");
seg = "";
i = 0;
} else {
seg = val.substr(close+1);
val = val.substr(0, close);
i = 0;
}
} else {
size_t close = val.find(' ');
if (close == std::string::npos) {
seg = "";
i = 0;
} else {
seg = val.substr(close+1);
val = val.substr(0, close);
}
}
label = Trim(label);
seg = Trim(seg);
meta[label] = val;
}
}
return meta;
}
// Extract a "<tagName .../>"-style passthrough tag from /line/: returns
// the full tag text (brackets included) and removes it from /line/.
// Returns "" when the tag is absent or not terminated. The bracket
// markers are configurable via lbrackStr/rbrackStr.
std::string PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr)
{
  string lbrack = lbrackStr; // = "<";
  string rbrack = rbrackStr; // = ">";
  std::string meta = "";
  // Match case-insensitively by scanning a lowercased copy of the line.
  std::string lline = ToLower(line);
  size_t open = lline.find(lbrack+tagName);
  //check whether the tag exists; if not return the empty string
  if (open == std::string::npos) return meta;
  size_t close = lline.find(rbrack, open);
  //check whether the tag is closed with '/>'; if not return the empty string
  if (close == std::string::npos) {
    TRACE_ERR("PassthroughSGML error: the <passthrough info/> tag does not end properly\n");
    return meta;
  }
  // extract the tag
  // NOTE(review): tmp duplicates meta and is never used afterwards.
  std::string tmp = line.substr(open, close - open + 1);
  meta = line.substr(open, close - open + 1);
  // strip the tag from the line
  line = line.substr(0, open) + line.substr(close + 1, std::string::npos);
  TRACE_ERR("The input contains a <passthrough info/> tag:" << meta << std::endl);
  // Warn (but carry on) if a second passthrough tag remains in the line.
  lline = ToLower(line);
  open = lline.find(lbrack+tagName);
  if (open != std::string::npos) {
    TRACE_ERR("PassthroughSGML error: there are two <passthrough> tags\n");
  }
  return meta;
}
return meta;
}
// Parse a leading "<seg ...>" SGML tag: return its attributes as a
// label -> value map and replace `line` with the trimmed text found
// between "<seg ...>" and "</seg>".
// NOTE(review): attributes are extracted from a lowercased copy of the
// line, so both labels and values come back lowercased — confirm callers
// expect this.
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
{
  std::map<std::string, std::string> meta;
  std::string lline = ToLower(line);
  // Only input that starts with "<seg" is treated as SGML-tagged.
  if (lline.find("<seg")!=0) return meta;
  size_t close = lline.find(">");
  if (close == std::string::npos) return meta; // error
  // If "</seg>" is absent, end == npos and the substr below silently
  // takes the remainder of the line.
  size_t end = lline.find("</seg>");
  std::string seg = Trim(lline.substr(4, close-4));
  std::string text = line.substr(close+1, end - close - 1);
  // First pass: delete spaces around '=' so attributes are uniformly
  // "label=value".  Every splice restarts the scan (i = 0; the loop
  // increment then resumes at index 1).
  for (size_t i = 1; i < seg.size(); i++) {
    if (seg[i] == '=' && seg[i-1] == ' ') {
      std::string less = seg.substr(0, i-1) + seg.substr(i);
      seg = less;
      i = 0;
      continue;
    }
    if (seg[i] == '=' && seg[i+1] == ' ') {
      std::string less = seg.substr(0, i+1);
      if (i+2 < seg.size()) less += seg.substr(i+2);
      seg = less;
      i = 0;
      continue;
    }
  }
  line = Trim(text);
  if (seg == "") return meta;
  // Second pass: peel off one label=value pair per '=' found.  The
  // consumed prefix (and the value) are cut out of `seg` and the scan
  // restarts from the front via i = 0.  `seg` shrinks strictly on every
  // extraction, so the loop terminates.
  for (size_t i = 1; i < seg.size(); i++) {
    if (seg[i] == '=') {
      std::string label = seg.substr(0, i);
      std::string val = seg.substr(i+1);
      if (val[0] == '"') {
        // Quoted value: everything up to the closing quote.
        val = val.substr(1);
        size_t close = val.find('"');
        if (close == std::string::npos) {
          TRACE_ERR("SGML parse error: missing \"\n");
          seg = "";
          i = 0;
        } else {
          seg = val.substr(close+1);
          val = val.substr(0, close);
          i = 0;
        }
      } else {
        // Unquoted value: runs up to the next space (or end of tag).
        size_t close = val.find(' ');
        if (close == std::string::npos) {
          seg = "";
          i = 0;
        } else {
          seg = val.substr(close+1);
          val = val.substr(0, close);
          // Bug fix: restart the scan like the quoted branch does;
          // without this reset only the first of several unquoted
          // attributes was ever captured.
          i = 0;
        }
      }
      label = Trim(label);
      seg = Trim(seg);
      meta[label] = val;
    }
  }
  return meta;
}
// Extract the first tag delimited by lbrackStr+tagName ... rbrackStr
// (e.g. "<passthrough .../>") from `line`, strip it from the line, and
// return the raw tag text taken from the original (non-lowercased) line.
// Returns "" when no tag is present or the tag never closes.  A second
// occurrence of the tag is reported but left in place.
std::string PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr)
{
  string lbrack = lbrackStr; // = "<";
  string rbrack = rbrackStr; // = ">";
  std::string meta = "";
  // Search case-insensitively by scanning a lowercased copy of the line.
  std::string lline = ToLower(line);
  size_t open = lline.find(lbrack+tagName);
  //check whether the tag exists; if not return the empty string
  if (open == std::string::npos) return meta;
  size_t close = lline.find(rbrack, open);
  //check whether the tag is closed by rbrack; if not return the empty string
  if (close == std::string::npos) {
    TRACE_ERR("PassthroughSGML error: the <passthrough info/> tag does not end properly\n");
    return meta;
  }
  // extract the tag (the unused duplicate copy `tmp` was removed)
  meta = line.substr(open, close - open + 1);
  // strip the tag from the line
  line = line.substr(0, open) + line.substr(close + 1, std::string::npos);
  TRACE_ERR("The input contains a <passthrough info/> tag:" << meta << std::endl);
  // Warn (but do not strip) if a second tag remains.
  lline = ToLower(line);
  open = lline.find(lbrack+tagName);
  if (open != std::string::npos) {
    TRACE_ERR("PassthroughSGML error: there are two <passthrough> tags\n");
  }
  return meta;
}
}

View File

@ -98,45 +98,44 @@ StringPiece Word::GetString(FactorType factorType) const
class StrayFactorException : public util::Exception {};
void
void
Word::
CreateFromString(FactorDirection direction
, const std::vector<FactorType> &factorOrder
, const StringPiece &str
, bool isNonTerminal
, bool strict)
, const std::vector<FactorType> &factorOrder
, const StringPiece &str
, bool isNonTerminal
, bool strict)
{
FactorCollection &factorCollection = FactorCollection::Instance();
vector<StringPiece> bits(MAX_NUM_FACTORS);
util::TokenIter<util::MultiCharacter>
fit(str, StaticData::Instance().GetFactorDelimiter());
util::TokenIter<util::MultiCharacter>
fit(str, StaticData::Instance().GetFactorDelimiter());
size_t i = 0;
for (; i < MAX_NUM_FACTORS && fit; ++i,++fit)
bits[i] = *fit;
if (i == MAX_NUM_FACTORS)
UTIL_THROW_IF(fit, StrayFactorException,
"The hard limit for factors is " << MAX_NUM_FACTORS
<< ". The word " << str << " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
UTIL_THROW_IF(fit, StrayFactorException,
"The hard limit for factors is " << MAX_NUM_FACTORS
<< ". The word " << str << " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
if (strict)
UTIL_THROW_IF(fit, StrayFactorException,
"You have configured " << factorOrder.size()
<< " factors but the word " << str
<< " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
UTIL_THROW_IF(i < factorOrder.size(),util::Exception,
"Too few factors in string '" << str << "'.");
for (size_t k = 0; k < factorOrder.size(); ++k)
{
UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
"Factor order out of bounds.");
m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
}
UTIL_THROW_IF(fit, StrayFactorException,
"You have configured " << factorOrder.size()
<< " factors but the word " << str
<< " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
UTIL_THROW_IF(i < factorOrder.size(),util::Exception,
"Too few factors in string '" << str << "'.");
for (size_t k = 0; k < factorOrder.size(); ++k) {
UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception,
"Factor order out of bounds.");
m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal);
}
// assume term/non-term same for all factors
m_isNonTerminal = isNonTerminal;
}

View File

@ -152,7 +152,7 @@ public:
, const std::vector<FactorType> &factorOrder
, const StringPiece &str
, bool isNonTerminal
, bool strict = true);
, bool strict = true);
void CreateUnknownWord(const Word &sourceWord);

View File

@ -49,11 +49,11 @@ void WordLattice::Print(std::ostream& out) const
out<<"\n\n";
}
int
int
WordLattice::
InitializeFromPCNDataType
(const PCN::CN& cn,
const std::vector<FactorType>& factorOrder,
(const PCN::CN& cn,
const std::vector<FactorType>& factorOrder,
const std::string& debug_line)
{
const StaticData &staticData = StaticData::Instance();
@ -78,20 +78,20 @@ InitializeFromPCNDataType
//check for correct number of link parameters
if (alt.m_denseFeatures.size() != numInputScores) {
TRACE_ERR("ERROR: need " << numInputScores
<< " link parameters, found "
<< alt.m_denseFeatures.size()
<< " while reading column " << i
<< " from " << debug_line << "\n");
TRACE_ERR("ERROR: need " << numInputScores
<< " link parameters, found "
<< alt.m_denseFeatures.size()
<< " while reading column " << i
<< " from " << debug_line << "\n");
return false;
}
//check each element for bounds
std::vector<float>::const_iterator probsIterator;
data[i][j].second = std::vector<float>(0);
for(probsIterator = alt.m_denseFeatures.begin();
probsIterator < alt.m_denseFeatures.end();
probsIterator++) {
for(probsIterator = alt.m_denseFeatures.begin();
probsIterator < alt.m_denseFeatures.end();
probsIterator++) {
IFVERBOSE(1) {
if (*probsIterator < 0.0f) {
TRACE_ERR("WARN: neg probability: " << *probsIterator << "\n");

View File

@ -66,7 +66,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
bool WordsBitmap::IsAdjacent(size_t startPos, size_t endPos) const
{
if (GetNumWordsCovered() == 0) {
return true;
return true;
}
size_t first = GetFirstGapPos();