mirror of https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 15:00:33 +03:00

commit bd83999264 ("beautify")
parent a098550f33
@@ -161,13 +161,13 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
  }

  if (m_translations.size() > 1) {
    UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
                   "Non-monotonic future score");
  }

  if (m_hypotheses.size() > 1) {
    UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
                   "Non-monotonic total score");
  }

  HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@@ -442,7 +442,7 @@ BitmapContainer::ProcessBestHypothesis()
  if (!Empty()) {
    HypothesisQueueItem *check = Dequeue(true);
    UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
                   "Non-monotonic total score");
  }

  // Logging for the criminally insane
@@ -108,12 +108,12 @@ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
  if (sourcePosSet.size() == 1) {
    const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
    UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
                   "No source rule");

    size_t sourcePos = *sourcePosSet.begin();
    const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
    UTIL_THROW_IF2(sourceWord == NULL,
                   "No source word");
    const Factor *factor = sourceWord->GetFactor(placeholderFactor);
    if (factor) {
      outPhrase.Back()[0] = factor;
@@ -247,10 +247,10 @@ void ChartHypothesis::CleanupArcList()

  if (!distinctNBest && m_arcList->size() > nBestSize) {
    // prune arc list only if there too many arcs
    NTH_ELEMENT4(m_arcList->begin()
                 , m_arcList->begin() + nBestSize - 1
                 , m_arcList->end()
                 , CompareChartChartHypothesisTotalScore());

    // delete bad ones
    ChartArcList::iterator iter;
@@ -89,7 +89,7 @@ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManage
  HCType::iterator &iterExisting = addRet.first;
  ChartHypothesis *hypoExisting = *iterExisting;
  UTIL_THROW_IF2(iterExisting == m_hypos.end(),
                 "Adding a hypothesis should have returned a valid iterator");

  //StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
@@ -253,7 +253,7 @@ void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
    ChartHypothesis *hypo = *iter;
    HCType::iterator iterFindHypo = m_hypos.find(hypo);
    UTIL_THROW_IF2(iterFindHypo == m_hypos.end(),
                   "Adding a hypothesis should have returned a valid iterator");

    Remove(iterFindHypo);
  }
@@ -204,7 +204,7 @@ void ChartParser::CreateInputPaths(const InputType &input)
  m_inputPathMatrix.resize(size);

  UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
                 "Input must be a sentence or a tree, not lattice or confusion networks");
  for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
    for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
      size_t endPos = startPos + phaseSize -1;
@@ -238,7 +238,7 @@ const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
{
  size_t offset = endPos - startPos;
  UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
                 "Out of bound: " << offset);
  return *m_inputPathMatrix[startPos][offset];
}
@@ -246,7 +246,7 @@ InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
{
  size_t offset = endPos - startPos;
  UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
                 "Out of bound: " << offset);
  return *m_inputPathMatrix[startPos][offset];
}
/*
@@ -102,10 +102,10 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,

  // Prune if bursting
  if (m_size == m_ruleLimit * 2) {
    NTH_ELEMENT4(m_collection.begin(),
                 m_collection.begin() + m_ruleLimit - 1,
                 m_collection.begin() + m_size,
                 ChartTranslationOptionOrderer());
    m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore();
    m_size = m_ruleLimit;
  }
@@ -129,9 +129,9 @@ void ChartTranslationOptionList::ApplyThreshold()
  // Reduce the list to the best m_ruleLimit options. The remaining
  // options can be overwritten on subsequent calls to Add().
  NTH_ELEMENT4(m_collection.begin(),
               m_collection.begin()+m_ruleLimit,
               m_collection.begin()+m_size,
               ChartTranslationOptionOrderer());
  m_size = m_ruleLimit;
}
@@ -167,11 +167,11 @@ void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPat

std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &obj)
{
  for (size_t i = 0; i < obj.m_collection.size(); ++i) {
    const ChartTranslationOptions &transOpts = *obj.m_collection[i];
    out << transOpts << endl;
  }
  return out;
}

}
@@ -137,12 +137,12 @@ void ChartTranslationOptions::CreateSourceRuleFromInputPath()

std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
{
  for (size_t i = 0; i < obj.m_collection.size(); ++i) {
    const ChartTranslationOption &transOpt = *obj.m_collection[i];
    out << transOpt << endl;
  }

  return out;
}

}
@@ -59,8 +59,9 @@ public:
  static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
                                       const StackVec &);

-  size_t GetSize() const
-  { return m_collection.size(); }
+  size_t GetSize() const {
+    return m_collection.size();
+  }

  //! @todo dunno
  const StackVec &GetStackVec() const {
@@ -45,7 +45,7 @@ public:

  const Column& GetColumn(size_t i) const {
    UTIL_THROW_IF2(i >= data.size(),
                   "Out of bounds. Trying to access " << i << " when vector only contains " << data.size());
    return data[i];
  }
  const Column& operator[](size_t i) const {
@@ -33,7 +33,8 @@ DecodeGraph::~DecodeGraph()
}

//! Add another decode step to the graph
-void DecodeGraph::Add(DecodeStep *decodeStep) {
+void DecodeGraph::Add(DecodeStep *decodeStep)
+{
  m_steps.push_back(decodeStep);
  decodeStep->SetContainer(this);
}
@@ -49,8 +49,8 @@ public:
  DecodeGraph(size_t position)
    : m_position(position)
    , m_maxChartSpan(NOT_FOUND)
    , m_backoff(0)
  {}

  // for chart decoding
  DecodeGraph(size_t position, size_t maxChartSpan)
@@ -78,7 +78,7 @@ public:
  }

  size_t GetMaxChartSpan() const {
    UTIL_THROW_IF2(m_maxChartSpan == NOT_FOUND, "Max chart span not specified");
    return m_maxChartSpan;
  }
@@ -86,7 +86,7 @@ public:
    return m_backoff;
  }

-  void SetBackoff(size_t backoff){
+  void SetBackoff(size_t backoff) {
    m_backoff = backoff;
  }
@@ -105,10 +105,12 @@ public:

  void RemoveFeature(const FeatureFunction *ff);

-  void SetContainer(const DecodeGraph *container)
-  { m_container = container; }
-  const DecodeGraph *GetContainer() const
-  { return m_container; }
+  void SetContainer(const DecodeGraph *container) {
+    m_container = container;
+  }
+  const DecodeGraph *GetContainer() const {
+    return m_container;
+  }

};
@@ -85,7 +85,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO

  outPhrase.Merge(targetPhrase, m_newOutputFactors);
  outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
  cerr << "DecodeStepTranslation::Process is calling outPhrase.Evaluate(inputPath.GetPhrase(), m_featuresToApply)" << endl;

  TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
  assert(newTransOpt != NULL);
@@ -199,11 +199,11 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(

  const Word *wordIP = NULL;
  for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
    const Word &tempWord = phraseFromIP.GetWord(i);
    if (!tempWord.IsEpsilon()) {
      wordIP = &tempWord;
      break;
    }
  }

  // const WordsRange &range = inputPath.GetWordsRange();
@@ -238,7 +238,7 @@ void DecodeStepTranslation::ProcessLEGACY(const TranslationOption &inputPartialT
  const size_t tableLimit = phraseDictionary->GetTableLimit();

  const TargetPhraseCollectionWithSourcePhrase *phraseColl
    = phraseDictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),sourceWordsRange);


  if (phraseColl != NULL) {
@@ -62,7 +62,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
{
public:
  static const std::vector<BleuScoreFeature*>& GetColl() {
    return s_staticColl;
  }

  typedef boost::unordered_map<size_t, RefValue > RefCounts;
@@ -72,7 +72,7 @@ void ConstrainedDecoding::Load()
std::vector<float> ConstrainedDecoding::DefaultWeights() const
{
  UTIL_THROW_IF2(m_numScoreComponents != 1,
                 "ConstrainedDecoding must only have 1 score");
  vector<float> ret(1, 1);
  return ret;
}
@@ -112,18 +112,16 @@ FFState* ConstrainedDecoding::Evaluate(
  float score;
  if (hypo.IsSourceCompleted()) {
    // translated entire sentence.
-    bool match = (searchPos == 0) && (ref->GetSize() == outputPhrase.GetSize());
-    if (!m_negate) {
-      score = match ? 0 : - std::numeric_limits<float>::infinity();
-    }
-    else {
-      score = !match ? 0 : - std::numeric_limits<float>::infinity();
-    }
+    bool match = (searchPos == 0) && (ref->GetSize() == outputPhrase.GetSize());
+    if (!m_negate) {
+      score = match ? 0 : - std::numeric_limits<float>::infinity();
+    } else {
+      score = !match ? 0 : - std::numeric_limits<float>::infinity();
+    }
  } else if (m_negate) {
-    // keep all derivations
-    score = 0;
-  }
-  else {
+    // keep all derivations
+    score = 0;
+  } else {
    score = (searchPos != NOT_FOUND) ? 0 : - std::numeric_limits<float>::infinity();
  }
@@ -151,17 +149,16 @@ FFState* ConstrainedDecoding::EvaluateChart(
  if (hypo.GetCurrSourceRange().GetStartPos() == 0 &&
      hypo.GetCurrSourceRange().GetEndPos() == source.GetSize() - 1) {
    // translated entire sentence.
    bool match = (searchPos == 0) && (ref->GetSize() == outputPhrase.GetSize());

-    if (!m_negate) {
-      score = match ? 0 : - std::numeric_limits<float>::infinity();
-    }
-    else {
-      score = !match ? 0 : - std::numeric_limits<float>::infinity();
-    }
+    if (!m_negate) {
+      score = match ? 0 : - std::numeric_limits<float>::infinity();
+    } else {
+      score = !match ? 0 : - std::numeric_limits<float>::infinity();
+    }
  } else if (m_negate) {
    // keep all derivations
    score = 0;
  } else {
    score = (searchPos != NOT_FOUND) ? 0 : - std::numeric_limits<float>::infinity();
  }
@@ -178,7 +175,7 @@ void ConstrainedDecoding::SetParameter(const std::string& key, const std::string
  } else if (key == "max-unknowns") {
    m_maxUnknowns = Scan<int>(value);
  } else if (key == "negate") {
    m_negate = Scan<bool>(value);
  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }
@@ -50,7 +50,7 @@ int ControlRecombinationState::Compare(const FFState& other) const
std::vector<float> ControlRecombination::DefaultWeights() const
{
  UTIL_THROW_IF2(m_numScoreComponents,
                 "ControlRecombination should not have any scores");
  vector<float> ret(0);
  return ret;
}
@@ -22,43 +22,43 @@ int CoveredReferenceState::Compare(const FFState& other) const
  const CoveredReferenceState &otherState = static_cast<const CoveredReferenceState&>(other);

  if (m_coveredRef.size() != otherState.m_coveredRef.size()) {
    return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
  } else {
    multiset<string>::const_iterator thisIt, otherIt;
    for (thisIt = m_coveredRef.begin(), otherIt = otherState.m_coveredRef.begin();
         thisIt != m_coveredRef.end();
         thisIt++, otherIt++) {
      if (*thisIt != *otherIt) return thisIt->compare(*otherIt);
    }
  }
  return 0;

  // return m_coveredRef == otherState.m_coveredRef;

  // if (m_coveredRef == otherState.m_coveredRef)
  //   return 0;
  // return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
}

void CoveredReferenceFeature::Evaluate(const Phrase &source
                                       , const TargetPhrase &targetPhrase
                                       , ScoreComponentCollection &scoreBreakdown
                                       , ScoreComponentCollection &estimatedFutureScore) const
{}

void CoveredReferenceFeature::Evaluate(const InputType &input
                                       , const InputPath &inputPath
                                       , const TargetPhrase &targetPhrase
                                       , ScoreComponentCollection &scoreBreakdown
                                       , ScoreComponentCollection *estimatedFutureScore) const
{
  long id = input.GetTranslationId();
  boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
  multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
  multiset<string> covered;
  set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
                   refIt->second.begin(), refIt->second.end(),
                   inserter(covered, covered.begin()));
  vector<float> scores;
  scores.push_back(covered.size());
@@ -66,7 +66,8 @@ void CoveredReferenceFeature::Evaluate(const InputType &input
  estimatedFutureScore->Assign(this, scores);
}

-void CoveredReferenceFeature::Load() {
+void CoveredReferenceFeature::Load()
+{
  InputFileStream refFile(m_path);
  std::string line;
  const StaticData &staticData = StaticData::Instance();
@@ -75,7 +76,7 @@ void CoveredReferenceFeature::Load() {
    vector<string> words = Tokenize(line, " ");
    multiset<string> wordSet;
    // TODO make Tokenize work with other containers than vector
    copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
    m_refs.insert(make_pair(sentenceID++, wordSet));
  }
}
@@ -106,15 +107,15 @@ FFState* CoveredReferenceFeature::Evaluate(
  boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
  if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
  set_difference(refIt->second.begin(), refIt->second.end(),
                 ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
                 inserter(remaining, remaining.begin()));

  // which of the remaining words are present in the current phrase
  multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
  multiset<string> newCovered;
  set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
                   remaining.begin(), remaining.end(),
                   inserter(newCovered, newCovered.begin()));

  vector<float> estimateScore =
    cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
@@ -37,8 +37,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction

public:
  CoveredReferenceFeature(const std::string &line)
-    :StatefulFeatureFunction(1, line)
-  {
+    :StatefulFeatureFunction(1, line) {
    m_tuneable = true;
    ReadParameters();
  }
@@ -36,7 +36,7 @@ DecodeFeature::DecodeFeature(const std::string &line)
}

DecodeFeature::DecodeFeature(size_t numScoreComponents
                             , const std::string &line)
  : StatelessFeatureFunction(numScoreComponents, line)
{
  VERBOSE(2,"DecodeFeature: no factors yet" << std::endl);
@@ -42,12 +42,12 @@ public:
  DecodeFeature(const std::string &line);

  DecodeFeature(size_t numScoreComponents
                , const std::string &line);

  DecodeFeature(size_t numScoreComponents
                , const std::vector<FactorType> &input
                , const std::vector<FactorType> &output
                , const std::string &line);

  //! returns output factor types as specified by the ini file
  const FactorMask& GetOutputFactorMask() const;
@@ -79,8 +79,9 @@ public:
                    , ScoreComponentCollection &estimatedFutureScore) const
  {}

-  void SetContainer(const DecodeStep *container)
-  { m_container = container; }
+  void SetContainer(const DecodeStep *container) {
+    m_container = container;
+  }

protected:
  std::vector<FactorType> m_input;
@@ -49,12 +49,9 @@ void DynamicCacheBasedLanguageModel::SetPreComputedScores()

float DynamicCacheBasedLanguageModel::GetPreComputedScores(const unsigned int age)
{
-  if (age < precomputedScores.size())
-  {
+  if (age < precomputedScores.size()) {
    return precomputedScores.at(age);
-  }
-  else
-  {
+  } else {
    return precomputedScores.at(m_maxAge);
  }
}
@@ -117,8 +114,7 @@ float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase&
  if (it != m_cache.end()) { //found!
    score = ((*it).second).second;
    VERBOSE(3,"cblm::Evaluate_Whole_String: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl);
-  }
-  else{
+  } else {
    score = m_lower_score;
  }
@@ -134,7 +130,7 @@ float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase

  decaying_cache_t::const_iterator it;
  float score = 0.0;

  for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) {
    std::string w = "";
    for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) {
@@ -144,9 +140,8 @@ float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase
      if (it != m_cache.end()) { //found!
        score += ((*it).second).second;
        VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl);
-      }
-      else{
-        score += m_lower_score;
+      } else {
+        score += m_lower_score;
      }

      if (endpos == startpos) {
@@ -113,16 +113,16 @@ public:
  {}

  void Evaluate(const Hypothesis& hypo,
                ScoreComponentCollection* accumulator) const
  {}

  void EvaluateChart(const ChartHypothesis &hypo,
                     ScoreComponentCollection* accumulator) const
  {}

  void Evaluate(const InputType &input
                , const InputPath &inputPath
                , ScoreComponentCollection &scoreBreakdown) const
  {}

  void SetQueryType(size_t type);
@@ -236,12 +236,12 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line

void FeatureRegistry::PrintFF() const
{
  std::cerr << "Available feature functions:" << std::endl;
  Map::const_iterator iter;
  for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
    const string &ffName = iter->first;
    std::cerr << ffName << std::endl;
  }

}
@@ -75,7 +75,7 @@ void FeatureFunction::ParseLine(const std::string &line)
  for (size_t i = 1; i < toks.size(); ++i) {
    vector<string> args = TokenizeFirstOnly(toks[i], "=");
    UTIL_THROW_IF2(args.size() != 2,
                   "Incorrect format for feature function arg: " << toks[i]);

    pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
    UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
@@ -14,7 +14,7 @@ GlobalLexicalModelUnlimited::GlobalLexicalModelUnlimited(const std::string &line
  :StatelessFeatureFunction(0, line)
{
  UTIL_THROW(util::Exception,
             "GlobalLexicalModelUnlimited hasn't been refactored for new feature function framework yet"); // TODO need to update arguments to key=value

  const vector<string> modelSpec = Tokenize(line);
@@ -6,36 +6,36 @@ using namespace std;
namespace Moses
{
void InternalStructStatelessFF::Evaluate(const Phrase &source
                                        , const TargetPhrase &targetPhrase
                                        , ScoreComponentCollection &scoreBreakdown
                                        , ScoreComponentCollection &estimatedFutureScore) const
{
  // cerr << "MARIA!!!" << endl;
  scoreBreakdown.PlusEquals(this, 0);

}

void InternalStructStatelessFF::Evaluate(const InputType &input
                                        , const InputPath &inputPath
                                        , const TargetPhrase &targetPhrase
                                        , ScoreComponentCollection &scoreBreakdown
                                        , ScoreComponentCollection *estimatedFutureScore) const
{

  cerr << "HHHHH" << scoreBreakdown << endl;
  scoreBreakdown.PlusEquals(this, 66);
  /* FactorList f_mask;
     f_mask.push_back(0);
     //if(inputPath.GetPhrase().GetStringRep(f_mask).)
     int score =50;
     for(size_t i=0;i<inputPath.GetPhrase().GetSize();i++){
       if(inputPath.GetPhrase(). GetFactor(i,0)->GetString().as_string()=="ist"){
         //cout<<inputPath.GetPhrase().GetStringRep(f_mask);
         score+=1;
       }
     }
     scoreBreakdown.PlusEquals(this, score);
  */
}

}
@@ -9,29 +9,30 @@ namespace Moses
class InternalStructStatelessFF : public StatelessFeatureFunction
{
public:
  InternalStructStatelessFF(const std::string &line)
    :StatelessFeatureFunction(line)
  {}

-  bool IsUseable(const FactorMask &mask) const
-  { return true; }
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }

  void Evaluate(const Phrase &source
                , const TargetPhrase &targetPhrase
                , ScoreComponentCollection &scoreBreakdown
                , ScoreComponentCollection &estimatedFutureScore) const;

  void Evaluate(const InputType &input
                , const InputPath &inputPath
                , const TargetPhrase &targetPhrase
                , ScoreComponentCollection &scoreBreakdown
                , ScoreComponentCollection *estimatedFutureScore = NULL) const;
  virtual void Evaluate(const Hypothesis& hypo,
                        ScoreComponentCollection* accumulator) const
  {}
  void EvaluateChart(const ChartHypothesis &hypo,
                     ScoreComponentCollection* accumulator) const
  {}

};
@@ -13,9 +13,9 @@ PhrasePenalty::PhrasePenalty(const std::string &line)
}

void PhrasePenalty::Evaluate(const Phrase &source
                             , const TargetPhrase &targetPhrase
                             , ScoreComponentCollection &scoreBreakdown
                             , ScoreComponentCollection &estimatedFutureScore) const
{
  scoreBreakdown.Assign(this, - 1.0f);
}
@@ -61,7 +61,7 @@ void TargetNgramFeature::SetParameter(const std::string& key, const std::string&
  } else if (key == "lower-ngrams") {
    m_lower_ngrams = Scan<bool>(value);
  } else if (key == "file") {
    m_file = value;
  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }
@@ -74,7 +74,7 @@ void TargetNgramFeature::Load()
  if (m_file == "*") return; //allow all
  ifstream inFile(m_file.c_str());
  if (!inFile) {
    UTIL_THROW(util::Exception, "Couldn't open file" << m_file);
  }

  std::string line;
@@ -45,10 +45,10 @@ public:
  {}


  /*
    virtual void Evaluate(const InputType &source
                          , ScoreComponentCollection &scoreBreakdown) const;
  */
};

}
moses/File.h (18 lines changed)
@@ -38,7 +38,7 @@ static const OFF_T InvalidOffT=-1;
template<typename T> inline size_t fWrite(FILE* f,const T& t)
{
  if(fwrite(&t,sizeof(t),1,f)!=1) {
    UTIL_THROW2("ERROR:: fwrite!");
  }
  return sizeof(t);
}
@@ -46,7 +46,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T& t)
template<typename T> inline void fRead(FILE* f,T& t)
{
  if(fread(&t,sizeof(t),1,f)!=1) {
    UTIL_THROW2("ERROR: fread!");
  }
}
@@ -55,7 +55,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T* b,const T* e)
  UINT32 s=std::distance(b,e);
  size_t rv=fWrite(f,s);
  if(fwrite(b,sizeof(T),s,f)!=s) {
    UTIL_THROW2("ERROR: fwrite!");
  }
  return rv+sizeof(T)*s;
}
@@ -65,7 +65,7 @@ template<typename T> inline size_t fWrite(FILE* f,const T b,const T e)
  UINT32 s=std::distance(b,e);
  size_t rv=fWrite(f,s);
  if(fwrite(&(*b),sizeof(T),s,f)!=s) {
    UTIL_THROW2("ERROR: fwrite!");
  }
  return rv+sizeof(T)*s;
}
@@ -75,7 +75,7 @@ template<typename C> inline size_t fWriteVector(FILE* f,const C& v)
  UINT32 s=v.size();
  size_t rv=fWrite(f,s);
  if(fwrite(&v[0],sizeof(typename C::value_type),s,f)!=s) {
    UTIL_THROW2("ERROR: fwrite!");
  }
  return rv+sizeof(typename C::value_type)*s;
}
@@ -87,7 +87,7 @@ template<typename C> inline void fReadVector(FILE* f, C& v)
  v.resize(s);
  size_t r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f);
  if(r!=s) {
    UTIL_THROW2("ERROR: freadVec! "<<r<<" "<<s);
  }
}
@@ -106,7 +106,7 @@ inline void fReadString(FILE* f,std::string& e)
  fRead(f,s);
  char* a=new char[s+1];
  if(fread(a,sizeof(char),s,f)!=s) {
    UTIL_THROW2("ERROR: fread!");
  }
  a[s]='\0';
  e.assign(a);
@@ -142,8 +142,8 @@ inline OFF_T fTell(FILE* f)
inline void fSeek(FILE* f,OFF_T o)
{
  if(FSEEKO(f,o,SEEK_SET)<0) {
    std::stringstream strme;
    strme << "ERROR: could not fseeko position " << o <<"\n";
    if(o==InvalidOffT) strme << "You tried to seek for 'InvalidOffT'!\n";
    UTIL_THROW2(strme.str());
  }
@@ -55,7 +55,7 @@ protected:

public:
  static const std::vector<GenerationDictionary*>& GetColl() {
    return s_staticColl;
  }

  GenerationDictionary(const std::string &line);
@@ -340,10 +340,10 @@ void Hypothesis::CleanupArcList()

  if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
    // prune arc list only if there too many arcs
    NTH_ELEMENT4(m_arcList->begin()
                 , m_arcList->begin() + nBestSize - 1
                 , m_arcList->end()
                 , CompareHypothesisTotalScore());

    // delete bad ones
    ArcList::iterator iter;
@@ -145,7 +145,7 @@ void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
{
  std::pair<iterator, bool> addRet = Add(hypo);
  UTIL_THROW_IF2(!addRet.second,
                 "Should have added hypothesis " << *hypo);

  const WordsBitmap &bitmap = hypo->GetWordsBitmap();
  m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
@@ -157,7 +157,7 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std:
{
  std::vector<lm::WordIndex> words;
  UTIL_THROW_IF2(phrase.GetSize() > 1,
                 "OOV target phrase should be 0 or 1 word in length");
  if (phrase.GetSize())
    words.push_back(Convert(phrase.GetWord(0)));

@@ -87,8 +87,9 @@ public:
                , const TargetPhraseCollection *targetPhrases
                , const void *ptNode);
  const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
-  const TargetPhrases &GetTargetPhrases() const
-  { return m_targetPhrases; }
+  const TargetPhrases &GetTargetPhrases() const {
+    return m_targetPhrases;
+  }

  // pointer to internal node in phrase-table. Since this is implementation dependent, this is a void*
  const void *GetPtNode(const PhraseDictionary &phraseDictionary) const;
@@ -131,8 +131,8 @@ template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF2(m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType),
                 "BackwardLanguageModel does not currently support rules that include <s>"
                );

  float before_boundary = 0.0f;

@@ -144,8 +144,8 @@ template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF2(word.IsNonTerminal(),
                   "BackwardLanguageModel does not currently support rules that include non-terminals "
                  );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
@@ -259,8 +259,8 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr
  for (int position=std::min( lastWord, ngramBoundary - 1); position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF2(word.IsNonTerminal(),
                   "BackwardLanguageModel does not currently support rules that include non-terminals "
                  );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
@@ -285,27 +285,27 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr

LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
  lm::ngram::ModelType model_type;
  if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
    switch(model_type) {
    case lm::ngram::PROBING:
      return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
    case lm::ngram::REST_PROBING:
      return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
    case lm::ngram::TRIE:
      return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
    case lm::ngram::QUANT_TRIE:
      return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
    case lm::ngram::ARRAY_TRIE:
      return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
    case lm::ngram::QUANT_ARRAY_TRIE:
      return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
    default:
      UTIL_THROW2("Unrecognized kenlm model type " << model_type);
    }
  } else {
    return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
  }
}

} // namespace Moses
@@ -15,24 +15,25 @@
using namespace std;

/////////////////////////
-void read_ini(const char *inifile, string &model, string &words, string &wordstxt){
-  ifstream ifs(inifile);
-  string line;
+void read_ini(const char *inifile, string &model, string &words, string &wordstxt)
+{
+  ifstream ifs(inifile);
+  string line;

-  getline(ifs, line);
-  while(ifs){
-    unsigned int pos = line.find("=");
-    string key = line.substr(0, pos);
-    string value = line.substr(pos+1, line.size()-pos);
-    if(key=="MODEL"){
-      model = value;
-    }else if(key=="WORDS"){
-      words = value;
-    }else if(key=="WORDSTXT"){
-      wordstxt = value;
-    }
-    getline(ifs, line);
-  }
+  getline(ifs, line);
+  while(ifs) {
+    unsigned int pos = line.find("=");
+    string key = line.substr(0, pos);
+    string value = line.substr(pos+1, line.size()-pos);
+    if(key=="MODEL") {
+      model = value;
+    } else if(key=="WORDS") {
+      words = value;
+    } else if(key=="WORDSTXT") {
+      wordstxt = value;
+    }
+    getline(ifs, line);
+  }
}

/////////////////////////
@@ -43,35 +44,35 @@ namespace Moses
class DALMState : public FFState
{
private:
  DALM::State *state;

public:
  DALMState(unsigned short order) {
    state = new DALM::State(order);
  }

  DALMState(const DALMState &from) {
    state = new DALM::State(*from.state);
  }

  virtual ~DALMState() {
    delete state;
  }

  virtual int Compare(const FFState& other) const {
    const DALMState &o = static_cast<const DALMState &>(other);
    if(state->get_count() < o.state->get_count()) return -1;
    else if(state->get_count() > o.state->get_count()) return 1;
    else return state->compare(o.state);
  }

  DALM::State *get_state() const {
    return state;
  }

  void refresh() {
    state->refresh();
  }
};

LanguageModelDALM::LanguageModelDALM(const std::string &line)
@@ -86,62 +87,64 @@ LanguageModelDALM::LanguageModelDALM(const std::string &line)

LanguageModelDALM::~LanguageModelDALM()
{
  delete m_logger;
  delete m_vocab;
  delete m_lm;
}

void LanguageModelDALM::Load()
{
  /////////////////////
  // READING INIFILE //
  /////////////////////
  string inifile= m_filePath + "/dalm.ini";

  string model; // Path to the double-array file.
  string words; // Path to the vocabulary file.
  string wordstxt; //Path to the vocabulary file in text format.
  read_ini(inifile.c_str(), model, words, wordstxt);

  model = m_filePath + "/" + model;
  words = m_filePath + "/" + words;
  wordstxt = m_filePath + "/" + wordstxt;

  UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
                util::FileOpenException,
                "Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");

  ////////////////
  // LOADING LM //
  ////////////////

  // Preparing a logger object.
  m_logger = new DALM::Logger(stderr);
  m_logger->setLevel(DALM::LOGGER_INFO);

  // Load the vocabulary file.
  m_vocab = new DALM::Vocabulary(words, *m_logger);

  // Load the language model.
  m_lm = new DALM::LM(model, *m_vocab, *m_logger);

  wid_start = m_vocab->lookup(BOS_);
  wid_end = m_vocab->lookup(EOS_);

  // vocab mapping
  CreateVocabMapping(wordstxt);

  FactorCollection &collection = FactorCollection::Instance();
  m_beginSentenceFactor = collection.AddFactor(BOS_);
}

-const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const{
+const FFState *LanguageModelDALM::EmptyHypothesisState(const InputType &/*input*/) const
+{
  DALMState *s = new DALMState(m_nGramOrder);
  m_lm->init_state(*s->get_state());
  return s;
}

-void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const{
+void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
+{
  fullScore = 0;
  ngramScore = 0;
@@ -149,12 +152,12 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float

  size_t phraseSize = phrase.GetSize();
  if (!phraseSize) return;

  DALMState *dalm_state = new DALMState(m_nGramOrder);

  size_t currPos = 0;
  size_t hist_count = 0;

  while (currPos < phraseSize) {
    const Word &word = phrase.GetWord(currPos);
    hist_count++;
@@ -169,7 +172,7 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
      if (currPos != 0) {
        UTIL_THROW2("Either your data contains <s> in a position other than the first word or your language model is missing <s>. Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?");
      }
      m_lm->init_state(*dalm_state->get_state());
    } else {
      LMResult result = GetValue(word, dalm_state->get_state());
      fullScore += result.score;
@@ -180,10 +183,11 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float

    currPos++;
  }
  delete dalm_state;
}

-LMResult LanguageModelDALM::GetValue(DALM::VocabId wid, DALM::State* finalState) const{
+LMResult LanguageModelDALM::GetValue(DALM::VocabId wid, DALM::State* finalState) const
+{
  LMResult ret;

  // last word is unk?
@@ -200,41 +204,42 @@ LMResult LanguageModelDALM::GetValue(DALM::VocabId wid, DALM::State* finalState)
LMResult LanguageModelDALM::GetValue(const Word &word, DALM::State* finalState) const
{
  DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType));

  return GetValue(wid, finalState);
}

-FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+{
  // In this function, we only compute the LM scores of n-grams that overlap a
  // phrase boundary. Phrase-internal scores are taken directly from the
  // translation option.

  const DALMState *dalm_ps = static_cast<const DALMState *>(ps);

  // Empty phrase added? nothing to be done
-  if (hypo.GetCurrTargetLength() == 0){
+  if (hypo.GetCurrTargetLength() == 0) {
    return dalm_ps ? new DALMState(*dalm_ps) : NULL;
  }

  const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
  //[begin, end) in STL-like fashion.
  const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
  const std::size_t adjust_end = std::min(end, begin + m_nGramOrder - 1);

  DALMState *dalm_state = new DALMState(*dalm_ps);

  std::size_t position = begin;
  float score = 0.0;
-  for(; position < adjust_end; position++){
+  for(; position < adjust_end; position++) {
    score += GetValue(hypo.GetWord(position), dalm_state->get_state()).score;
  }

  if (hypo.IsSourceCompleted()) {
    // Score end of sentence.
    std::vector<DALM::VocabId> indices(m_nGramOrder-1);
    const DALM::VocabId *last = LastIDs(hypo, &indices.front());
    m_lm->set_state(&indices.front(), (last-&indices.front()), *dalm_state->get_state());

    float s = GetValue(wid_end, dalm_state->get_state()).score;
    score += s;
  } else if (adjust_end < end) {
@@ -252,14 +257,15 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps,
  } else {
    out->PlusEquals(this, score);
  }

  return dalm_state;
}

-FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const
+{
  LanguageModelChartState *ret = new LanguageModelChartState(hypo, featureID, m_nGramOrder);
  // initialize language model context state
  DALMState *dalm_state = new DALMState(m_nGramOrder);

  // initial language model scores
  float prefixScore = 0.0; // not yet final for initial words (lack context)
@@ -280,9 +286,9 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
    if (!word.IsNonTerminal()) {
      // beginning of sentence symbol <s>? -> just update state
      if (word.GetFactor(m_factorType) == m_beginSentenceFactor) {
        UTIL_THROW_IF2(phrasePos != 0,
                       "Sentence start symbol must be at the beginning of sentence");
        m_lm->init_state(*dalm_state->get_state());
      }
      // score a regular word added by the rule
      else {
@@ -298,7 +304,7 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu

      const LanguageModelChartState* prevState =
        static_cast<const LanguageModelChartState*>(prevHypo->GetFFState(featureID));

      size_t subPhraseLength = prevState->GetNumTargetTerminals();
      // special case: rule starts with non-terminal -> copy everything
      if (phrasePos == 0) {
@@ -310,13 +316,13 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
        // get language model state
        delete dalm_state;
        dalm_state = new DALMState( *static_cast<DALMState*>(prevState->GetRightContext()) );
        wordPos += subPhraseLength;
      }

      // internal non-terminal
      else {
        // score its prefix
        size_t wpos = wordPos;
        for(size_t prefixPos = 0;
            prefixPos < m_nGramOrder-1 // up to LM order window
            && prefixPos < subPhraseLength; // up to length
@@ -324,7 +330,7 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu
          const Word &word = prevState->GetPrefix().GetWord(prefixPos);
          updateChartScore( &prefixScore, &finalizedScore, GetValue(word, dalm_state->get_state()).score, ++wpos );
        }
        wordPos += subPhraseLength;

        // check if we are dealing with a large sub-phrase
        if (subPhraseLength > m_nGramOrder - 1) {
@@ -360,36 +366,35 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)

  string line;
  while(getline(vocabStrm, line)) {
    const Factor *factor = FactorCollection::Instance().AddFactor(line);
    DALM::VocabId wid = m_vocab->lookup(line.c_str());

    VocabMap::value_type entry(factor, wid);
    m_vocabMap.insert(entry);
  }

}

DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
  VocabMap::left_map::const_iterator iter;
  iter = m_vocabMap.left.find(factor);
  if (iter != m_vocabMap.left.end()) {
    return iter->second;
-  }
-  else {
+  } else {
    // not in mapping. Must be UNK
    return m_vocab->unk();
  }
}

void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factorType = Scan<FactorType>(value);
  } else if (key == "order") {
    m_nGramOrder = Scan<size_t>(value);
  } else if (key == "path") {
    m_filePath = value;
  } else {
    LanguageModel::SetParameter(key, value);
  }
|
@ -25,7 +25,7 @@ class LanguageModelDALM : public LanguageModel
|
||||
public:
|
||||
LanguageModelDALM(const std::string &line);
|
||||
virtual ~LanguageModelDALM();
|
||||
|
||||
|
||||
void Load();
|
||||
|
||||
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
|
||||
@@ -38,8 +38,8 @@ public:

  virtual bool IsUseable(const FactorMask &mask) const;

  virtual void SetParameter(const std::string& key, const std::string& value);

protected:
  const Factor *m_beginSentenceFactor;
@@ -48,22 +48,22 @@ protected:
  std::string m_filePath;
  size_t m_nGramOrder; //! max n-gram length contained in this LM

  DALM::Logger *m_logger;
  DALM::Vocabulary *m_vocab;
  DALM::LM *m_lm;
  DALM::VocabId wid_start, wid_end;

  typedef boost::bimap<const Factor *, DALM::VocabId> VocabMap;
  mutable VocabMap m_vocabMap;

  void CreateVocabMapping(const std::string &wordstxt);
  DALM::VocabId GetVocabId(const Factor *factor) const;

private:
  LMResult GetValue(DALM::VocabId wid, DALM::State* finalState) const;
  LMResult GetValue(const Word &word, DALM::State* finalState) const;
  void updateChartScore(float *prefixScore, float *finalizedScore, float score, size_t wordPos) const;

  // Convert last words of hypothesis into vocab ids, returning an end pointer.
  DALM::VocabId *LastIDs(const Hypothesis &hypo, DALM::VocabId *indices) const {
    DALM::VocabId *index = indices;
@ -114,7 +114,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
} else {
ShiftOrPush(contextFactor, word);
UTIL_THROW_IF2(contextFactor.size() > GetNGramOrder(),
"Can only calculate LM score of phrases up to the n-gram order");
"Can only calculate LM score of phrases up to the n-gram order");

if (word == GetSentenceStartWord()) {
// do nothing, don't include prob for <s> unigram
@ -253,8 +253,8 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,

// beginning of sentence symbol <s>? -> just update state
if (word == GetSentenceStartWord()) {
UTIL_THROW_IF2(phrasePos != 0,
"Sentence start symbol must be at the beginning of sentence");
UTIL_THROW_IF2(phrasePos != 0,
"Sentence start symbol must be at the beginning of sentence");
delete lmState;
lmState = NewState( GetBeginSentenceState() );
}
|
@ -396,7 +396,7 @@ LanguageModel *ConstructKenLM(const std::string &line)
for (size_t i = 1; i < toks.size(); ++i) {
vector<string> args = Tokenize(toks[i], "=");
UTIL_THROW_IF2(args.size() != 2,
"Incorrect format of KenLM property: " << toks[i]);
"Incorrect format of KenLM property: " << toks[i]);

if (args[0] == "factor") {
factorType = Scan<FactorType>(args[1]);
@ -416,28 +416,28 @@ LanguageModel *ConstructKenLM(const std::string &line)

LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {

switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
case lm::ngram::REST_PROBING:
return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
case lm::ngram::TRIE:
return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_TRIE:
return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
case lm::ngram::ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
case lm::ngram::REST_PROBING:
return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
case lm::ngram::TRIE:
return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_TRIE:
return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
case lm::ngram::ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
}
}

}
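ConstructKenLM above follows a recognize-then-dispatch idiom: probe the file for a binary fingerprint, switch on the detected model type, and fall back to ProbingModel for text (ARPA) input. A toy sketch of the same idiom with stand-in types, since pulling in the real lm::ngram headers would not fit here; the magic-byte probe is purely illustrative of what RecognizeBinary does:

#include <fstream>
#include <iostream>
#include <string>

// Stand-ins for the KenLM model family (hypothetical, not the real classes).
struct Model { virtual ~Model() {} virtual const char *Name() const = 0; };
struct ProbingModel : Model { const char *Name() const { return "probing"; } };
struct TrieModel : Model { const char *Name() const { return "trie"; } };

enum ModelType { PROBING, TRIE };

// Illustrative stand-in for lm::ngram::RecognizeBinary(): peek at one byte.
bool RecognizeBinary(const std::string &file, ModelType &type) {
  std::ifstream in(file.c_str(), std::ios::binary);
  char magic = 0;
  if (!in.get(magic)) return false; // unreadable or empty: treat as text
  if (magic == 'P') { type = PROBING; return true; }
  if (magic == 'T') { type = TRIE; return true; }
  return false;
}

Model *ConstructLM(const std::string &file) {
  ModelType type;
  if (RecognizeBinary(file, type)) {
    switch (type) {
    case PROBING: return new ProbingModel();
    case TRIE: return new TrieModel();
    }
  }
  return new ProbingModel(); // text input: default, as ConstructKenLM does
}

int main() {
  Model *lm = ConstructLM("model.bin"); // hypothetical path
  std::cout << lm->Name() << std::endl;
  delete lm;
  return 0;
}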
|
@ -162,7 +162,7 @@ LMResult LanguageModelMaxEntSRI::GetValue(const vector<const Word*> &contextFact
ngram[count] = Vocab_None;

UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL,
"No factor " << factorType << " at position " << (count-1));
"No factor " << factorType << " at position " << (count-1));
// call sri lm fn
VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
ret = GetValue(lmId, ngram+1);
|
@ -10,7 +10,7 @@ using namespace std;
namespace Moses
{
NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
:LanguageModelSingleFactor(line)
:LanguageModelSingleFactor(line)
{
// This space intentionally left blank
}
|
@ -2,8 +2,9 @@

#include "SingleFactor.h"

namespace nplm {
class neuralLM;
namespace nplm
{
class neuralLM;
}

namespace Moses
|
@ -162,7 +162,7 @@ LMResult LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, St
ngram[count] = Vocab_None;

UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL,
"No factor " << factorType << " at position " << (count-1));
"No factor " << factorType << " at position " << (count-1));
// call sri lm fn
VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
ret = GetValue(lmId, ngram+1);
|
@ -48,8 +48,8 @@ LanguageModelSingleFactor::LanguageModelSingleFactor(const std::string &line)

LanguageModelSingleFactor::~LanguageModelSingleFactor()
{
delete m_nullContextState;
delete m_beginSentenceState;
delete m_nullContextState;
delete m_beginSentenceState;
}

const FFState *LanguageModelSingleFactor::GetNullContextState() const
|
@ -44,7 +44,7 @@ public:
return p.first->second;
}
Key const& symbol(LabelId i) const {
assert(static_cast<size_t>(i)<data.size());
assert(static_cast<size_t>(i)<data.size());
return data[i];
}

|
@ -113,7 +113,7 @@ void Manager::ProcessSentence()
searchTime.start();
m_search->ProcessSentence();
VERBOSE(1, "Line " << m_lineNumber << ": Search took " << searchTime << " seconds" << endl);
IFVERBOSE(2) {
IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal();
TRACE_ERR(GetSentenceStats());
}
@ -321,12 +321,12 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
if (i->forward >= 0) {
map<int,const Hypothesis*>::const_iterator idToHypIter = idToHyp.find(i->forward);
UTIL_THROW_IF2(idToHypIter == idToHyp.end(),
"Couldn't find hypothesis " << i->forward);
"Couldn't find hypothesis " << i->forward);
const Hypothesis* nextHypo = idToHypIter->second;
outgoingHyps[hypo].insert(nextHypo);
map<int,float>::const_iterator fscoreIter = fscores.find(nextHypo->GetId());
UTIL_THROW_IF2(fscoreIter == fscores.end(),
"Couldn't find scores for hypothesis " << nextHypo->GetId());
"Couldn't find scores for hypothesis " << nextHypo->GetId());
edgeScores[Edge(hypo->GetId(),nextHypo->GetId())] =
i->fscore - fscoreIter->second;
}
@ -344,17 +344,17 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
outgoingHyps.find(i->hypo);

UTIL_THROW_IF2(outIter == outgoingHyps.end(),
"Couldn't find hypothesis " << i->hypo->GetId());
"Couldn't find hypothesis " << i->hypo->GetId());
float sigma = 0;
for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
j != outIter->second.end(); ++j) {
map<const Hypothesis*, float>::const_iterator succIter = sigmas.find(*j);
UTIL_THROW_IF2(succIter == sigmas.end(),
"Couldn't find hypothesis " << (*j)->GetId());
"Couldn't find hypothesis " << (*j)->GetId());
map<Edge,float>::const_iterator edgeScoreIter =
edgeScores.find(Edge(i->hypo->GetId(),(*j)->GetId()));
UTIL_THROW_IF2(edgeScoreIter == edgeScores.end(),
"Couldn't find edge for hypothesis " << (*j)->GetId());
"Couldn't find edge for hypothesis " << (*j)->GetId());
float term = edgeScoreIter->second + succIter->second; // Add sigma(*j)
if (sigma == 0) {
sigma = term;
@ -387,10 +387,10 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
j != outIter->second.end(); ++j) {
candidates.push_back(*j);
UTIL_THROW_IF2(sigmas.find(*j) == sigmas.end(),
"Hypothesis " << (*j)->GetId() << " not found");
"Hypothesis " << (*j)->GetId() << " not found");
Edge edge(path.back()->GetId(),(*j)->GetId());
UTIL_THROW_IF2(edgeScores.find(edge) == edgeScores.end(),
"Edge not found");
"Edge not found");
candidateScores.push_back(sigmas[*j] + edgeScores[edge]);
if (scoreTotal == 0) {
scoreTotal = candidateScores.back();
@ -545,13 +545,14 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
outputWordGraphStream << endl;
}

void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) {
void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo )
{
Phrase translation;
hypo->GetOutputPhrase(translation);
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (size_t i = 0; i < statefulFFs.size(); ++i) {
const StatefulFeatureFunction *ff = statefulFFs[i];
if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
lm->ReportHistoryOrder(out, translation);
}
}
@ -1345,7 +1346,7 @@ void Manager::SerializeSearchGraphPB(
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
const Hypothesis *loserHypo = *iterArcList;
UTIL_THROW_IF2(!connected[loserHypo->GetId()],
"Hypothesis " << loserHypo->GetId() << " is not connected");
"Hypothesis " << loserHypo->GetId() << " is not connected");
Hypergraph_Edge* edge = hg.add_edges();
SerializeEdgeInfo(loserHypo, edge);
edge->set_head_node(headNodeIdx);
|
@ -131,7 +131,7 @@ public:
TargetPhraseCollectionWithSourcePhrase const*
GetTargetPhraseCollection(Phrase const &src) const {

assert(m_dict);
assert(m_dict);
if(src.GetSize()==0) return 0;

std::pair<MapSrc2Tgt::iterator,bool> piter;
@ -314,7 +314,7 @@ public:
const std::vector<Phrase> &sourcePhrases) const {
// convert into TargetPhraseCollection
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
"Number of target phrases must equal number of source phrases");
"Number of target phrases must equal number of source phrases");

TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;

@ -351,7 +351,7 @@ public:
};

void CacheSource(ConfusionNet const& src) {
assert(m_dict);
assert(m_dict);
const size_t srcSize=src.GetSize();

std::vector<size_t> exploredPaths(srcSize+1,0);
@ -414,7 +414,7 @@ public:

//assert that we have the right number of link params in this CN option
UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
"Incorrect number of input scores");
"Incorrect number of input scores");

// do not start with epsilon (except at first position)
if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
@ -473,7 +473,7 @@ public:
//put input scores in first - already logged, just drop in directly
std::vector<float> transcores(m_obj->GetNumScoreComponents());
UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
"Incorrect number of translation scores");
"Incorrect number of translation scores");

//put in phrase table scores, logging as we insert
std::transform(tcands[i].scores.begin()
|
@ -560,8 +560,8 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)

vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old phrase-table weights to new weights");
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old phrase-table weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;

@ -651,8 +651,8 @@ void Parameter::ConvertWeightArgsDistortion()

vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old distortion weights to new weights");
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old distortion weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;

@ -666,8 +666,8 @@ void Parameter::ConvertWeightArgsDistortion()

vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
UTIL_THROW_IF2(factors.size() != 2,
"Error in old factor specification for lexicalized reordering model: "
<< toks[0]);
"Error in old factor specification for lexicalized reordering model: "
<< toks[0]);
strme << "input-factor=" << factors[0]
<< " output-factor=" << factors[1] << " ";

@ -731,7 +731,7 @@ void Parameter::ConvertWeightArgsLM()
newFeatureName = "KENLM";
break;
default:
UTIL_THROW2("Unknown language model type id:" << lmType);
UTIL_THROW2("Unknown language model type id:" << lmType);
}

size_t numFF = 1;
@ -740,8 +740,8 @@ void Parameter::ConvertWeightArgsLM()

vector<float> weightsLM(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= weights.size(),
"Errors converting old LM weights to new weights");
UTIL_THROW_IF2(currOldInd >= weights.size(),
"Errors converting old LM weights to new weights");
weightsLM[currFF] = Scan<float>(weights[currOldInd]);
if (isChartDecoding) {
weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
@ -792,8 +792,8 @@ void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName, co

vector<float> weights(numFF);
for (size_t currFF = 0; currFF < numFF; ++currFF) {
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old generation weights to new weights");
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
"Errors converting old generation weights to new weights");
float weight = Scan<float>(oldWeights[currOldInd]);
weights[currFF] = weight;

@ -853,8 +853,8 @@ void Parameter::ConvertPhrasePenalty()
{
string oldWeightName = "weight-p";
if (isParamSpecified(oldWeightName)) {
UTIL_THROW_IF2(m_setting[oldWeightName].size() != 1,
"There should be only 1 phrase-penalty weight");
UTIL_THROW_IF2(m_setting[oldWeightName].size() != 1,
"There should be only 1 phrase-penalty weight");
float weight = Scan<float>(m_setting[oldWeightName][0]);
AddFeature("PhrasePenalty");
SetWeight("PhrasePenalty", 0, weight);
@ -867,7 +867,7 @@ void Parameter::ConvertWeightArgs()
{
// can't handle discr LM. must do it manually 'cos of bigram/n-gram split
UTIL_THROW_IF2( m_setting.count("weight-dlm") != 0,
"Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");
"Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");

// check that old & new format aren't mixed
if (m_setting.count("weight") &&
@ -912,7 +912,7 @@ void Parameter::CreateWeightsMap()
const string &line = vec[i];
vector<string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.size() < 2,
"Error in format of weights: " << line);
"Error in format of weights: " << line);

string name = toks[0];
name = name.substr(0, name.size() - 1);
@ -936,7 +936,7 @@ void Parameter::WeightOverwrite()

// should only be on 1 line
UTIL_THROW_IF2(vec.size() != 1,
"Weight override should only be on 1 line");
"Weight override should only be on 1 line");

string name("");
vector<float> weights;
@ -1305,8 +1305,8 @@ void Parameter::OverwriteParam(const string &paramName, PARAM_VEC values)
if (m_setting[paramName].size() > 1) {
VERBOSE(2," (the parameter had " << m_setting[paramName].size() << " previous values)");
UTIL_THROW_IF2(m_setting[paramName].size() != values.size(),
"Number of weight override for " << paramName
<< " is not the same as the original number of weights");
"Number of weight override for " << paramName
<< " is not the same as the original number of weights");
} else {
VERBOSE(2," (the parameter does not have previous values)");
m_setting[paramName].resize(values.size());
|
@ -83,9 +83,9 @@ void PartialTranslOptColl::Prune()

// find nth element
NTH_ELEMENT4(m_list.begin(),
m_list.begin() + m_maxSize,
m_list.end(),
ComparePartialTranslationOption);
m_list.begin() + m_maxSize,
m_list.end(),
ComparePartialTranslationOption);

m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore();
// delete the rest
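NTH_ELEMENT4 is Moses' portability wrapper around std::nth_element, so the pruning above is a partial sort: partition the list so the best options sit at the front, remember the worst surviving score as the new pruning threshold, then drop the tail. The same idea on a plain score vector, with a hypothetical beam size:

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main() {
  float raw[] = { -1.5f, -0.2f, -3.0f, -0.7f, -2.2f };
  std::vector<float> scores(raw, raw + 5);
  const size_t maxSize = 3; // hypothetical beam size

  // Partition so the maxSize highest scores occupy the front of the vector.
  std::nth_element(scores.begin(), scores.begin() + maxSize - 1, scores.end(),
                   std::greater<float>());

  float worstScore = scores[maxSize - 1]; // threshold, like m_worstScore
  scores.resize(maxSize);                 // "delete the rest"
  std::cout << "worst kept score: " << worstScore << std::endl;
  return 0;
}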
|
@ -210,8 +210,8 @@ void Phrase::CreateFromString(FactorDirection direction

size_t nextPos = annotatedWord.find('[', 1);
UTIL_THROW_IF2(nextPos == string::npos,
"Incorrect formatting of non-terminal. Should have 2 non-terms, eg. [X][X]. "
<< "Current string: " << annotatedWord);
"Incorrect formatting of non-terminal. Should have 2 non-terms, eg. [X][X]. "
<< "Current string: " << annotatedWord);

if (direction == Input)
annotatedWord = annotatedWord.substr(1, nextPos - 2);
|
@ -155,8 +155,8 @@ public:
}

void RemoveWord(size_t pos) {
UTIL_THROW_IF2(pos >= m_words.size(),
"Referencing position " << pos << " out of bound");
UTIL_THROW_IF2(pos >= m_words.size(),
"Referencing position " << pos << " out of bound");
m_words.erase(m_words.begin() + pos);
}

|
@ -148,7 +148,7 @@ void PrefixTreeMap::GetCandidates(const IPhrase& key, Candidates* cands)
return;
}
UTIL_THROW_IF2(m_Data[key[0]]->findKey(key[0]) >= m_Data[key[0]]->size(),
"Key not found: " << key[0]);
"Key not found: " << key[0]);

OFF_T candOffset = m_Data[key[0]]->find(key);
if(candOffset == InvalidOffT) {
@ -175,7 +175,7 @@ void PrefixTreeMap::GetCandidates(const PPimp& p, Candidates* cands)
std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
"Invalid vocab id: " << voc);
"Invalid vocab id: " << voc);
std::vector< std::string const * > result;
result.reserve(p.size());
for(IPhrase::const_iterator i = p.begin(); i != p.end(); ++i) {
@ -187,7 +187,7 @@ std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p
IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
"Invalid vocab id: " << voc);
"Invalid vocab id: " << voc);
IPhrase result;
result.reserve(p.size());
for(size_t i = 0; i < p.size(); ++i) {
@ -199,14 +199,14 @@ IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsign
LabelId PrefixTreeMap::ConvertWord(const std::string& w, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
"Invalid vocab id: " << voc);
"Invalid vocab id: " << voc);
return m_Voc[voc]->index(w);
}

std::string PrefixTreeMap::ConvertWord(LabelId w, unsigned int voc) const
{
UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
"Invalid vocab id: " << voc);
"Invalid vocab id: " << voc);
if(w == PrefixTreeMap::MagicWord) {
return "|||";
} else if (w == InvalidLabelId) {
|
@ -103,7 +103,7 @@ void RuleCube::CreateNeighbor(const RuleCubeItem &item, int dimensionIndex,

std::ostream& operator<<(std::ostream &out, const RuleCube &obj)
{
out << obj.GetItemSetSize();
return out;
out << obj.GetItemSetSize();
return out;
}
}
|
@ -74,7 +74,7 @@ class RuleCubeItemEqualityPred
public:
bool operator()(const RuleCubeItem *p, const RuleCubeItem *q) const {
bool ret = p->GetHypothesisDimensions() == q->GetHypothesisDimensions() &&
p->GetTranslationDimension() == q->GetTranslationDimension();
p->GetTranslationDimension() == q->GetTranslationDimension();
return ret;
}
};
@ -92,7 +92,7 @@ public:
~RuleCube();

float GetTopScore() const {
UTIL_THROW_IF2(m_queue.empty(), "Empty queue, nothing to pop");
UTIL_THROW_IF2(m_queue.empty(), "Empty queue, nothing to pop");
RuleCubeItem *item = m_queue.top();
return item->GetScore();
}
@ -107,8 +107,9 @@ public:
return m_transOpt;
}

size_t GetItemSetSize() const
{ return m_covered.size(); }
size_t GetItemSetSize() const {
return m_covered.size();
}

private:
typedef boost::unordered_set<RuleCubeItem*,
|
@ -102,9 +102,9 @@ private:
if (indexIter == s_scoreIndexes.end()) {
std::stringstream strme;
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
" not registered with ScoreIndexMap" << std::endl;
" not registered with ScoreIndexMap" << std::endl;
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
" for every FeatureFunction" << std::endl;
" for every FeatureFunction" << std::endl;
UTIL_THROW2(strme.str());
}
return indexIter->second;
@ -233,7 +233,7 @@ public:
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
"Number of scores is incorrect");
"Number of scores is incorrect");
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] += scores[i];
}
@ -245,7 +245,7 @@ public:
void PlusEquals(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
"Number of scores is incorrect");
"Number of scores is incorrect");
m_scores[indexes.first] += score;
}

@ -271,7 +271,7 @@ public:
void Assign(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
"Feature function must only contain 1 score");
"Feature function must only contain 1 score");
m_scores[indexes.first] = score;
}

@ -302,7 +302,7 @@ public:
float PartialInnerProduct(const FeatureFunction* sp, const std::vector<float>& rhs) const {
std::vector<float> lhs = GetScoresForProducer(sp);
UTIL_THROW_IF2(lhs.size() != rhs.size(),
"Number of weights must match number of scores");
"Number of weights must match number of scores");
return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
}

@ -351,7 +351,7 @@ public:
float GetScoreForProducer(const FeatureFunction* sp) const {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
"Feature function must only contain 1 score");
"Feature function must only contain 1 score");
return m_scores[indexes.first];
}

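Every assertion in these hunks guards the same invariant: a ScoreComponentCollection is one flat float vector in which each FeatureFunction owns a contiguous [first, second) slice, so any update must match the slice width. A compact sketch of that layout with two hypothetical features:

#include <cassert>
#include <iostream>
#include <utility>
#include <vector>

typedef std::pair<size_t, size_t> IndexPair; // [first, second) into scores

struct ScoreCollection {
  std::vector<float> scores;

  // Reserve a contiguous slice, like RegisterScoreProducer().
  IndexPair Register(size_t numScores) {
    IndexPair indexes(scores.size(), scores.size() + numScores);
    scores.resize(scores.size() + numScores, 0.0f);
    return indexes;
  }

  // Like PlusEquals(): the assert plays the role of UTIL_THROW_IF2.
  void PlusEquals(const IndexPair &ix, const std::vector<float> &vals) {
    assert(vals.size() == ix.second - ix.first);
    for (size_t i = 0; i < vals.size(); ++i)
      scores[ix.first + i] += vals[i];
  }
};

int main() {
  ScoreCollection coll;
  IndexPair lm = coll.Register(1); // hypothetical one-score feature
  IndexPair tm = coll.Register(4); // hypothetical four-score phrase table
  coll.PlusEquals(lm, std::vector<float>(1, -2.5f));
  coll.PlusEquals(tm, std::vector<float>(4, 0.5f));
  std::cout << coll.scores.size() << " scores total" << std::endl; // 5
  return 0;
}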
|
@ -30,7 +30,7 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
case NormalBatch:
return new SearchNormalBatch(manager, source, transOptColl);
default:
UTIL_THROW2("ERROR: search. Aborting\n");
UTIL_THROW2("ERROR: search. Aborting\n");
return NULL;
}
}
|
@ -140,7 +140,7 @@ ExpandHypothesis(const Hypothesis &hypothesis,
}
m_partial_hypos.push_back(newHypo);
} else {
UTIL_THROW2("can't use early discarding with batch decoding!");
UTIL_THROW2("can't use early discarding with batch decoding!");
}
}

|
@ -135,7 +135,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
std::vector< std::map<std::string, std::string> >::iterator dlt_meta_it = dlt_meta.begin();
for (dlt_meta_it = dlt_meta.begin(); dlt_meta_it != dlt_meta.end(); ++dlt_meta_it) {
DynamicCacheBasedLanguageModel& cblm = DynamicCacheBasedLanguageModel::InstanceNonConst();
std::cerr << "&cblm:|" << &cblm << "|" << std::endl;
std::cerr << "&cblm:|" << &cblm << "|" << std::endl;
PhraseDictionaryDynamicCacheBased& cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst();
if ((*dlt_meta_it).find("cbtm") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.Insert((*dlt_meta_it)["cbtm"]);
@ -145,7 +145,7 @@ std::cerr << "&cblm:|" << &cblm << "|" << std::endl;
}
if ((*dlt_meta_it).find("cbtm-file") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.Load((*dlt_meta_it)["cbtm-file"]);
}
}
if ((*dlt_meta_it).find("cblm") != (*dlt_meta_it).end()) {
if (&cblm) cblm.Insert((*dlt_meta_it)["cblm"]);
}
|
@ -520,15 +520,14 @@ bool StaticData::LoadData(Parameter *parameter)
string &feature = toks[0];
std::map<std::string, std::string>::const_iterator iter = featureNameOverride.find(feature);
if (iter == featureNameOverride.end()) {
// feature name not override
m_registry.Construct(feature, line);
}
else {
// replace feature name with new name
string newName = iter->second;
feature = newName;
string newLine = Join(" ", toks);
m_registry.Construct(newName, newLine);
// feature name not override
m_registry.Construct(feature, line);
} else {
// replace feature name with new name
string newName = iter->second;
feature = newName;
string newLine = Join(" ", toks);
m_registry.Construct(newName, newLine);
}
}

@ -633,7 +632,7 @@ void StaticData::LoadNonTerminals()
while(getline(inStream, line)) {
vector<string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size() != 2,
"Incorrect unknown LHS format: " << line);
"Incorrect unknown LHS format: " << line);
UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1]));
m_unknownLHS.push_back(entry);
}
@ -682,7 +681,7 @@ bool StaticData::LoadDecodeGraphs()
decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
"Malformed mapping");
"Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL;
}
@ -748,11 +747,11 @@ bool StaticData::LoadDecodeGraphs()
// if specified, record maximum unseen n-gram size
const vector<string> &backoffVector = m_parameter->GetParam("decoding-graph-backoff");
for(size_t i=0; i<m_decodeGraphs.size() && i<backoffVector.size(); i++) {
DecodeGraph &decodeGraph = *m_decodeGraphs[i];
DecodeGraph &decodeGraph = *m_decodeGraphs[i];

if (i < backoffVector.size()) {
decodeGraph.SetBackoff(Scan<size_t>(backoffVector[i]));
}
if (i < backoffVector.size()) {
decodeGraph.SetBackoff(Scan<size_t>(backoffVector[i]));
}
}

return true;
@ -995,7 +994,7 @@ bool StaticData::LoadAlternateWeightSettings()
currentId = args[1];
cerr << "alternate weight setting " << currentId << endl;
UTIL_THROW_IF2(m_weightSetting.find(currentId) != m_weightSetting.end(),
"Duplicate alternate weight id: " << currentId);
"Duplicate alternate weight id: " << currentId);
m_weightSetting[ currentId ] = new ScoreComponentCollection;

// other specifications
@ -1040,7 +1039,7 @@ bool StaticData::LoadAlternateWeightSettings()
UTIL_THROW_IF2(currentId.empty(), "No alternative weights specified");
vector<string> tokens = Tokenize(weightSpecification[i]);
UTIL_THROW_IF2(tokens.size() < 2
, "Incorrect format for alternate weights: " << weightSpecification[i]);
, "Incorrect format for alternate weights: " << weightSpecification[i]);

// get name and weight values
string name = tokens[0];
@ -1069,36 +1068,36 @@ bool StaticData::LoadAlternateWeightSettings()

void StaticData::NoCache()
{
bool noCache;
SetBooleanParameter( &noCache, "no-cache", false );
bool noCache;
SetBooleanParameter( &noCache, "no-cache", false );

if (noCache) {
const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
for (size_t i = 0; i < pts.size(); ++i) {
PhraseDictionary &pt = *pts[i];
pt.SetParameter("cache-size", "0");
}
}
if (noCache) {
const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
for (size_t i = 0; i < pts.size(); ++i) {
PhraseDictionary &pt = *pts[i];
pt.SetParameter("cache-size", "0");
}
}
}

std::map<std::string, std::string> StaticData::OverrideFeatureNames()
{
std::map<std::string, std::string> ret;
std::map<std::string, std::string> ret;

const PARAM_VEC &params = m_parameter->GetParam("feature-name-overwrite");
if (params.size()) {
UTIL_THROW_IF2(params.size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
vector<string> toks = Tokenize(params[0]);
UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");
const PARAM_VEC &params = m_parameter->GetParam("feature-name-overwrite");
if (params.size()) {
UTIL_THROW_IF2(params.size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
vector<string> toks = Tokenize(params[0]);
UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");

for (size_t i = 0; i < toks.size(); i += 2) {
const string &oldName = toks[i];
const string &newName = toks[i+1];
ret[oldName] = newName;
}
}
for (size_t i = 0; i < toks.size(); i += 2) {
const string &oldName = toks[i];
const string &newName = toks[i+1];
ret[oldName] = newName;
}
}

return ret;
return ret;
}

void StaticData::OverrideFeatures()
|
@ -645,7 +645,7 @@ public:
return false;
}
std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
return false;
}
@ -663,7 +663,7 @@ public:
return false;
}
std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
return false;
}
@ -728,8 +728,9 @@ public:
return m_placeHolderFactor;
}

const FeatureRegistry &GetFeatureRegistry() const
{ return m_registry; }
const FeatureRegistry &GetFeatureRegistry() const {
return m_registry;
}

/** check whether we should be using the old code to support binary phrase-table.
** eventually, we'll stop support the binary phrase-table and delete this legacy code
|
@ -234,7 +234,7 @@ void TargetPhrase::SetProperties(const StringPiece &str)

vector<string> keyValue = TokenizeFirstOnly(tok, " ");
UTIL_THROW_IF2(keyValue.size() != 2,
"Incorrect format of property: " << str);
"Incorrect format of property: " << str);
SetProperty(keyValue[0], keyValue[1]);
}
}
|
@ -99,15 +99,15 @@ public:
return m_scoreBreakdown;
}

/*
//TODO: Probably shouldn't copy this, but otherwise ownership is unclear
void SetSourcePhrase(const Phrase& p) {
m_sourcePhrase=p;
}
const Phrase& GetSourcePhrase() const {
return m_sourcePhrase;
}
*/
/*
//TODO: Probably shouldn't copy this, but otherwise ownership is unclear
void SetSourcePhrase(const Phrase& p) {
m_sourcePhrase=p;
}
const Phrase& GetSourcePhrase() const {
return m_sourcePhrase;
}
*/
void SetTargetLHS(const Word *lhs) {
m_lhsTarget = lhs;
}
|
@ -72,15 +72,14 @@ public:

//! delete an entry from the collection
void Remove(const size_t pos) {
if (pos < m_collection.size())
{
if (pos < m_collection.size()) {
m_collection.erase(begin() + pos);
}
}

//! return an entry of the collection
const TargetPhrase* GetTargetPhrase(const size_t pos) const {
return m_collection[pos];
return m_collection[pos];
}

//! divide collection into 2 buckets using std::nth_element, the top & bottom according to table limit
|
@ -10,7 +10,7 @@ namespace Moses

/***
* Return the total wall time that the timer has been in the "running"
* state since it was first "started".
* state since it was first "started".
*/
double Timer::get_elapsed_time() const
{
@ -39,8 +39,7 @@ void Timer::start(const char* msg)
if (stopped) {
start_time = util::WallTime() - (stop_time - start_time);
stopped = false;
}
else {
} else {
start_time = util::WallTime();
running = true;
}
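The branch being reformatted in Timer::start() implements resume-after-stop: sliding start_time forward by the length of the pause makes the elapsed time exclude it. A standalone sketch of that bookkeeping (clock()-based only to stay self-contained; Moses uses util::WallTime()):

#include <ctime>
#include <iostream>

struct Timer {
  double start_time, stop_time;
  bool running, stopped;
  Timer() : start_time(0), stop_time(0), running(false), stopped(false) {}

  static double Now() { return (double)std::clock() / CLOCKS_PER_SEC; }

  void start() {
    if (stopped) {
      // Resume: shift start_time past the pause, as in the hunk above.
      start_time = Now() - (stop_time - start_time);
      stopped = false;
    } else if (!running) {
      start_time = Now();
      running = true;
    }
  }
  void stop() {
    if (running && !stopped) {
      stop_time = Now();
      stopped = true;
    }
  }
  double elapsed() const {
    if (!running) return 0.0;
    return (stopped ? stop_time : Now()) - start_time;
  }
};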
|
@ -62,7 +62,7 @@ Load(
LoadCorpus(Output, targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);

UTIL_THROW_IF2(m_srcSntBreaks.size() != m_trgSntBreaks.size(),
"Source and target arrays aren't the same size");
"Source and target arrays aren't the same size");

// build suffix arrays and auxiliary arrays
cerr << "Building Source Suffix Array...\n";
@ -130,7 +130,7 @@ LoadRawAlignments(string& align)
vector<int> vtmp;
Utils::splitToInt(align, vtmp, "- ");
UTIL_THROW_IF2(vtmp.size() % 2 != 0,
"Alignment format is incorrect: " << align);
"Alignment format is incorrect: " << align);
vector<short> vAlgn; // store as short ints for memory
for (vector<int>::const_iterator itr = vtmp.begin();
itr != vtmp.end(); ++itr) {
@ -382,7 +382,7 @@ GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase) const
for(size_t i=0; i < phrase.words.size(); ++i) { // look up trg words
Word& word = m_trgVocab->GetWord( phrase.words[i]);
UTIL_THROW_IF2(word == m_trgVocab->GetkOOVWord(),
"Unknown word at position " << i);
"Unknown word at position " << i);
targetPhrase->AddWord(word);
}
// scoring
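Utils::splitToInt in the LoadRawAlignments hunk turns a Moses alignment string such as "0-1 1-0 2-2" into a flat int vector, which is why its size must be even before the pairs are narrowed to shorts. A sketch of that parse, treating '-' and ' ' as interchangeable delimiters as the call above does:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split "0-1 1-0 2-2" on '-' and ' ' into {0, 1, 1, 0, 2, 2}.
std::vector<int> SplitToInt(const std::string &align) {
  std::string copy(align);
  for (size_t i = 0; i < copy.size(); ++i)
    if (copy[i] == '-') copy[i] = ' ';
  std::vector<int> out;
  std::istringstream strm(copy);
  int v;
  while (strm >> v) out.push_back(v);
  return out;
}

int main() {
  std::vector<int> vtmp = SplitToInt("0-1 1-0 2-2");
  if (vtmp.size() % 2 != 0) {
    std::cerr << "Alignment format is incorrect" << std::endl;
    return 1;
  }
  std::vector<short> vAlgn(vtmp.begin(), vtmp.end()); // shorts, for memory
  for (size_t i = 0; i < vAlgn.size(); i += 2)
    std::cout << vAlgn[i] << " -> " << vAlgn[i + 1] << std::endl;
  return 0;
}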
|
@ -35,11 +35,11 @@ std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;

CacheColl::~CacheColl()
{
for (iterator iter = begin(); iter != end(); ++iter) {
std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
const TargetPhraseCollection *tps = key.first;
delete tps;
}
for (iterator iter = begin(); iter != end(); ++iter) {
std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
const TargetPhraseCollection *tps = key.first;
delete tps;
}
}

PhraseDictionary::PhraseDictionary(const std::string &line)
@ -47,7 +47,7 @@ PhraseDictionary::PhraseDictionary(const std::string &line)
,m_tableLimit(20) // default
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
s_staticColl.push_back(this);
s_staticColl.push_back(this);
}

const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
@ -60,7 +60,7 @@ class CacheColl : public std::map<size_t, std::pair<const TargetPhraseCollection
// 3rd = time of last access

public:
~CacheColl();
~CacheColl();
};

/**
@ -70,7 +70,7 @@ class PhraseDictionary : public DecodeFeature
{
public:
static const std::vector<PhraseDictionary*>& GetColl() {
return s_staticColl;
return s_staticColl;
}

PhraseDictionary(const std::string &line);
|
@ -35,7 +35,7 @@ PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance

//! constructor
PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line)
: PhraseDictionary(line)
: PhraseDictionary(line)
{
std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl;

@ -114,15 +114,13 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
TargetPhraseCollection* tpc = NULL;
VERBOSE(3,"source:|" << source << "|" << std::endl);
cacheMap::const_iterator it = m_cacheTM.find(source);
if(it != m_cacheTM.end())
{
if(it != m_cacheTM.end()) {
VERBOSE(3,"source:|" << source << "| FOUND" << std::endl);
tpc = (it->second).first;

std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();

while (it2 != tpc->end())
{
while (it2 != tpc->end()) {
((TargetPhrase*) *it2)->Evaluate(source, GetFeaturesToApply());
it2++;
}
@ -145,7 +143,8 @@ ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManag
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}

void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type) {
void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
@ -157,22 +156,22 @@ void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type) {
&& m_score_type != CBTM_SCORE_TYPE_COSINE
&& m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD
&& m_score_type != CBTM_SCORE_TYPE_POWER_REWARD
&& m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD )
{
&& m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
}


VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl);
}


void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age) {
void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
m_maxAge = age;
VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl);
VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl);
}


@ -185,7 +184,7 @@ ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phras
float PhraseDictionaryDynamicCacheBased::decaying_score(const int age)
{
float sc;
switch(m_score_type){
switch(m_score_type) {
case CBTM_SCORE_TYPE_HYPERBOLA:
sc = (float) 1.0/age - 1.0;
break;
@ -218,28 +217,23 @@ void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int
VERBOSE(3,"m_maxAge:|" << m_maxAge << "|" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
#endif
float sc;
for (size_t i=0; i<=m_maxAge; i++)
{
if (i==m_maxAge){
for (size_t i=0; i<=m_maxAge; i++) {
if (i==m_maxAge) {
if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA
|| m_score_type == CBTM_SCORE_TYPE_POWER
|| m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL
|| m_score_type == CBTM_SCORE_TYPE_COSINE )
{
|| m_score_type == CBTM_SCORE_TYPE_POWER
|| m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL
|| m_score_type == CBTM_SCORE_TYPE_COSINE ) {
sc = decaying_score(m_maxAge)/numScoreComponent;
}
else{ // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD
} else { // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD
sc = 0.0;
}
}
else{
} else {
sc = decaying_score(i)/numScoreComponent;
}
Scores sc_vec;
for (size_t j=0; j<numScoreComponent; j++)
{
Scores sc_vec;
for (size_t j=0; j<numScoreComponent; j++) {
sc_vec.push_back(sc); //CHECK THIS SCORE
}
precomputedScores.push_back(sc_vec);
@ -250,12 +244,9 @@ void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int
Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned int age)
{
VERBOSE(3,"age:|" << age << "|" << std::endl);
if (age < precomputedScores.size())
{
if (age < precomputedScores.size()) {
return precomputedScores.at(age);
}
else
{
} else {
return precomputedScores.at(m_maxAge);
}
}
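For reference, the hyperbolic branch of decaying_score() gives an entry of age a the score 1/a - 1, i.e. 0 when it is brand new and approaching -1 as it ages, and GetPreComputedScores() clamps the age to m_maxAge. A compact sketch of that decay-plus-clamp lookup with a hypothetical horizon (indexing here starts at age 1, a slight simplification of the 0..m_maxAge table above):

#include <iostream>
#include <vector>

// Hyperbolic decay, as in CBTM_SCORE_TYPE_HYPERBOLA above.
float decaying_score(int age) {
  return 1.0f / (float)age - 1.0f;
}

int main() {
  const unsigned int maxAge = 5; // hypothetical cache horizon
  std::vector<float> precomputed;
  for (unsigned int age = 1; age <= maxAge; ++age)
    precomputed.push_back(decaying_score(age));

  // Clamped lookup, like GetPreComputedScores().
  for (unsigned int age = 1; age <= 8; ++age) {
    unsigned int idx = (age <= maxAge) ? age - 1 : maxAge - 1;
    std::cout << "age " << age << ": " << precomputed[idx] << std::endl;
  }
  return 0;
}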
@ -285,13 +276,12 @@ void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries,
std::vector<std::string> pp;

std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++)
{
for(it = entries.begin(); it!=entries.end(); it++) {
pp.clear();
pp = TokenizeMultiCharSeparator((*it), "|||");
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);


Update(pp[0], pp[1], ageString);
}
}
@ -303,7 +293,7 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
Phrase sourcePhrase(0);
Phrase targetPhrase(0);


char *err_ind_temp;
int age = strtod(ageString.c_str(), &err_ind_temp);
//target
@ -311,7 +301,7 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL);
VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);


//TODO: Would be better to reuse source phrases, but ownership has to be
//consistent across phrase table implementations
sourcePhrase.Clear();
@ -331,8 +321,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age)

cacheMap::const_iterator it = m_cacheTM.find(sp);
VERBOSE(3,"sp:|" << sp << "|" << std::endl);
if(it!=m_cacheTM.end())
{
if(it!=m_cacheTM.end()) {
VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
// p is found
// here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
@ -344,18 +333,15 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age)
const Phrase* tp_ptr = NULL;
bool found = false;
size_t tp_pos=0;
while (!found && tp_pos < tpc->GetSize())
{
while (!found && tp_pos < tpc->GetSize()) {
tp_ptr = (const Phrase*) tpc->GetTargetPhrase(tp_pos);
if (tp == *tp_ptr)
{
if (tp == *tp_ptr) {
found = true;
continue;
}
tp_pos++;
}
if (!found)
{
if (!found) {
VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));

@ -368,9 +354,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age)
VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
VERBOSE(3,"tp:|" << tp << "| INSERTED" << std::endl);
}
}
else
{
} else {
VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
// p is not found
// create target collection
@ -397,21 +381,19 @@ void PhraseDictionaryDynamicCacheBased::Decay()
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
#endif
cacheMap::iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++)
{
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
Decay((*it).first);
}
}


void PhraseDictionaryDynamicCacheBased::Decay(Phrase p)
{
VERBOSE(3,"p:|" << p << "|" << std::endl);
cacheMap::const_iterator it = m_cacheTM.find(p);
VERBOSE(3,"searching:|" << p << "|" << std::endl);
if (it != m_cacheTM.end())
{
if (it != m_cacheTM.end()) {
VERBOSE(3,"found:|" << p << "|" << std::endl);
//p is found

@ -420,8 +402,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase p)
AgeCollection* ac = TgtCollAgePair.second;

//loop in inverted order to allow a correct deletion of std::vectors tpc and ac
for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--)
{
for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) {
VERBOSE(3,"p:|" << p << "|" << std::endl);
unsigned int tp_age = ac->at(tp_pos); //increase the age by 1
tp_age++; //increase the age by 1
@ -431,29 +412,26 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase p)
VERBOSE(3,"p:|" << p << "| " << "tp_age:|" << tp_age << "| " << "*tp_ptr:|" << *tp_ptr << "|" << std::endl);
VERBOSE(3,"precomputedScores.size():|" << precomputedScores.size() << "|" << std::endl);

if (tp_age > m_maxAge){
if (tp_age > m_maxAge) {
VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl);
tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
m_entries--;
}
else{
} else {
VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl);
tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age));
ac->at(tp_pos) = tp_age;
VERBOSE(3,"precomputedScores.size():|" << precomputedScores.size() << "|" << std::endl);
}
}
if (tpc->GetSize() == 0)
{// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
if (tpc->GetSize() == 0) {
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
(((*it).second).second)->clear();
delete ((*it).second).second;
delete ((*it).second).first;
m_cacheTM.erase(p);
}
}
else
{
} else {
//do nothing
VERBOSE(3,"p:|" << p << "| NOT FOUND" << std::endl);
}
@ -493,8 +471,7 @@ void PhraseDictionaryDynamicCacheBased::Clear()
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++)
{
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
(((*it).second).second)->clear();
delete ((*it).second).second;
delete ((*it).second).first;
@ -508,20 +485,18 @@ void PhraseDictionaryDynamicCacheBased::Print() const
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
#endif
cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++)
{
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
std::string source = (it->first).ToString();
TargetPhraseCollection* tpc = (it->second).first;
TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++)
{
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString();
std::cout << source << " ||| " << target << std::endl;
}
source.clear();
}
}

}// end namespace

|
@ -42,9 +42,10 @@
#define PI 3.14159265


namespace Moses {
namespace Moses
{

/** Implementation of a Cache-based phrase table.
/** Implementation of a Cache-based phrase table.
*/
class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{
@ -106,7 +107,7 @@ public:

void SetScoreType(size_t type);
void SetMaxAge(unsigned int age);


protected:
static PhraseDictionaryDynamicCacheBased *s_instance;

|
@ -98,9 +98,9 @@ PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase
const Word &sourceNonTerm = word;

UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
"No alignment for non-term at position " << pos);
"No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
"Alignment info incorrect at position " << pos);
"Alignment info incorrect at position " << pos);

size_t targetNonTermInd = iterAlign->second;
++iterAlign;
@ -112,7 +112,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase
}

UTIL_THROW_IF2(currNode == NULL,
"Node not found at position " << pos);
"Node not found at position " << pos);

}

// finally, the source LHS
|
||||
|
||||
size_t numWeights = m_numScoreComponents;
|
||||
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &
|
||||
m_pdStr.size()*numWeights != m_multimodelweights.size(),
|
||||
"Number of scores and weights are not equal");
|
||||
m_pdStr.size()*numWeights != m_multimodelweights.size(),
|
||||
"Number of scores and weights are not equal");
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
|
||||
:PhraseDictionary(line)
|
||||
{
|
||||
if (type == 1) {
|
||||
// PhraseDictionaryMultiModelCounts
|
||||
// PhraseDictionaryMultiModelCounts
|
||||
UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
|
||||
m_pdStr.size()*4 != m_multimodelweights.size(),
|
||||
"Number of scores and weights are not equal");
|
||||
m_pdStr.size()*4 != m_multimodelweights.size(),
|
||||
"Number of scores and weights are not equal");
|
||||
}
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ void PhraseDictionaryMultiModel::Load()
|
||||
|
||||
PhraseDictionary *pt = FindPhraseDictionary(ptName);
|
||||
UTIL_THROW_IF2(pt == NULL,
|
||||
"Could not find component phrase table " << ptName);
|
||||
"Could not find component phrase table " << ptName);
|
||||
m_pd.push_back(pt);
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::st
|
||||
ReadParameters();
|
||||
|
||||
UTIL_THROW_IF2(m_targetTable.size() != m_pdStr.size(),
|
||||
"List of phrase tables and target tables must be equal");
|
||||
"List of phrase tables and target tables must be equal");
|
||||
|
||||
}
|
||||
|
||||
@ -89,11 +89,11 @@ void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, cons
|
||||
} else if (key == "lex-e2f") {
|
||||
m_lexE2FStr = Tokenize(value, ",");
|
||||
UTIL_THROW_IF2(m_lexE2FStr.size() != m_pdStr.size(),
|
||||
"Number of scores for lexical probability p(f|e) incorrectly specified");
|
||||
"Number of scores for lexical probability p(f|e) incorrectly specified");
|
||||
} else if (key == "lex-f2e") {
|
||||
m_lexF2EStr = Tokenize(value, ",");
|
||||
UTIL_THROW_IF2(m_lexF2EStr.size() != m_pdStr.size(),
|
||||
"Number of scores for lexical probability p(e|f) incorrectly specified");
|
||||
"Number of scores for lexical probability p(e|f) incorrectly specified");
|
||||
} else if (key == "target-table") {
|
||||
m_targetTable = Tokenize(value, ",");
|
||||
} else {
|
||||
@ -119,14 +119,14 @@ void PhraseDictionaryMultiModelCounts::Load()
|
||||
PhraseDictionary *pt;
|
||||
pt = FindPhraseDictionary(ptName);
|
||||
UTIL_THROW_IF2(pt == NULL,
|
||||
"Could not find component phrase table " << ptName);
|
||||
"Could not find component phrase table " << ptName);
|
||||
m_pd.push_back(pt);
|
||||
|
||||
// reverse
|
||||
const string &target_table = m_targetTable[i];
|
||||
pt = FindPhraseDictionary(target_table);
|
||||
UTIL_THROW_IF2(pt == NULL,
|
||||
"Could not find component phrase table " << target_table);
|
||||
"Could not find component phrase table " << target_table);
|
||||
m_inverse_pd.push_back(pt);
|
||||
|
||||
// lex
|
||||
|
@ -64,9 +64,9 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
|
||||
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceNonTerm, const Word &targetNonTerm)
|
||||
{
|
||||
UTIL_THROW_IF2(!sourceNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << sourceNonTerm);
|
||||
"Not a non-terminal: " << sourceNonTerm);
|
||||
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
|
||||
NonTerminalMapKey key(sourceNonTerm, targetNonTerm);
|
||||
return &m_nonTermMap[NonTerminalMapKey(sourceNonTerm, targetNonTerm)];
|
||||
@ -75,7 +75,7 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
|
||||
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Word &sourceTerm) const
|
||||
{
|
||||
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
|
||||
"Not a terminal: " << sourceTerm);
|
||||
"Not a terminal: " << sourceTerm);
|
||||
|
||||
TerminalMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
|
||||
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
|
||||
@ -84,9 +84,9 @@ const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Wor
|
||||
const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const
|
||||
{
|
||||
UTIL_THROW_IF2(!sourceNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << sourceNonTerm);
|
||||
"Not a non-terminal: " << sourceNonTerm);
|
||||
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
|
||||
NonTerminalMapKey key(sourceNonTerm, targetNonTerm);
|
||||
NonTerminalMap::const_iterator p = m_nonTermMap.find(key);
|
||||
|
@ -14,20 +14,20 @@ PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::stri
{
  ReadParameters();
  UTIL_THROW_IF2(m_mosesDir.empty() ||
                 m_scriptDir.empty() ||
                 m_externalDir.empty() ||
                 m_inputLang.empty() ||
                 m_outputLang.empty(), "Must specify all arguments");
}

void PhraseDictionaryTransliteration::Load()
{
  SetFeaturesToApply();
}

void PhraseDictionaryTransliteration::CleanUpAfterSentenceProcessing(const InputType& source)
{
  ReduceCache();
}

void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
@ -38,14 +38,14 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
    InputPath &inputPath = **iter;

    if (!SatisfyBackoff(inputPath)) {
      continue;
    }

    const Phrase &sourcePhrase = inputPath.GetPhrase();

    if (sourcePhrase.GetSize() != 1) {
      // only transliterate single words; a limitation of the transliteration script
      continue;
    }

    GetTargetPhraseCollection(inputPath);
@ -54,90 +54,89 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input

void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
{
  const Phrase &sourcePhrase = inputPath.GetPhrase();
  size_t hash = hash_value(sourcePhrase);

  CacheColl &cache = GetCache();

  std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
  iter = cache.find(hash);

  if (iter != cache.end()) {
    // already in cache
    const TargetPhraseCollection *tpColl = iter->second.first;
    inputPath.SetTargetPhrases(*this, tpColl, NULL);
  } else {
    // TRANSLITERATE
    char *ptr = tmpnam(NULL);
    string inFile(ptr);
    ptr = tmpnam(NULL);
    string outDir(ptr);

    ofstream inStream(inFile.c_str());
    inStream << sourcePhrase.ToString() << endl;
    inStream.close();

    string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
                 " --transliteration-model-dir " + m_filePath +
                 " --moses-src-dir " + m_mosesDir +
                 " --external-bin-dir " + m_externalDir +
                 " --input-extension " + m_inputLang +
                 " --output-extension " + m_outputLang +
                 " --oov-file " + inFile +
                 " --out-dir " + outDir;

    int ret = system(cmd.c_str());
    UTIL_THROW_IF2(ret != 0, "Transliteration script error");

    TargetPhraseCollection *tpColl = new TargetPhraseCollection();
    vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
    vector<TargetPhrase*>::const_iterator iter;
    for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
      TargetPhrase *tp = *iter;
      tpColl->Add(tp);
    }

    std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
    cache[hash] = value;

    inputPath.SetTargetPhrases(*this, tpColl, NULL);

    // clean up temporary files
    remove(inFile.c_str());

    cmd = "rm -rf " + outDir;
    system(cmd.c_str());
  }
}
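GetTargetPhraseCollection above is a look-up-or-compute cache: results are keyed by the phrase hash and stored with a clock() stamp so ReduceCache() can later evict old entries. A reduced sketch of the same pattern, with a generic value type standing in for the Moses classes:

  #include <cstddef>
  #include <ctime>
  #include <map>
  #include <utility>

  // Sketch of the cache pattern above: key by hash; store the computed
  // value with a timestamp so stale entries can be evicted later.
  template <typename Value>
  const Value *LookupOrCompute(std::map<std::size_t, std::pair<const Value*, clock_t> > &cache,
                               std::size_t hash,
                               const Value *(*compute)())
  {
    typename std::map<std::size_t, std::pair<const Value*, clock_t> >::iterator it = cache.find(hash);
    if (it != cache.end()) {
      return it->second.first;              // already in cache
    }
    const Value *v = compute();             // expensive path, e.g. the external script
    cache[hash] = std::make_pair(v, clock());
    return v;
  }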
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
  std::vector<TargetPhrase*> ret;

  string outPath = outDir + "/out.txt";
  ifstream outStream(outPath.c_str());

  string line;
  while (getline(outStream, line)) {
    vector<string> toks;
    Tokenize(toks, line, "\t");
    UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");

    TargetPhrase *tp = new TargetPhrase();
    Word &word = tp->AddWord();
    word.CreateFromString(Output, m_output, toks[0], false);

    float score = Scan<float>(toks[1]);
    tp->GetScoreBreakdown().PlusEquals(this, score);

    // score of all other ff when this rule is being loaded
    tp->Evaluate(sourcePhrase, GetFeaturesToApply());

    ret.push_back(tp);
  }

  outStream.close();

  return ret;
}
@ -145,7 +144,7 @@ std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
ChartRuleLookupManager* PhraseDictionaryTransliteration::CreateRuleLookupManager(const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection)
{
  return NULL;
  //return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
}

@ -154,17 +153,17 @@ PhraseDictionaryTransliteration::
SetParameter(const std::string& key, const std::string& value)
{
  if (key == "moses-dir") {
    m_mosesDir = value;
  } else if (key == "script-dir") {
    m_scriptDir = value;
  } else if (key == "external-dir") {
    m_externalDir = value;
  } else if (key == "input-lang") {
    m_inputLang = value;
  } else if (key == "output-lang") {
    m_outputLang = value;
  } else {
    PhraseDictionary::SetParameter(key, value);
  }
}
@ -177,25 +176,25 @@ bool PhraseDictionaryTransliteration::SatisfyBackoff(const InputPath &inputPath)
  size_t backoff = decodeGraph->GetBackoff();

  if (backoff == 0) {
    // i.e. don't back off: collect ALL translations
    return true;
  }

  if (sourcePhrase.GetSize() > backoff) {
    // source phrase too big
    return false;
  }

  // look up a translation only if there are no other translations
  InputPath::TargetPhrases::const_iterator iter;
  for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
    const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
    const TargetPhraseCollection *tpCollPrev = temp.first;

    if (tpCollPrev && tpCollPrev->GetSize()) {
      // already have a translation from another pt; don't create translations
      return false;
    }
  }

  return true;
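SatisfyBackoff above reduces to a three-step decision: a backoff limit of 0 always accepts, phrases longer than the limit are rejected, and otherwise the path is accepted only if no earlier phrase table produced translations for it. A condensed sketch of that decision with hypothetical scalar inputs:

  #include <cstddef>

  // Sketch of the backoff decision above. numPrevTranslations stands for
  // the count of target phrases already attached to the input path.
  bool SatisfyBackoffSketch(std::size_t backoff, std::size_t phraseSize,
                            std::size_t numPrevTranslations)
  {
    if (backoff == 0) return true;            // no backoff: always collect
    if (phraseSize > backoff) return false;   // phrase exceeds backoff limit
    return numPrevTranslations == 0;          // only if nothing found earlier
  }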
@ -278,7 +278,7 @@ public:
  }

  PPtr Extend(PPtr p,const std::string& w) {
    UTIL_THROW_IF2(p == NULL, "Error");

    if(w.empty() || w==EPSILON) return p;
@ -380,8 +380,8 @@ PhraseDictionaryTree::PhraseDictionaryTree()
  : imp(new PDTimp)
{
  if(sizeof(OFF_T)!=8) {
    UTIL_THROW2("ERROR: size of type 'OFF_T' has to be 64 bit!\n"
                "In gcc, use compiler settings '-D_FILE_OFFSET_BITS=64 -D_LARGE_FILES'\n");
  }
}
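The constructor above checks the width of OFF_T at run time. On a C++11 compiler the same invariant can be stated at compile time; a sketch, using the POSIX off_t type as a stand-in for the OFF_T typedef:

  #include <sys/types.h>

  // Compile-time variant of the run-time sizeof check above: the build,
  // rather than the first run, fails if file offsets are not 64-bit.
  static_assert(sizeof(off_t) == 8,
                "off_t must be 64 bit; compile with -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES");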
@ -500,7 +500,7 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
    // init numElement
    numElement = tokens.size();
    UTIL_THROW_IF2(numElement < (PrintWordAlignment()?4:3),
                   "Format error");
  }

  if (tokens.size() != numElement) {

@ -549,8 +549,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
      PSA::Data& d=psa->insert(f);
      if(d==InvalidOffT) d=fTell(ot);
      else {
        UTIL_THROW2("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '"
                    << line);
      }
    }

@ -559,8 +559,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
    if (!sparseFeatureString.empty()) {
      std::vector<std::string> sparseTokens = Tokenize(sparseFeatureString);
      if (sparseTokens.size() % 2 != 0) {
        UTIL_THROW2("ERROR: incorrectly formatted sparse feature string: " <<
                    sparseFeatureString);
      }
      for (size_t i = 0; i < sparseTokens.size(); i+=2) {
        fnames.push_back(imp->tv.add(sparseTokens[i]));

@ -601,13 +601,13 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
      PSA::Data& d=psa->insert(f);
      if(d==InvalidOffT) d=fTell(ot);
      else {
        UTIL_THROW2("ERROR: source phrase already inserted (B)!\nline(" << lnc << "): '"
                    << line);
      }
    }
    tgtCands.push_back(TgtCand(e,sc, alignmentString));
    UTIL_THROW_IF2(currFirstWord == InvalidLabelId,
                   "Uninitialized word");
    tgtCands.back().SetFeatures(fnames, fvalues);
  }
  if (PrintWordAlignment())
@ -53,7 +53,7 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
  if(m_numScoreComponents!=weight.size()) {
    std::stringstream strme;
    UTIL_THROW2("ERROR: mismatch of number of scaling factors: " << weight.size()
                << " " << m_numScoreComponents);
  }

  obj->Create(m_input, m_output, m_filePath, weight);
@ -14,7 +14,7 @@ SkeletonPT::SkeletonPT(const std::string &line)

void SkeletonPT::Load()
{
  SetFeaturesToApply();
}

void SkeletonPT::InitializeForInput(InputType const& source)

@ -37,8 +37,8 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu

  // add target phrase to phrase-table cache
  size_t hash = hash_value(sourcePhrase);
  std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
  cache[hash] = value;

  inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
@ -80,14 +80,14 @@ void TranslationOption::Evaluate(const InputType &input)
const InputPath &TranslationOption::GetInputPath() const
{
  UTIL_THROW_IF2(m_inputPath == NULL,
                 "No input path");
  return *m_inputPath;
}

void TranslationOption::SetInputPath(const InputPath &inputPath)
{
  UTIL_THROW_IF2(m_inputPath,
                 "Input path already specified");
  m_inputPath = &inputPath;
}
@ -108,7 +108,7 @@ void TranslationOptionCollection::Prune()
  if (m_maxNoTransOptPerCoverage > 0 &&
      fullList.size() > m_maxNoTransOptPerCoverage) {
    // sort in vector
    NTH_ELEMENT4(fullList.begin(), fullList.begin() + m_maxNoTransOptPerCoverage, fullList.end(), CompareTranslationOption);
    totalPruned += fullList.size() - m_maxNoTransOptPerCoverage;

    // delete the rest
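NTH_ELEMENT4 above evidently performs a partial partition in the spirit of std::nth_element: the best m_maxNoTransOptPerCoverage options are moved to the front without a full sort, and the tail is deleted. A minimal sketch of that pruning step on plain scores:

  #include <algorithm>
  #include <cstddef>
  #include <functional>
  #include <vector>

  // Sketch of the pruning step above: keep the k best scores without
  // fully sorting; nth_element partitions around the k-th position.
  void PruneToK(std::vector<float> &scores, std::size_t k)
  {
    if (scores.size() <= k) return;
    std::nth_element(scores.begin(), scores.begin() + k, scores.end(),
                     std::greater<float>());       // best k scores first
    scores.erase(scores.begin() + k, scores.end()); // delete the rest
  }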
@ -392,7 +392,7 @@ void TranslationOptionCollection::CreateTranslationOptions()
  // ... and that end at endPos
  for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) {
    if (graphInd > 0 && // only skip subsequent graphs
        backoff != 0 && // use of backoff specified
        (endPos-startPos+1 >= backoff || // size exceeds backoff limit or ...
         m_collection[startPos][endPos-startPos].size() > 0)) { // phrases already found for this span
      VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl);
@ -611,7 +611,7 @@ void TranslationOptionCollection::Add(TranslationOption *translationOption)
{
  const WordsRange &coverage = translationOption->GetSourceWordsRange();
  UTIL_THROW_IF2(coverage.GetEndPos() - coverage.GetStartPos() >= m_collection[coverage.GetStartPos()].size(),
                 "Out of bound access: " << coverage);
  m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].Add(translationOption);
}
@ -684,7 +684,7 @@ TranslationOptionList &TranslationOptionCollection::GetTranslationOptionList(siz
  maxSize = std::min(maxSize, maxSizePhrase);

  UTIL_THROW_IF2(maxSize >= m_collection[startPos].size(),
                 "Out of bound access: " << maxSize);

  return m_collection[startPos][maxSize];
}

@ -695,7 +695,7 @@ const TranslationOptionList &TranslationOptionCollection::GetTranslationOptionLi
  maxSize = std::min(maxSize, maxSizePhrase);

  UTIL_THROW_IF2(maxSize >= m_collection[startPos].size(),
                 "Out of bound access: " << maxSize);
  return m_collection[startPos][maxSize];
}
@ -81,7 +81,7 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
  const Phrase &prevPhrase = prevPath.GetPhrase();
  const ScorePair *prevInputScore = prevPath.GetInputScore();
  UTIL_THROW_IF2(prevInputScore == NULL,
                 "No input score for path: " << prevPath);

  // loop thru every word at this position
  const ConfusionNet::Column &col = input.GetColumn(endPos);
@ -24,7 +24,7 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
                 "Not for models using the legacy binary phrase table");

  const InputFeature &inputFeature = InputFeature::Instance();
  UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
@ -61,57 +61,57 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
  }

  // iteratively extend all paths
  for (size_t endPos = 1; endPos < size; ++endPos) {
    const std::vector<size_t> &nextNodes = input.GetNextNodes(endPos);

    // loop thru every previous path
    size_t numPrevPaths = m_inputPathQueue.size();

    for (size_t i = 0; i < numPrevPaths; ++i) {
      //for (size_t pathInd = 0; pathInd < prevPaths.size(); ++pathInd) {
      const InputPath &prevPath = *m_inputPathQueue[i];

      size_t nextNode = prevPath.GetNextNode();
      if (prevPath.GetWordsRange().GetEndPos() + nextNode != endPos) {
        continue;
      }

      size_t startPos = prevPath.GetWordsRange().GetStartPos();

      if (endPos - startPos + 1 > maxPhraseLength) {
        continue;
      }

      WordsRange range(startPos, endPos);
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      const Phrase &prevPhrase = prevPath.GetPhrase();
      const ScorePair *prevInputScore = prevPath.GetInputScore();
      UTIL_THROW_IF2(prevInputScore == NULL,
                     "Null previous score");

      // loop thru every word at this position
      const ConfusionNet::Column &col = input.GetColumn(endPos);

      for (size_t i = 0; i < col.size(); ++i) {
        const Word &word = col[i].first;
        Phrase subphrase(prevPhrase);
        subphrase.AddWord(word);

        const ScorePair &scores = col[i].second;
        ScorePair *inputScore = new ScorePair(*prevInputScore);
        inputScore->PlusEquals(scores);

        InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);

        size_t nextNode = nextNodes[i];
        path->SetNextNode(nextNode);

        m_inputPathQueue.push_back(path);
      } // for (size_t i = 0; i < col.size(); ++i) {

    } // for (size_t i = 0; i < numPrevPaths; ++i) {
  }
}
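The constructor above grows lattice paths column by column: every existing path whose end lines up with the current column is extended with each word of that column, and input scores accumulate along the way. A stripped-down sketch of that extension step over strings and floats (hypothetical types, not the Moses ones):

  #include <cstddef>
  #include <string>
  #include <utility>
  #include <vector>

  // Hypothetical stand-in for a lattice path: surface string, accumulated
  // input score, and the column at which the path currently ends.
  struct PathSketch {
    std::string phrase;
    float score;
    std::size_t endPos;
  };

  // Sketch of the extension step above: every path ending just before
  // column `col` is extended with every (word, score) entry in the column.
  std::vector<PathSketch> Extend(const std::vector<PathSketch> &paths,
                                 const std::vector<std::pair<std::string, float> > &column,
                                 std::size_t col)
  {
    std::vector<PathSketch> out(paths);
    for (std::size_t i = 0; i < paths.size(); ++i) {  // only pre-existing paths
      if (paths[i].endPos + 1 != col) continue;       // path does not reach this column
      for (std::size_t j = 0; j < column.size(); ++j) {
        PathSketch p = paths[i];
        p.phrase += " " + column[j].first;            // append the word
        p.score  += column[j].second;                 // accumulate input score
        p.endPos  = col;
        out.push_back(p);
      }
    }
    return out;
  }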
@ -134,19 +134,18 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
    const WordsRange &range = path.GetWordsRange();

    if (tpColl) {
      TargetPhraseCollection::const_iterator iter;
      for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
        const TargetPhrase &tp = **iter;
        TranslationOption *transOpt = new TranslationOption(range, tp);
        transOpt->SetInputPath(path);
        transOpt->Evaluate(m_source);

        Add(transOpt);
      }
    } else if (path.GetPhrase().GetSize() == 1) {
      // unknown word processing
      ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
    }
  }
@ -165,16 +164,16 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()

void TranslationOptionCollectionLattice::ProcessUnknownWord(size_t sourcePos)
{
  UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
}

void TranslationOptionCollectionLattice::CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
    , size_t startPosition
    , size_t endPosition
    , bool adhereTableLimit
    , size_t graphInd)
{
  UTIL_THROW(util::Exception, "CreateTranslationOptionsForRange() not implemented for lattice");
}

} // namespace
@ -15,10 +15,10 @@ class WordLattice;
class TranslationOptionCollectionLattice : public TranslationOptionCollection
{
protected:
  /* forcibly create a translation option for a single word.
   * call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
   * at a particular source position
   */
  void ProcessUnknownWord(size_t sourcePos); // do not implement

public:

@ -27,10 +27,10 @@ public:
  void CreateTranslationOptions();

  void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
                                        , size_t startPosition
                                        , size_t endPosition
                                        , bool adhereTableLimit
                                        , size_t graphInd); // do not implement

protected:
@ -54,7 +54,7 @@ public:
  }
  void Remove( size_t ind ) {
    UTIL_THROW_IF2(ind >= m_coll.size(),
                   "Out of bound index " << ind);
    m_coll.erase( m_coll.begin()+ind );
  }
  void Add(TranslationOption *transOpt) {
@ -184,7 +184,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
  Word *targetLHS = new Word(true);
  targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
  UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL,
                 "Null factor left-hand-side");
  targetPhrase.SetTargetLHS(targetLHS);

  // not tested

@ -289,7 +289,7 @@ void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
    , const std::vector<FactorType>& /* factorOrder */)
{
  UTIL_THROW_IF2(!label.IsNonTerminal(),
                 "Label must be a non-terminal");

  SourceLabelOverlap overlapType = StaticData::Instance().GetSourceLabelOverlap();
  NonTerminalSet &list = GetLabelSet(startPos, endPos);

@ -331,7 +331,7 @@ std::ostream& operator<<(std::ostream &out, const TreeInput &input)
  for (iter = labelSet.begin(); iter != labelSet.end(); ++iter) {
    const Word &word = *iter;
    UTIL_THROW_IF2(!word.IsNonTerminal(),
                   "Word must be a non-terminal");
    out << "[" << startPos <<"," << endPos << "]="
        << word << "(" << word.IsNonTerminal() << ") ";
  }
@ -200,7 +200,7 @@ Phrase TrellisPath::GetSurfacePhrase() const
      FactorType factorType = outputFactor[i];
      const Factor *factor = targetPhrase.GetFactor(pos, factorType);
      UTIL_THROW_IF2(factor == NULL,
                     "No factor " << factorType << " at position " << pos);
      newWord[factorType] = factor;
    }
  }
@ -108,30 +108,30 @@ enum DistortionOrientationOptions {

enum PhraseTableImplementation {
  Memory = 0
  ,Binary = 1
  ,OnDisk = 2
  //,GlueRule = 3
  //,Joshua = 4
  //,MemorySourceLabel = 5
  ,SCFG = 6
  //,BerkeleyDb = 7
  ,SuffixArray = 8
  ,Hiero = 9
  ,ALSuffixArray = 10
  ,FuzzyMatch = 11
  ,Compact = 12
  ,Interpolated = 13
  ,DSuffixArray = 14
  ,MemMappedSA = 15
  ,DCacheBased = 32
};

enum InputTypeEnum {
  SentenceInput = 0
  ,ConfusionNetworkInput = 1
  ,WordLatticeInput = 2
  ,TreeInputType = 3
  ,WordLatticeInput2 = 4
};
@ -145,27 +145,27 @@ enum XmlInputType {

enum DictionaryFind {
  Best = 0
  ,All = 1
};

enum SearchAlgorithm {
  Normal = 0
  ,CubePruning = 1
  ,CubeGrowing = 2
  ,ChartDecoding = 3
  ,NormalBatch = 4
  ,ChartIncremental = 5
};

enum SourceLabelOverlap {
  SourceLabelOverlapAdd = 0
  ,SourceLabelOverlapReplace = 1
  ,SourceLabelOverlapDiscard = 2
};

enum WordAlignmentSort {
  NoSort = 0
  ,TargetOrder = 1
};

enum FormatType {
@ -316,7 +316,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
    const std::vector<float> &weightT)
{
  UTIL_THROW_IF2(weightT.size() != probVector.size(),
                 "Weight and score vector sizes not the same");
  float rv=0.0;
  for(float const *sb=&probVector[0],*se=sb+probVector.size(),*wb=&weightT[0];
      sb!=se; ++sb, ++wb)
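CalcTranslationScore above is a weighted dot product: each (log) probability is multiplied by its feature weight and the products are summed. A small sketch with a worked example in the comment:

  #include <cassert>
  #include <cstddef>
  #include <vector>

  // Sketch: the same weighted sum as CalcTranslationScore above.
  // Example: probVector = {-1.0, -2.0} (log probs), weightT = {0.5, 0.3}
  // gives 0.5 * -1.0 + 0.3 * -2.0 = -1.1.
  inline float WeightedScore(const std::vector<float> &probVector,
                             const std::vector<float> &weightT)
  {
    assert(weightT.size() == probVector.size());
    float rv = 0.0f;
    for (std::size_t i = 0; i < probVector.size(); ++i)
      rv += probVector[i] * weightT[i];
    return rv;
  }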
@ -74,8 +74,8 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
  const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
  bool firstPass = true;
  for (unsigned int i = 0 ; i < factorType.size() ; i++) {
    UTIL_THROW_IF2(factorType[i] >= MAX_NUM_FACTORS,
                   "Trying to reference factor " << factorType[i] << ". Max factor is " << MAX_NUM_FACTORS);

    const Factor *factor = m_factorArray[factorType[i]];
    if (factor != NULL) {
@ -143,10 +143,10 @@ void Word::OnlyTheseFactors(const FactorMask &factors)

bool Word::IsEpsilon() const
{
  const Factor *factor = m_factorArray[0];
  int compare = factor->GetString().compare(EPSILON);

  return compare == 0;
}

TO_STRING_BODY(Word);
@ -13,7 +13,7 @@ namespace Moses
WordLattice::WordLattice()
{
  UTIL_THROW_IF2(&InputFeature::Instance() == NULL,
                 "Input feature must be specified");
}

size_t WordLattice::GetColumnIncrement(size_t i, size_t j) const
@ -216,9 +216,8 @@ WordLattice::CreateTranslationOptionCollection() const

  if (StaticData::Instance().GetUseLegacyPT()) {
    rv = new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
  } else {
    rv = new TranslationOptionCollectionLattice(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
  }

  assert(rv);
@ -222,7 +222,7 @@ public:

  //! converts bitmap into an integer ID, with an additional span covered
  WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
    assert(m_size < (1<<16));

    size_t start = GetFirstGapPos();
    if (start == NOT_FOUND) start = m_size; // nothing left
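GetIDPlus above asserts that m_size fits in 16 bits so that a coverage bitmap plus an extra span can be packed into a single integer ID. The exact layout is not visible in this hunk, so the sketch below shows one plausible packing and is illustrative only:

  #include <cassert>

  typedef unsigned long long WordsBitmapID;   // assumed wide enough for the packing

  // Illustrative packing only: combine a base bitmap ID with a span whose
  // start and end positions each fit in 16 bits.
  WordsBitmapID PackIDPlus(WordsBitmapID baseID, unsigned startPos, unsigned endPos)
  {
    assert(startPos < (1u << 16) && endPos < (1u << 16));
    return (baseID << 32) | ((WordsBitmapID)startPos << 16) | endPos;
  }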