mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Fix word string for non-terminals, fix sparse feature weight when more than one DLM feature is used, and fix factors for the start and end symbols
This commit is contained in:
parent
24887b40f0
commit
eaf940d5c1
@ -64,7 +64,7 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)
|
||||
// Multiply all weights of this sparse producer by a given scalar
|
||||
void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar) {
|
||||
assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
|
||||
std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
|
||||
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
|
||||
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
||||
std::stringstream name;
|
||||
name << i->first;
|
||||
|
@ -60,11 +60,6 @@ string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
|
||||
return "dlm";
|
||||
}
|
||||
|
||||
string TargetNgramFeature::GetShortNameWithSEP() const
|
||||
{
|
||||
return "dlm_";
|
||||
}
|
||||
|
||||
size_t TargetNgramFeature::GetNumInputScores() const
|
||||
{
|
||||
return 0;
|
||||
@ -187,8 +182,8 @@ void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream
|
||||
|
||||
FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
vector<const Word*> contextFactor;
|
||||
contextFactor.reserve(m_n);
|
||||
vector<const Word*> contextFactor;
|
||||
contextFactor.reserve(m_n);
|
||||
|
||||
// get index map for underlying hypotheses
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
@ -219,18 +214,21 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int
|
||||
if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT)
|
||||
makeSuffix = true;
|
||||
|
||||
// beginning of sentence symbol <s>?
|
||||
string w = word.GetString(m_factorType);
|
||||
if (w.compare("<s>") == 0)
|
||||
// beginning/end of sentence symbol <s>,</s>?
|
||||
string factorZero = word.GetString(0);
|
||||
if (factorZero.compare("<s>") == 0)
|
||||
prefixTerminals++;
|
||||
// end of sentence symbol </s>?
|
||||
else if (w.compare("</s>") == 0)
|
||||
else if (factorZero.compare("</s>") == 0)
|
||||
suffixTerminals++;
|
||||
// everything else
|
||||
else {
|
||||
stringstream ngram;
|
||||
ngram << GetShortNameWithSEP();
|
||||
ngram << word.GetString(m_factorType);
|
||||
ngram << m_baseName;
|
||||
if (m_factorType == 0)
|
||||
ngram << factorZero;
|
||||
else
|
||||
ngram << word.GetString(m_factorType);
|
||||
accumulator->SparsePlusEquals(ngram.str(), 1);
|
||||
|
||||
if (collectForPrefix)
|
||||
@ -346,7 +344,7 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int
|
||||
|
||||
// remove duplicates
|
||||
stringstream curr_ngram;
|
||||
curr_ngram << GetShortNameWithSEP();
|
||||
curr_ngram << m_baseName;
|
||||
curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType);
|
||||
curr_ngram << ":";
|
||||
curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType);
|
||||
@ -373,7 +371,7 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int
|
||||
size_t size = contextFactor.size();
|
||||
if (makePrefix && makeSuffix && (size <= m_n)) {
|
||||
stringstream curr_ngram;
|
||||
curr_ngram << GetShortNameWithSEP();
|
||||
curr_ngram << m_baseName;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
curr_ngram << (*contextFactor[i]).GetString(m_factorType);
|
||||
if (i < size-1)
|
||||
@ -394,11 +392,15 @@ void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFacto
|
||||
for (size_t k = 0; k < numberOfStartPos; ++k) {
|
||||
size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset;
|
||||
for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) {
|
||||
ngram << GetShortNameWithSEP();
|
||||
ngram << m_baseName;
|
||||
for (size_t i=k+offset; i <= end_pos; ++i) {
|
||||
if (i > k+offset)
|
||||
ngram << ":";
|
||||
ngram << (*contextFactor[i]).GetString(m_factorType);
|
||||
string factorZero = (*contextFactor[i]).GetString(0);
|
||||
if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
|
||||
ngram << factorZero;
|
||||
else
|
||||
ngram << (*contextFactor[i]).GetString(m_factorType);
|
||||
const Word w = *contextFactor[i];
|
||||
}
|
||||
// cerr << "p-ngram: " << ngram.str() << endl;
|
||||
@ -413,12 +415,16 @@ void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFacto
|
||||
for (size_t k = 0; k < numberOfEndPos; ++k) {
|
||||
size_t end_pos = contextFactor.size()-1-k-offset;
|
||||
for (int start_pos=end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) {
|
||||
ngram << GetShortNameWithSEP();
|
||||
ngram << m_baseName;
|
||||
for (size_t j=start_pos; j <= end_pos; ++j){
|
||||
ngram << (*contextFactor[j]).GetString(m_factorType);
|
||||
if (j < end_pos)
|
||||
string factorZero = (*contextFactor[j]).GetString(0);
|
||||
if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
|
||||
ngram << factorZero;
|
||||
else
|
||||
ngram << (*contextFactor[j]).GetString(m_factorType);
|
||||
if (j < end_pos)
|
||||
ngram << ":";
|
||||
}
|
||||
}
|
||||
// cerr << "s-ngram: " << ngram.str() << endl;
|
||||
accumulator->SparsePlusEquals(ngram.str(), 1);
|
||||
ngram.str("");
|
||||
|
@ -190,12 +190,13 @@ public:
|
||||
FactorCollection& factorCollection = FactorCollection::Instance();
|
||||
const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
|
||||
m_bos.SetFactor(m_factorType,bosFactor);
|
||||
m_baseName = GetScoreProducerDescription();
|
||||
m_baseName.append("_");
|
||||
}
|
||||
|
||||
bool Load(const std::string &filePath);
|
||||
|
||||
std::string GetScoreProducerWeightShortName(unsigned) const;
|
||||
std::string GetShortNameWithSEP() const;
|
||||
size_t GetNumInputScores() const;
|
||||
|
||||
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
|
||||
@ -219,6 +220,8 @@ private:
|
||||
// additional weight that all sparse weights are scaled with
|
||||
float m_sparseProducerWeight;
|
||||
|
||||
std::string m_baseName;
|
||||
|
||||
void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
|
||||
void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
|
||||
size_t numberOfStartPos = 1, size_t offset = 0) const;
|
||||
|
@ -103,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction
|
||||
|
||||
vector<string> wordVec;
|
||||
Tokenize(wordVec, str, "|");
|
||||
assert(wordVec.size() == factorOrder.size());
|
||||
if (!isNonTerminal)
|
||||
assert(wordVec.size() == factorOrder.size());
|
||||
|
||||
const Factor *factor;
|
||||
for (size_t ind = 0; ind < wordVec.size(); ++ind) {
|
||||
|
Loading…
Reference in New Issue
Block a user