mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
score-stsg: add --TreeScore option
This commit is contained in:
parent
b7650b2d8b
commit
7cc75a0fa1
@ -20,7 +20,7 @@ public:
|
||||
, negLogProb(false)
|
||||
, noLex(false)
|
||||
, noWordAlignment(false)
|
||||
, pcfg(false) {}
|
||||
, treeScore(false) {}
|
||||
|
||||
// Positional options
|
||||
std::string extractFile;
|
||||
@ -36,7 +36,7 @@ public:
|
||||
bool negLogProb;
|
||||
bool noLex;
|
||||
bool noWordAlignment;
|
||||
bool pcfg;
|
||||
bool treeScore;
|
||||
};
|
||||
|
||||
} // namespace ScoreStsg
|
||||
|
@ -15,7 +15,8 @@ void RuleGroup::SetNewSource(const StringPiece &source)
|
||||
}
|
||||
|
||||
void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
|
||||
const StringPiece &fullAlign, int count)
|
||||
const StringPiece &fullAlign, int count,
|
||||
double treeScore)
|
||||
{
|
||||
if (m_distinctRules.empty() ||
|
||||
ntAlign != m_distinctRules.back().ntAlign ||
|
||||
@ -27,6 +28,7 @@ void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
|
||||
fullAlign.CopyToString(&r.alignments.back().first);
|
||||
r.alignments.back().second = count;
|
||||
r.count = count;
|
||||
r.treeScore = treeScore;
|
||||
m_distinctRules.push_back(r);
|
||||
} else {
|
||||
DistinctRule &r = m_distinctRules.back();
|
||||
|
@ -25,13 +25,14 @@ class RuleGroup
|
||||
{
|
||||
public:
|
||||
// Stores the target-side and NT-alignment of a distinct rule. Also records
|
||||
// the rule's count and the observed symbol alignments (plus their
|
||||
// frequencies).
|
||||
// the rule's count, the observed symbol alignments (plus their frequencies),
|
||||
// and the tree score.
|
||||
struct DistinctRule {
|
||||
std::string target;
|
||||
std::string ntAlign;
|
||||
std::vector<std::pair<std::string, int> > alignments;
|
||||
int count;
|
||||
double treeScore;
|
||||
};
|
||||
|
||||
typedef std::vector<DistinctRule>::const_iterator ConstIterator;
|
||||
@ -58,7 +59,7 @@ public:
|
||||
// values will be checked against those of the previous rule only (in other
|
||||
// words, the input is assumed to be ordered).
|
||||
void AddRule(const StringPiece &target, const StringPiece &ntAlign,
|
||||
const StringPiece &fullAlign, int count);
|
||||
const StringPiece &fullAlign, int count, double treeScore);
|
||||
|
||||
private:
|
||||
std::string m_source;
|
||||
|
@ -28,8 +28,8 @@ namespace ScoreStsg
|
||||
void RuleTableWriter::WriteLine(const TokenizedRuleHalf &source,
|
||||
const TokenizedRuleHalf &target,
|
||||
const std::string &bestAlignment,
|
||||
double lexScore, int count, int totalCount,
|
||||
int distinctCount)
|
||||
double lexScore, double treeScore, int count,
|
||||
int totalCount, int distinctCount)
|
||||
{
|
||||
if (m_options.inverse) {
|
||||
WriteRuleHalf(target);
|
||||
@ -47,7 +47,9 @@ void RuleTableWriter::WriteLine(const TokenizedRuleHalf &source,
|
||||
m_out << MaybeLog(lexScore);
|
||||
}
|
||||
|
||||
// TODO PCFG
|
||||
if (m_options.treeScore && !m_options.inverse) {
|
||||
m_out << " " << MaybeLog(treeScore);
|
||||
}
|
||||
|
||||
m_out << " ||| " << totalCount << " " << count;
|
||||
if (m_options.kneserNey) {
|
||||
|
@ -23,7 +23,7 @@ public:
|
||||
, m_out(out) {}
|
||||
|
||||
void WriteLine(const TokenizedRuleHalf &, const TokenizedRuleHalf &,
|
||||
const std::string &, double, int, int, int);
|
||||
const std::string &, double, double, int, int, int);
|
||||
|
||||
private:
|
||||
double MaybeLog(double a) const {
|
||||
|
@ -83,6 +83,12 @@ int ScoreStsg::Main(int argc, char *argv[])
|
||||
StringPiece fullAlign = *it++;
|
||||
it->CopyToString(&tmp);
|
||||
int count = std::atoi(tmp.c_str());
|
||||
double treeScore = 0.0f;
|
||||
if (m_options.treeScore && !m_options.inverse) {
|
||||
++it;
|
||||
it->CopyToString(&tmp);
|
||||
treeScore = std::atof(tmp.c_str());
|
||||
}
|
||||
|
||||
// If this is the first line or if source has changed since the last
|
||||
// line then process the current rule group and start a new one.
|
||||
@ -95,7 +101,7 @@ int ScoreStsg::Main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
// Add the rule to the current rule group.
|
||||
ruleGroup.AddRule(target, ntAlign, fullAlign, count);
|
||||
ruleGroup.AddRule(target, ntAlign, fullAlign, count, treeScore);
|
||||
}
|
||||
|
||||
// Process the final rule group.
|
||||
@ -223,11 +229,9 @@ void ScoreStsg::ProcessRuleGroup(const RuleGroup &group,
|
||||
double lexProb = ComputeLexProb(m_sourceHalf.frontierSymbols,
|
||||
m_targetHalf.frontierSymbols, m_tgtToSrc);
|
||||
|
||||
// TODO PCFG score
|
||||
|
||||
// Write a line to the rule table.
|
||||
writer.WriteLine(m_sourceHalf, m_targetHalf, bestAlignment, lexProb,
|
||||
p->count, totalCount, distinctCount);
|
||||
rule.treeScore, p->count, totalCount, distinctCount);
|
||||
}
|
||||
}
|
||||
|
||||
@ -348,7 +352,9 @@ void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
|
||||
("NoWordAlignment",
|
||||
"do not output word alignments")
|
||||
("PCFG",
|
||||
"include pre-computed PCFG score from extract")
|
||||
"synonym for TreeScore (included for compatibility with score)")
|
||||
("TreeScore",
|
||||
"include pre-computed tree score from extract")
|
||||
("UnpairedExtractFormat",
|
||||
"ignored (included for compatibility with score)")
|
||||
;
|
||||
@ -429,8 +435,8 @@ void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
|
||||
if (vm.count("NoWordAlignment")) {
|
||||
options.noWordAlignment = true;
|
||||
}
|
||||
if (vm.count("PCFG")) {
|
||||
options.pcfg = true;
|
||||
if (vm.count("TreeScore") || vm.count("PCFG")) {
|
||||
options.treeScore = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user