Use different max-span limits depending on whether the source non-terminal is the default (X) or a syntactic label

This commit is contained in:
Hieu Hoang 2014-01-27 17:53:53 +00:00
parent cb2805151b
commit b34b4b978f
3 changed files with 37 additions and 6 deletions

View File

@ -81,6 +81,8 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
ChartParserCallback &outColl)
{
const StaticData &staticData = StaticData::Instance();
const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
size_t relEndPos = range.GetEndPos() - range.GetStartPos();
size_t absEndPos = range.GetEndPos();
@ -136,8 +138,6 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
stackInd = relEndPos + 1;
}
// size_t nonTermNumWordsCovered = endPos - startPos + 1;
// get target nonterminals in this span from chart
const ChartCellLabelSet &chartNonTermSet =
GetTargetLabelSet(startPos, endPos);
@ -170,11 +170,21 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) {
const ChartCellLabel &cellLabel = iterChartNonTerm->second;
//cerr << sourceLHS << " " << defaultSourceNonTerm << " " << chartNonTerm << " " << defaultTargetNonTerm << endl;
bool doSearch = true;
if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
// for Hieu's source syntax
const Word &targetLHS = cellLabel.GetLabel();
//bool isSyntaxNonTerm = (sourceLHS != defaultSourceNonTerm) || (chartNonTerm != defaultTargetNonTerm);
bool doSearch = true; //isSyntaxNonTerm ? nonTermNumWordsCovered <= maxSyntaxSpan :
// nonTermNumWordsCovered <= maxDefaultSpan;
bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
size_t nonTermNumWordsCovered = endPos - startPos + 1;
doSearch = isSourceSyntaxNonTerm ?
nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
//cerr << "sourceLHS=" << sourceLHS << " targetLHS=" << targetLHS
// << "doSearch=" << doSearch << endl;
}
if (doSearch) {

View File

@ -36,6 +36,8 @@ namespace Moses
{
// Constructs the on-disk phrase dictionary from its configuration line.
// Both span limits start out as NOT_FOUND; the chart rule lookup only applies
// span restrictions when m_maxSpanDefault differs from NOT_FOUND, so by
// default no restriction is in effect.
PhraseDictionaryOnDisk::PhraseDictionaryOnDisk(const std::string &line)
: MyBase(line)
, m_maxSpanDefault(NOT_FOUND)
, m_maxSpanLabelled(NOT_FOUND)
{
// Must run after the defaults above are in place.
// NOTE(review): presumably this parses `line` and feeds each key/value pair
// to SetParameter(), overriding the defaults -- confirm in the base class.
ReadParameters();
}
@ -202,5 +204,19 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionN
return targetPhrases;
}
/**
 * Applies a single configuration option to this dictionary.
 *
 * Recognised keys (values are parsed with Scan<size_t>):
 *   - "max-span-default":  stored in m_maxSpanDefault. The chart rule lookup
 *                          compares the number of words covered by a
 *                          non-terminal against it when the source LHS equals
 *                          the input default non-terminal.
 *   - "max-span-labelled": stored in m_maxSpanLabelled. Used the same way
 *                          when the source LHS is any other label.
 *
 * The lookup only checks these limits when m_maxSpanDefault is not NOT_FOUND.
 * Every other key is passed on to PhraseDictionary::SetParameter.
 *
 * @param key   name of the option
 * @param value textual value of the option
 */
void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "max-span-default") {
    m_maxSpanDefault = Scan<size_t>(value);
    return;
  }

  if (key == "max-span-labelled") {
    m_maxSpanLabelled = Scan<size_t>(value);
    return;
  }

  // Not an option owned by this class: defer to the base implementation.
  PhraseDictionary::SetParameter(key, value);
}
} // namespace

View File

@ -48,6 +48,7 @@ class PhraseDictionaryOnDisk : public PhraseDictionary
{
typedef PhraseDictionary MyBase;
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&);
friend class ChartRuleLookupManagerOnDisk;
protected:
#ifdef WITH_THREADS
@ -56,6 +57,8 @@ protected:
boost::scoped_ptr<OnDiskPt::OnDiskWrapper> m_implementation;
#endif
size_t m_maxSpanDefault, m_maxSpanLabelled;
OnDiskPt::OnDiskWrapper &GetImplementation();
const OnDiskPt::OnDiskWrapper &GetImplementation() const;
@ -81,6 +84,8 @@ public:
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
void SetParameter(const std::string& key, const std::string& value);
};
} // namespace Moses