xml constraint

This commit is contained in:
phikoehn 2013-08-15 11:46:45 +01:00
parent abfe02f154
commit b368085609
9 changed files with 75 additions and 18 deletions

View File

@ -122,12 +122,12 @@ project : requirements
; ;
#Add directories here if you want their incidental targets too (i.e. tests). #Add directories here if you want their incidental targets too (i.e. tests).
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing ; build-projects lm util phrase-extract search moses moses/LM mert moses-cmd socket-moses-cmd moses-chart-cmd mira scripts regression-testing ;
alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ; alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs socket-moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ;
install-bin-libs programs ; install-bin-libs programs ;
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ; install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd socket-moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
install-headers headers-moses : moses//headers-to-install : moses ; install-headers headers-moses : moses//headers-to-install : moses ;
alias install : prefix-bin prefix-lib headers-base headers-moses ; alias install : prefix-bin prefix-lib headers-base headers-moses ;

View File

@ -77,7 +77,7 @@ Parameter::Parameter()
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables"); AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
AddParam("distortion", "configurations for each factorized/lexicalized reordering model."); AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no"); AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'"); AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" ); AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation"); AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation"); AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");

View File

@ -487,10 +487,11 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough; if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive; else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive; else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
else if (m_parameter->GetParam("xml-input")[0]=="constraint") m_xmlInputType = XmlConstraint;
else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore; else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough; else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
else { else {
UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, or ignore"); UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, constraint, or ignore");
return false; return false;
} }

View File

@ -569,7 +569,6 @@ void TranslationOptionCollection::Sort()
* called by CreateTranslationOptionsForRange() * called by CreateTranslationOptionsForRange()
* \param startPos first position in input sentence * \param startPos first position in input sentence
* \param lastPos last position in input sentence * \param lastPos last position in input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
*/ */
bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const
{ {
@ -577,6 +576,18 @@ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t)
//not implemented for base class //not implemented for base class
} }
/** Check whether a translation option conflicts with any constraint XML option.
 * An option is acceptable if the XML option is a substring of it in source and target.
 * The base class does not support XML options, so nothing is ever reported as a
 * violation here; subclasses need to override this function.
 * called by CreateTranslationOptionsForRange()
 * \param startPos first position in input sentence (unused in the base class)
 * \param lastPos last position in input sentence (unused in the base class)
 * \param transOpt translation option to be checked (unused in the base class)
 * \return always false in the base class
 */
bool TranslationOptionCollection::ViolatesXmlOptionsConstraint(size_t /*startPos*/, size_t /*lastPos*/, TranslationOption * /*transOpt*/) const
{
  // not implemented for base class
  return false;
}
/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing. /** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
* called by CreateTranslationOptionsForRange() * called by CreateTranslationOptionsForRange()
* \param startPos first position in input sentence * \param startPos first position in input sentence
@ -588,9 +599,7 @@ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t)
}; };
/** Add translation option to the list
/** add translation option to the list
* \param translationOption translation option to be added */ * \param translationOption translation option to be added */
void TranslationOptionCollection::Add(TranslationOption *translationOption) void TranslationOptionCollection::Add(TranslationOption *translationOption)
{ {

View File

@ -140,6 +140,9 @@ public:
//!Check if this range has XML options //!Check if this range has XML options
virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const; virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
//! Check if a translation option violates an XML option constraint subsumed by the given span (true = violation)
virtual bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
//! Create xml-based translation options for the specific input span //! Create xml-based translation options for the specific input span
virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition); virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);

View File

@ -79,6 +79,51 @@ bool TranslationOptionCollectionText::HasXmlOptionsOverlappingRange(size_t start
return source.XmlOverlap(startPosition,endPosition); return source.XmlOverlap(startPosition,endPosition);
} }
/**
* Check if the given translation option violates a specified xml Option
*/
bool TranslationOptionCollectionText::ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const
{
// skip if there is no overlap
Sentence const& source=dynamic_cast<Sentence const&>(m_source);
if (!source.XmlOverlap(startPosition,endPosition)) {
return false;
}
// check for all sub spans
for(size_t start=startPosition; start<=endPosition; start++) {
for(size_t end=start; end<=endPosition; end++) {
// get list of xml options for the subspan
vector <TranslationOption*> xmlOptions;
source.GetXmlTranslationOptions(xmlOptions,start,end);
if (xmlOptions.size() > 0) {
// check if any xml option matches
for(size_t i=0; i<xmlOptions.size(); i++) {
const TargetPhrase &phrase = transOpt->GetTargetPhrase();
const TargetPhrase &xmlPhrase = xmlOptions[i]->GetTargetPhrase();
// check this xml option (if shorter)
if (phrase.GetSize() >= xmlPhrase.GetSize()) {
// match may start in middle of phrase
for(size_t offset=0; offset <= phrase.GetSize()-xmlPhrase.GetSize(); offset++) {
bool match = true;
// match every word (only surface factor)
for(size_t wordPos=0; match && wordPos < xmlPhrase.GetSize(); wordPos++) {
if (phrase.GetFactor( wordPos+offset,0 )->Compare(*(xmlPhrase.GetFactor( wordPos,0 )))) {
match = false;
}
}
if (match) {
return false; // no violation if matching xml option found
}
}
}
}
return true; // there were xml options for this range, but none matched
}
}
}
return false;
}
/** /**
* Create xml-based translation options for the specific input span * Create xml-based translation options for the specific input span
*/ */
@ -140,5 +185,3 @@ void TranslationOptionCollectionText::CreateTranslationOptionsForRange(
} }

View File

@ -51,7 +51,7 @@ public:
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold); TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const; bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition); void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
void CreateTranslationOptions(); void CreateTranslationOptions();

View File

@ -134,10 +134,11 @@ enum InputTypeEnum {
}; };
enum XmlInputType { enum XmlInputType {
XmlPassThrough = 0, XmlPassThrough = 0,
XmlIgnore = 1, XmlIgnore = 1,
XmlExclusive = 2, XmlExclusive = 2,
XmlInclusive = 3 XmlInclusive = 3,
XmlConstraint = 4
}; };
enum DictionaryFind { enum DictionaryFind {

View File

@ -515,8 +515,8 @@ build-osm
in: corpus word-alignment in: corpus word-alignment
out: osm-model out: osm-model
ignore-unless: operation-sequence-model ignore-unless: operation-sequence-model
rerun-on-change: operation-sequence-model training-options script giza-settings rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings
template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir $srilm-dir template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --srilm-dir $srilm-dir $operation-sequence-model-settings
default-name: model/OSM default-name: model/OSM
extract-phrases extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus in: corpus-mml-postfilter=OR=word-alignment scored-corpus