mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
xml constraint
This commit is contained in:
parent
abfe02f154
commit
b368085609
6
Jamroot
6
Jamroot
@ -122,12 +122,12 @@ project : requirements
|
|||||||
;
|
;
|
||||||
|
|
||||||
#Add directories here if you want their incidental targets too (i.e. tests).
|
#Add directories here if you want their incidental targets too (i.e. tests).
|
||||||
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing ;
|
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd socket-moses-cmd moses-chart-cmd mira scripts regression-testing ;
|
||||||
|
|
||||||
alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ;
|
alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs socket-moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ;
|
||||||
|
|
||||||
install-bin-libs programs ;
|
install-bin-libs programs ;
|
||||||
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
|
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd socket-moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
|
||||||
install-headers headers-moses : moses//headers-to-install : moses ;
|
install-headers headers-moses : moses//headers-to-install : moses ;
|
||||||
|
|
||||||
alias install : prefix-bin prefix-lib headers-base headers-moses ;
|
alias install : prefix-bin prefix-lib headers-base headers-moses ;
|
||||||
|
@ -77,7 +77,7 @@ Parameter::Parameter()
|
|||||||
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
|
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
|
||||||
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
|
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
|
||||||
AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
|
AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
|
||||||
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
|
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
|
||||||
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
|
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
|
||||||
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
|
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
|
||||||
AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
|
AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
|
||||||
|
@ -487,10 +487,11 @@ bool StaticData::LoadData(Parameter *parameter)
|
|||||||
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
|
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
|
||||||
else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
|
else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
|
||||||
else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
|
else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
|
||||||
|
else if (m_parameter->GetParam("xml-input")[0]=="constraint") m_xmlInputType = XmlConstraint;
|
||||||
else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
|
else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
|
||||||
else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
|
else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
|
||||||
else {
|
else {
|
||||||
UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, or ignore");
|
UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, constraint, or ignore");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -569,7 +569,6 @@ void TranslationOptionCollection::Sort()
|
|||||||
* called by CreateTranslationOptionsForRange()
|
* called by CreateTranslationOptionsForRange()
|
||||||
* \param startPos first position in input sentence
|
* \param startPos first position in input sentence
|
||||||
* \param lastPos last position in input sentence
|
* \param lastPos last position in input sentence
|
||||||
* \param adhereTableLimit whether phrase & generation table limits are adhered to
|
|
||||||
*/
|
*/
|
||||||
bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const
|
bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const
|
||||||
{
|
{
|
||||||
@ -577,6 +576,18 @@ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t)
|
|||||||
//not implemented for base class
|
//not implemented for base class
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Check if an option conflicts with any constraint XML options. Okay, if XML option is substring in source and target.
|
||||||
|
* by default, we don't support XML options. subclasses need to override this function.
|
||||||
|
* called by CreateTranslationOptionsForRange()
|
||||||
|
* \param startPos first position in input sentence
|
||||||
|
* \param lastPos last position in input sentence
|
||||||
|
*/
|
||||||
|
bool TranslationOptionCollection::ViolatesXmlOptionsConstraint(size_t, size_t, TranslationOption *) const
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
//not implemented for base class
|
||||||
|
}
|
||||||
|
|
||||||
/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
|
/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
|
||||||
* called by CreateTranslationOptionsForRange()
|
* called by CreateTranslationOptionsForRange()
|
||||||
* \param startPos first position in input sentence
|
* \param startPos first position in input sentence
|
||||||
@ -588,9 +599,7 @@ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t)
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** Add translation option to the list
|
||||||
|
|
||||||
/** add translation option to the list
|
|
||||||
* \param translationOption translation option to be added */
|
* \param translationOption translation option to be added */
|
||||||
void TranslationOptionCollection::Add(TranslationOption *translationOption)
|
void TranslationOptionCollection::Add(TranslationOption *translationOption)
|
||||||
{
|
{
|
||||||
|
@ -140,6 +140,9 @@ public:
|
|||||||
//!Check if this range has XML options
|
//!Check if this range has XML options
|
||||||
virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||||
|
|
||||||
|
//! Check if a subsumed XML option constraint is satisfied
|
||||||
|
virtual bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
|
||||||
|
|
||||||
//! Create xml-based translation options for the specific input span
|
//! Create xml-based translation options for the specific input span
|
||||||
virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
||||||
|
|
||||||
|
@ -79,6 +79,51 @@ bool TranslationOptionCollectionText::HasXmlOptionsOverlappingRange(size_t start
|
|||||||
return source.XmlOverlap(startPosition,endPosition);
|
return source.XmlOverlap(startPosition,endPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the given translation option violates a specified xml Option
|
||||||
|
*/
|
||||||
|
bool TranslationOptionCollectionText::ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const
|
||||||
|
{
|
||||||
|
// skip if there is no overlap
|
||||||
|
Sentence const& source=dynamic_cast<Sentence const&>(m_source);
|
||||||
|
if (!source.XmlOverlap(startPosition,endPosition)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// check for all sub spans
|
||||||
|
for(size_t start=startPosition; start<=endPosition; start++) {
|
||||||
|
for(size_t end=start; end<=endPosition; end++) {
|
||||||
|
// get list of xml options for the subspan
|
||||||
|
vector <TranslationOption*> xmlOptions;
|
||||||
|
source.GetXmlTranslationOptions(xmlOptions,start,end);
|
||||||
|
if (xmlOptions.size() > 0) {
|
||||||
|
// check if any xml option matches
|
||||||
|
for(size_t i=0; i<xmlOptions.size(); i++) {
|
||||||
|
const TargetPhrase &phrase = transOpt->GetTargetPhrase();
|
||||||
|
const TargetPhrase &xmlPhrase = xmlOptions[i]->GetTargetPhrase();
|
||||||
|
// check this xml option (if shorter)
|
||||||
|
if (phrase.GetSize() >= xmlPhrase.GetSize()) {
|
||||||
|
// match may start in middle of phrase
|
||||||
|
for(size_t offset=0; offset <= phrase.GetSize()-xmlPhrase.GetSize(); offset++) {
|
||||||
|
bool match = true;
|
||||||
|
// match every word (only surface factor)
|
||||||
|
for(size_t wordPos=0; match && wordPos < xmlPhrase.GetSize(); wordPos++) {
|
||||||
|
if (phrase.GetFactor( wordPos+offset,0 )->Compare(*(xmlPhrase.GetFactor( wordPos,0 )))) {
|
||||||
|
match = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (match) {
|
||||||
|
return false; // no violation if matching xml option found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true; // there were xml options for this range, but none matched
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create xml-based translation options for the specific input span
|
* Create xml-based translation options for the specific input span
|
||||||
*/
|
*/
|
||||||
@ -140,5 +185,3 @@ void TranslationOptionCollectionText::CreateTranslationOptionsForRange(
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ public:
|
|||||||
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||||
|
|
||||||
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||||
|
bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
|
||||||
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
||||||
|
|
||||||
void CreateTranslationOptions();
|
void CreateTranslationOptions();
|
||||||
|
@ -134,10 +134,11 @@ enum InputTypeEnum {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum XmlInputType {
|
enum XmlInputType {
|
||||||
XmlPassThrough = 0,
|
XmlPassThrough = 0,
|
||||||
XmlIgnore = 1,
|
XmlIgnore = 1,
|
||||||
XmlExclusive = 2,
|
XmlExclusive = 2,
|
||||||
XmlInclusive = 3
|
XmlInclusive = 3,
|
||||||
|
XmlConstraint = 4
|
||||||
};
|
};
|
||||||
|
|
||||||
enum DictionaryFind {
|
enum DictionaryFind {
|
||||||
|
@ -515,8 +515,8 @@ build-osm
|
|||||||
in: corpus word-alignment
|
in: corpus word-alignment
|
||||||
out: osm-model
|
out: osm-model
|
||||||
ignore-unless: operation-sequence-model
|
ignore-unless: operation-sequence-model
|
||||||
rerun-on-change: operation-sequence-model training-options script giza-settings
|
rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings
|
||||||
template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir $srilm-dir
|
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --srilm-dir $srilm-dir $operation-sequence-model-settings
|
||||||
default-name: model/OSM
|
default-name: model/OSM
|
||||||
extract-phrases
|
extract-phrases
|
||||||
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
|
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
|
||||||
|
Loading…
Reference in New Issue
Block a user