mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
xml constraint
This commit is contained in:
parent
abfe02f154
commit
b368085609
6
Jamroot
6
Jamroot
@ -122,12 +122,12 @@ project : requirements
|
||||
;
|
||||
|
||||
#Add directories here if you want their incidental targets too (i.e. tests).
|
||||
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing ;
|
||||
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd socket-moses-cmd moses-chart-cmd mira scripts regression-testing ;
|
||||
|
||||
alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ;
|
||||
alias programs : lm//programs moses-chart-cmd//moses_chart moses-cmd//programs socket-moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor mira//mira contrib/server//mosesserver ;
|
||||
|
||||
install-bin-libs programs ;
|
||||
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
|
||||
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd socket-moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
|
||||
install-headers headers-moses : moses//headers-to-install : moses ;
|
||||
|
||||
alias install : prefix-bin prefix-lib headers-base headers-moses ;
|
||||
|
@ -77,7 +77,7 @@ Parameter::Parameter()
|
||||
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
|
||||
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
|
||||
AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
|
||||
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
|
||||
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
|
||||
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
|
||||
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
|
||||
AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
|
||||
|
@ -487,10 +487,11 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
|
||||
else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive;
|
||||
else if (m_parameter->GetParam("xml-input")[0]=="inclusive") m_xmlInputType = XmlInclusive;
|
||||
else if (m_parameter->GetParam("xml-input")[0]=="constraint") m_xmlInputType = XmlConstraint;
|
||||
else if (m_parameter->GetParam("xml-input")[0]=="ignore") m_xmlInputType = XmlIgnore;
|
||||
else if (m_parameter->GetParam("xml-input")[0]=="pass-through") m_xmlInputType = XmlPassThrough;
|
||||
else {
|
||||
UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, or ignore");
|
||||
UserMessage::Add("invalid xml-input value, must be pass-through, exclusive, inclusive, constraint, or ignore");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -569,7 +569,6 @@ void TranslationOptionCollection::Sort()
|
||||
* called by CreateTranslationOptionsForRange()
|
||||
* \param startPos first position in input sentence
|
||||
* \param lastPos last position in input sentence
|
||||
* \param adhereTableLimit whether phrase & generation table limits are adhered to
|
||||
*/
|
||||
bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const
|
||||
{
|
||||
@ -577,6 +576,18 @@ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t)
|
||||
//not implemented for base class
|
||||
}
|
||||
|
||||
/** Check if an option conflicts with any constraint XML options. Okay, if XML option is substring in source and target.
|
||||
* by default, we don't support XML options. subclasses need to override this function.
|
||||
* called by CreateTranslationOptionsForRange()
|
||||
* \param startPos first position in input sentence
|
||||
* \param lastPos last position in input sentence
|
||||
*/
|
||||
bool TranslationOptionCollection::ViolatesXmlOptionsConstraint(size_t, size_t, TranslationOption *) const
|
||||
{
|
||||
return false;
|
||||
//not implemented for base class
|
||||
}
|
||||
|
||||
/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
|
||||
* called by CreateTranslationOptionsForRange()
|
||||
* \param startPos first position in input sentence
|
||||
@ -588,9 +599,7 @@ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t)
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/** add translation option to the list
|
||||
/** Add translation option to the list
|
||||
* \param translationOption translation option to be added */
|
||||
void TranslationOptionCollection::Add(TranslationOption *translationOption)
|
||||
{
|
||||
|
@ -140,6 +140,9 @@ public:
|
||||
//!Check if this range has XML options
|
||||
virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||
|
||||
//! Check if a translation option violates a subsumed XML option constraint
|
||||
virtual bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
|
||||
|
||||
//! Create xml-based translation options for the specific input span
|
||||
virtual void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
||||
|
||||
|
@ -79,6 +79,51 @@ bool TranslationOptionCollectionText::HasXmlOptionsOverlappingRange(size_t start
|
||||
return source.XmlOverlap(startPosition,endPosition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the given translation option violates a specified xml Option
|
||||
*/
|
||||
bool TranslationOptionCollectionText::ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const
|
||||
{
|
||||
// skip if there is no overlap
|
||||
Sentence const& source=dynamic_cast<Sentence const&>(m_source);
|
||||
if (!source.XmlOverlap(startPosition,endPosition)) {
|
||||
return false;
|
||||
}
|
||||
// check for all sub spans
|
||||
for(size_t start=startPosition; start<=endPosition; start++) {
|
||||
for(size_t end=start; end<=endPosition; end++) {
|
||||
// get list of xml options for the subspan
|
||||
vector <TranslationOption*> xmlOptions;
|
||||
source.GetXmlTranslationOptions(xmlOptions,start,end);
|
||||
if (xmlOptions.size() > 0) {
|
||||
// check if any xml option matches
|
||||
for(size_t i=0; i<xmlOptions.size(); i++) {
|
||||
const TargetPhrase &phrase = transOpt->GetTargetPhrase();
|
||||
const TargetPhrase &xmlPhrase = xmlOptions[i]->GetTargetPhrase();
|
||||
// check this xml option (if shorter)
|
||||
if (phrase.GetSize() >= xmlPhrase.GetSize()) {
|
||||
// match may start in middle of phrase
|
||||
for(size_t offset=0; offset <= phrase.GetSize()-xmlPhrase.GetSize(); offset++) {
|
||||
bool match = true;
|
||||
// match every word (only surface factor)
|
||||
for(size_t wordPos=0; match && wordPos < xmlPhrase.GetSize(); wordPos++) {
|
||||
if (phrase.GetFactor( wordPos+offset,0 )->Compare(*(xmlPhrase.GetFactor( wordPos,0 )))) {
|
||||
match = false;
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
return false; // no violation if matching xml option found
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true; // there were xml options for this range, but none matched
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create xml-based translation options for the specific input span
|
||||
*/
|
||||
@ -140,5 +185,3 @@ void TranslationOptionCollectionText::CreateTranslationOptionsForRange(
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||
|
||||
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||
|
||||
bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
|
||||
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
||||
|
||||
void CreateTranslationOptions();
|
||||
|
@ -134,10 +134,11 @@ enum InputTypeEnum {
|
||||
};
|
||||
|
||||
// How XML markup in the input is handled. The enumerators correspond to the
// accepted values of the "xml-input" parameter: 'pass-through' (default),
// 'ignore', 'exclusive', 'inclusive' and 'constraint'.
// Numeric values are spelled out explicitly; keep them stable when extending.
enum XmlInputType {
  XmlPassThrough = 0, // xml-input value 'pass-through' (also used when the parameter is absent)
  XmlIgnore      = 1, // xml-input value 'ignore'
  XmlExclusive   = 2, // xml-input value 'exclusive'
  XmlInclusive   = 3, // xml-input value 'inclusive'
  XmlConstraint  = 4  // xml-input value 'constraint'
};
|
||||
|
||||
enum DictionaryFind {
|
||||
|
@ -515,8 +515,8 @@ build-osm
|
||||
in: corpus word-alignment
|
||||
out: osm-model
|
||||
ignore-unless: operation-sequence-model
|
||||
rerun-on-change: operation-sequence-model training-options script giza-settings
|
||||
template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir $srilm-dir
|
||||
rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings
|
||||
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --srilm-dir $srilm-dir $operation-sequence-model-settings
|
||||
default-name: model/OSM
|
||||
extract-phrases
|
||||
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
|
||||
|
Loading…
Reference in New Issue
Block a user