integrated xml passthrough handling (by Nicola Bertoldi)

This commit is contained in:
Christian Buck 2013-04-12 19:43:53 +01:00
parent 9c2c6c603b
commit bb5d70fc7c
8 changed files with 74 additions and 9 deletions

View File

@ -8,6 +8,7 @@
#include "moses/ChartManager.h"
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
#include "moses/Phrase.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationSystem.h"
@ -177,11 +178,9 @@ public:
const TranslationSystem& system = getTranslationSystem(params);
stringstream out, graphInfo, transCollOpts;
map<string, xmlrpc_c::value> retData;
if (staticData.IsChart()) {
TreeInput tinput;
const vector<FactorType> &inputFactorOrder =
staticData.GetInputFactorOrder();
TreeInput tinput;
const vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
tinput.Read(in,inputFactorOrder);
ChartManager manager(tinput, &system);
@ -190,11 +189,12 @@ public:
outputChartHypo(out,hypo);
} else {
Sentence sentence;
const vector<FactorType> &inputFactorOrder =
staticData.GetInputFactorOrder();
const vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
sentence.Read(in,inputFactorOrder);
size_t lineNumber = 0; // TODO: Include sentence request number here?
size_t lineNumber = 0; // TODO: Include sentence request number here?
const string passthrough_data = sentence.GetPassthroughInformation();
out << passthrough_data;
Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
const Hypothesis* hypo = manager.GetBestHypothesis();
@ -213,8 +213,7 @@ public:
insertTranslationOptions(manager,retData);
}
}
pair<string, xmlrpc_c::value>
text("text", xmlrpc_c::value_string(out.str()));
pair<string, xmlrpc_c::value> text("text", xmlrpc_c::value_string(out.str()));
retData.insert(text);
cerr << "Output: " << out.str() << endl;
*retvalP = xmlrpc_c::value_struct(retData);

View File

@ -56,6 +56,7 @@ protected:
long m_segId;
ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */
std::string m_textType;
std::string m_passthrough;
public:
@ -112,6 +113,12 @@ public:
//! stores a copy of the given text type string in m_textType
void SetTextType(std::string type) {
  m_textType.assign(type);
}
//! returns a copy of the passthrough text last stored via SetPassthroughInformation()
std::string GetPassthroughInformation() const {
  const std::string &stored = m_passthrough;
  return stored;
}
//! stores a copy of the given passthrough text in m_passthrough.
//! The argument is only read, so it is taken by const reference; this also
//! lets callers pass const strings and temporaries.
void SetPassthroughInformation(const std::string &passthrough) {
  m_passthrough = passthrough;
}
//! returns the number of words moved
virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;

View File

@ -187,6 +187,8 @@ Parameter::Parameter()
AddParam("report-segmentation", "t", "report phrase segmentation in the output");
AddParam("print-id", "prefix translations with id. Default if false");
AddParam("print-passthrough", "output the sgml tag <passthrough> without any computation on that. Default is false");
AddParam("print-passthrough-in-n-best", "output the sgml tag <passthrough> without any computation on that in each entry of the n-best-list. Default is false");
}

View File

@ -113,6 +113,12 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
}
}
// if sentence contains passthrough info "<passthrough tag1=""/>", cut that tag
// out of the line and keep its text on this Sentence
// NOTE(review): the leading "1 ||" makes this condition always true, so the tag
// is extracted regardless of IsPassthroughEnabled() / IsPassthroughInNBestEnabled().
// Confirm whether that is intentional before removing it.
if (1 || staticData.IsPassthroughEnabled() || staticData.IsPassthroughInNBestEnabled()) {
// PassthroughSGML removes the tag from 'line' and returns the removed text
std::string passthrough = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(passthrough);
}
// parse XML markup in translation line
//const StaticData &staticData = StaticData::Instance();
std::vector<XmlOption*> xmlOptionsList(0);

View File

@ -186,6 +186,9 @@ bool StaticData::LoadData(Parameter *parameter)
m_needAlignmentInfo = true;
}
SetBooleanParameter( &m_PrintPassthroughInformation, "print-passthrough", false );
SetBooleanParameter( &m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
if (m_parameter->GetParam("alignment-output-file").size() > 0) {
m_alignmentOutputFile = Scan<std::string>(m_parameter->GetParam("alignment-output-file")[0]);
m_needAlignmentInfo = true;

View File

@ -174,6 +174,8 @@ protected:
bool m_PrintAlignmentInfo;
bool m_needAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
bool m_PrintPassthroughInformation;
bool m_PrintPassthroughInformationInNBest;
std::string m_alignmentOutputFile;
@ -737,6 +739,12 @@ public:
//! returns the current value of m_PrintAlignmentInfoNbest
bool PrintAlignmentInfoInNbest() const {
  const bool enabled = m_PrintAlignmentInfoNbest;
  return enabled;
}
//! returns the flag that LoadData() fills from the "print-passthrough" parameter
bool IsPassthroughEnabled() const {
  const bool enabled = m_PrintPassthroughInformation;
  return enabled;
}
//! returns the flag that LoadData() fills from the "print-passthrough-in-n-best" parameter
bool IsPassthroughInNBestEnabled() const {
  const bool enabled = m_PrintPassthroughInformationInNBest;
  return enabled;
}
//! returns the current value of m_wordAlignmentSort
WordAlignmentSort GetWordAlignmentSort() const {
  const WordAlignmentSort current = m_wordAlignmentSort;
  return current;
}

View File

@ -166,6 +166,43 @@ std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
return meta;
}
/**
 * Cuts the first tag that starts with lbrackStr + tagName out of \p line and
 * returns the removed text.
 *
 * The search runs on ToLower(line) while tagName and the delimiters are used as
 * given, so callers presumably have to pass them in lower case (assumes ToLower
 * lower-cases its argument -- TODO confirm).
 *
 * \param line      input line; the matched tag is removed from it in place
 * \param tagName   name of the tag to look for, e.g. "passthrough"
 * \param lbrackStr opening delimiter of the tag
 * \param rbrackStr closing delimiter of the tag
 * \return the tag text from the opening delimiter up to and including the
 *         closing delimiter, or the empty string when no such tag is found or
 *         when it is never closed (line is left untouched in both cases)
 *
 * NOTE(review): the match is a plain prefix search, so with tagName
 * "passthrough" a tag such as "<passthroughfoo>" is taken as well.
 */
std::string PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr)
{
  // opening pattern, built once and reused for the duplicate check below
  const std::string openPattern = lbrackStr + tagName;

  std::string lline = ToLower(line);
  const size_t open = lline.find(openPattern);
  //check whether the tag exists; if not return the empty string
  if (open == std::string::npos) return std::string();

  const size_t close = lline.find(rbrackStr, open);
  //check whether the tag is terminated by the closing delimiter; if not return the empty string
  if (close == std::string::npos) {
    TRACE_ERR("PassthroughSGML error: the <passthrough info/> tag does not end properly\n");
    return std::string();
  }

  // length of the tag including the whole closing delimiter, so that a
  // multi-character rbrackStr does not leave residue behind in the line
  const size_t tagLength = close - open + rbrackStr.size();

  // extract the tag
  std::string meta = line.substr(open, tagLength);

  // strip the tag from the line
  line.erase(open, tagLength);

  TRACE_ERR("The input contains a <passthrough info/> tag:" << meta << std::endl);

  // a second occurrence is only reported; it is left in the line
  lline = ToLower(line);
  if (lline.find(openPattern) != std::string::npos) {
    TRACE_ERR("PassthroughSGML error: there are two <passthrough> tags\n");
  }
  return meta;
}
}

View File

@ -344,6 +344,8 @@ double GetUserTime();
// dump SGML parser for <seg> tags
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line);
std::string PassthroughSGML(std::string &line, const std::string tagName,const std::string& lbrackStr="<", const std::string& rbrackStr=">");
/**
* Returns the first string bounded by the delimiters (default delimiters are " " and "\t") starting from position first_pos
* and stores the starting position of the next string (in first_str)
@ -383,6 +385,7 @@ T log_sum (T log_a, T log_b)
return ( v );
}
}
#endif