mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
added support tp specify translation options via xml for chart decoder
This commit is contained in:
parent
9e5e502687
commit
292c75cb1a
@ -29,6 +29,7 @@
|
|||||||
#include "ChartTrellisPathList.h"
|
#include "ChartTrellisPathList.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
#include "DecodeStep.h"
|
#include "DecodeStep.h"
|
||||||
|
#include "TreeInput.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Moses;
|
using namespace Moses;
|
||||||
@ -79,6 +80,8 @@ void ChartManager::ProcessSentence()
|
|||||||
VERBOSE(2,"Decoding: " << endl);
|
VERBOSE(2,"Decoding: " << endl);
|
||||||
//ChartHypothesis::ResetHypoCount();
|
//ChartHypothesis::ResetHypoCount();
|
||||||
|
|
||||||
|
AddXmlChartOptions();
|
||||||
|
|
||||||
// MAIN LOOP
|
// MAIN LOOP
|
||||||
size_t size = m_source.GetSize();
|
size_t size = m_source.GetSize();
|
||||||
for (size_t width = 1; width <= size; ++width) {
|
for (size_t width = 1; width <= size; ++width) {
|
||||||
@ -122,6 +125,28 @@ void ChartManager::ProcessSentence()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ChartManager::AddXmlChartOptions() {
|
||||||
|
TreeInput const &source = dynamic_cast<TreeInput const&>(m_source);
|
||||||
|
const std::vector <ChartTranslationOption*> xmlChartOptionsList = source.GetXmlChartTranslationOptions();
|
||||||
|
IFVERBOSE(2) { cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl; }
|
||||||
|
if (xmlChartOptionsList.size() == 0) return;
|
||||||
|
|
||||||
|
for(std::vector<ChartTranslationOption*>::const_iterator i = xmlChartOptionsList.begin();
|
||||||
|
i != xmlChartOptionsList.end(); ++i) {
|
||||||
|
ChartTranslationOption* opt = *i;
|
||||||
|
|
||||||
|
Moses::Scores wordPenaltyScore(1, -0.434294482); // TODO what is this number?
|
||||||
|
opt->GetTargetPhraseCollection().GetCollection()[0]->SetScore((ScoreProducer*)m_system->GetWordPenaltyProducer(), wordPenaltyScore);
|
||||||
|
|
||||||
|
const WordsRange &range = opt->GetSourceWordsRange();
|
||||||
|
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
|
||||||
|
ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
|
||||||
|
hypo->CalcScore();
|
||||||
|
ChartCell &cell = m_hypoStackColl.Get(range);
|
||||||
|
cell.AddHypothesis(hypo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const ChartHypothesis *ChartManager::GetBestHypothesis() const
|
const ChartHypothesis *ChartManager::GetBestHypothesis() const
|
||||||
{
|
{
|
||||||
size_t size = m_source.GetSize();
|
size_t size = m_source.GetSize();
|
||||||
|
@ -65,6 +65,7 @@ public:
|
|||||||
ChartManager(InputType const& source, const TranslationSystem* system);
|
ChartManager(InputType const& source, const TranslationSystem* system);
|
||||||
~ChartManager();
|
~ChartManager();
|
||||||
void ProcessSentence();
|
void ProcessSentence();
|
||||||
|
void AddXmlChartOptions();
|
||||||
const ChartHypothesis *GetBestHypothesis() const;
|
const ChartHypothesis *GetBestHypothesis() const;
|
||||||
void CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDistinct=0) const;
|
void CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDistinct=0) const;
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ namespace Moses
|
|||||||
* \param reorderingConstraint reordering constraint zones specified by xml
|
* \param reorderingConstraint reordering constraint zones specified by xml
|
||||||
* \param walls reordering constraint walls specified by xml
|
* \param walls reordering constraint walls specified by xml
|
||||||
*/
|
*/
|
||||||
bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput> &sourceLabels)
|
bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput> &sourceLabels, std::vector<XmlOption*> &xmlOptions)
|
||||||
{
|
{
|
||||||
//parse XML markup in translation line
|
//parse XML markup in translation line
|
||||||
|
|
||||||
@ -41,6 +41,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
|
|||||||
string cleanLine; // return string (text without xml)
|
string cleanLine; // return string (text without xml)
|
||||||
size_t wordPos = 0; // position in sentence (in terms of number of words)
|
size_t wordPos = 0; // position in sentence (in terms of number of words)
|
||||||
|
|
||||||
|
// keep this handy for later
|
||||||
|
const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
|
||||||
|
const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||||
|
|
||||||
// loop through the tokens
|
// loop through the tokens
|
||||||
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
|
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
|
||||||
// not a xml tag, but regular text (may contain many words)
|
// not a xml tag, but regular text (may contain many words)
|
||||||
@ -145,14 +149,63 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
WordsRange range(startPos,endPos-1);
|
// may be either a input span label ("label"), or a specified output translation "translation"
|
||||||
// specified translations -> vector of phrases
|
string label = ParseXmlTagAttribute(tagContent,"label");
|
||||||
// multiple translations may be specified, separated by "||"
|
string translation = ParseXmlTagAttribute(tagContent,"translation");
|
||||||
vector<string> altTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"label"), "||");
|
|
||||||
CHECK(altTexts.size() == 1);
|
|
||||||
|
|
||||||
XMLParseOutput item(altTexts[0], range);
|
// specified label
|
||||||
sourceLabels.push_back(item);
|
if (translation.length() == 0 && label.length() > 0) {
|
||||||
|
WordsRange range(startPos,endPos-1); // really?
|
||||||
|
XMLParseOutput item(label, range);
|
||||||
|
sourceLabels.push_back(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
// specified translations -> vector of phrases, separated by "||"
|
||||||
|
if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
|
||||||
|
vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
|
||||||
|
vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
|
||||||
|
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
|
||||||
|
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
|
||||||
|
for (size_t i=0; i<altTexts.size(); ++i) {
|
||||||
|
// set target phrase
|
||||||
|
TargetPhrase targetPhrase(Output);
|
||||||
|
targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],factorDelimiter);
|
||||||
|
|
||||||
|
// set constituent label
|
||||||
|
string targetLHSstr;
|
||||||
|
if (altLabel.size() > i && altLabel[i].size() > 0) {
|
||||||
|
targetLHSstr = altLabel[i];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
|
||||||
|
UnknownLHSList::const_iterator iterLHS = lhsList.begin();
|
||||||
|
targetLHSstr = iterLHS->first;
|
||||||
|
}
|
||||||
|
Word targetLHS(true);
|
||||||
|
targetLHS.CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
|
||||||
|
CHECK(targetLHS.GetFactor(0) != NULL);
|
||||||
|
targetPhrase.SetTargetLHS(targetLHS);
|
||||||
|
|
||||||
|
// get probability
|
||||||
|
float probValue = 1;
|
||||||
|
if (altProbs.size() > i && altProbs[i].size() > 0) {
|
||||||
|
probValue = Scan<float>(altProbs[i]);
|
||||||
|
}
|
||||||
|
// convert from prob to log-prob
|
||||||
|
float scoreValue = FloorScore(TransformScore(probValue));
|
||||||
|
targetPhrase.SetScore(scoreValue);
|
||||||
|
|
||||||
|
// set span and create XmlOption
|
||||||
|
WordsRange range(startPos+1,endPos);
|
||||||
|
XmlOption *option = new XmlOption(range,targetPhrase);
|
||||||
|
CHECK(option);
|
||||||
|
xmlOptions.push_back(option);
|
||||||
|
|
||||||
|
VERBOSE(2,"xml translation = [" << range << "] " << targetLHSstr << " -> " << altTexts[i] << " prob: " << probValue << endl);
|
||||||
|
}
|
||||||
|
altTexts.clear();
|
||||||
|
altProbs.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -179,7 +232,8 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
|||||||
//line = Trim(line);
|
//line = Trim(line);
|
||||||
|
|
||||||
std::vector<XMLParseOutput> sourceLabels;
|
std::vector<XMLParseOutput> sourceLabels;
|
||||||
ProcessAndStripXMLTags(line, sourceLabels);
|
std::vector<XmlOption*> xmlOptionsList;
|
||||||
|
ProcessAndStripXMLTags(line, sourceLabels, xmlOptionsList);
|
||||||
|
|
||||||
// do words 1st - hack
|
// do words 1st - hack
|
||||||
stringstream strme;
|
stringstream strme;
|
||||||
@ -211,6 +265,42 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XML Options
|
||||||
|
|
||||||
|
//only fill the vector if we are parsing XML
|
||||||
|
if (staticData.GetXmlInputType() != XmlPassThrough ) {
|
||||||
|
//TODO: needed to handle exclusive
|
||||||
|
//for (size_t i=0; i<GetSize(); i++) {
|
||||||
|
// m_xmlCoverageMap.push_back(false);
|
||||||
|
//}
|
||||||
|
|
||||||
|
//iterXMLOpts will be empty for XmlIgnore
|
||||||
|
//look at each column
|
||||||
|
for(std::vector<XmlOption*>::const_iterator iterXmlOpts = xmlOptionsList.begin();
|
||||||
|
iterXmlOpts != xmlOptionsList.end(); iterXmlOpts++) {
|
||||||
|
|
||||||
|
const XmlOption *xmlOption = *iterXmlOpts;
|
||||||
|
TargetPhrase *targetPhrase = new TargetPhrase(xmlOption->targetPhrase);
|
||||||
|
*targetPhrase = xmlOption->targetPhrase; // copy everything
|
||||||
|
WordsRange *range = new WordsRange(xmlOption->range);
|
||||||
|
const StackVec emptyStackVec; // hmmm... maybe dangerous, but it is never consulted
|
||||||
|
|
||||||
|
TargetPhraseCollection *tpc = new TargetPhraseCollection;
|
||||||
|
tpc->Add(targetPhrase);
|
||||||
|
|
||||||
|
ChartTranslationOption *transOpt = new ChartTranslationOption(*tpc, emptyStackVec, *range, 0.0f);
|
||||||
|
m_xmlChartOptionsList.push_back(transOpt);
|
||||||
|
|
||||||
|
//TODO: needed to handle exclusive
|
||||||
|
//for(size_t j=transOpt->GetSourceWordsRange().GetStartPos(); j<=transOpt->GetSourceWordsRange().GetEndPos(); j++) {
|
||||||
|
// m_xmlCoverageMap[j]=true;
|
||||||
|
//}
|
||||||
|
|
||||||
|
delete xmlOption;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "Sentence.h"
|
#include "Sentence.h"
|
||||||
|
#include "ChartTranslationOption.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -25,6 +26,7 @@ class TreeInput : public Sentence
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
|
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
|
||||||
|
std::vector <ChartTranslationOption*> m_xmlChartOptionsList;
|
||||||
|
|
||||||
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
|
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
|
||||||
,const std::vector<FactorType>& factorOrder);
|
,const std::vector<FactorType>& factorOrder);
|
||||||
@ -34,7 +36,7 @@ protected:
|
|||||||
return m_sourceChart[startPos][endPos - startPos];
|
return m_sourceChart[startPos][endPos - startPos];
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ProcessAndStripXMLTags(std::string &line, std::vector<XMLParseOutput> &sourceLabels);
|
bool ProcessAndStripXMLTags(std::string &line, std::vector<XMLParseOutput> &sourceLabels, std::vector<XmlOption*> &res);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
TreeInput()
|
TreeInput()
|
||||||
@ -57,6 +59,9 @@ public:
|
|||||||
return m_sourceChart[startPos][endPos - startPos];
|
return m_sourceChart[startPos][endPos - startPos];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector <ChartTranslationOption*> GetXmlChartTranslationOptions() const {
|
||||||
|
return m_xmlChartOptionsList;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user