mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Code cleanup and refactoring.
This commit is contained in:
parent
240b88c683
commit
831dc83778
@ -47,14 +47,24 @@ ChartParserUnknown::~ChartParserUnknown()
|
||||
// RemoveAllInColl(m_cacheTargetPhraseCollection);
|
||||
}
|
||||
|
||||
void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
|
||||
// Accessor for the decoder options used by this unknown-word handler.
// Forwards to options() on the object obtained from m_ttask.lock() and
// hands that result back by const reference.
// NOTE(review): the result of lock() is dereferenced without a null check;
// presumably the translation task always outlives this object -- confirm,
// because an expired m_ttask would make this call undefined behaviour.
AllOptions::ptr const&
|
||||
ChartParserUnknown::
|
||||
options() const
|
||||
{
|
||||
return m_ttask.lock()->options();
|
||||
}
|
||||
|
||||
void
|
||||
ChartParserUnknown::
|
||||
Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
|
||||
{
|
||||
// unknown word, add as trans opt
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
|
||||
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer
|
||||
= UnknownWordPenaltyProducer::Instance();
|
||||
|
||||
size_t isDigit = 0;
|
||||
if (staticData.options().unk.drop) {
|
||||
if (options()->unk.drop) {
|
||||
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
|
||||
const StringPiece s = f->GetString();
|
||||
isDigit = s.find_first_of("0123456789");
|
||||
@ -79,9 +89,9 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
|
||||
}
|
||||
|
||||
//TranslationOption *transOpt;
|
||||
if (! staticData.options().unk.drop || isDigit) {
|
||||
if (! options()->unk.drop || isDigit) {
|
||||
// loop
|
||||
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
|
||||
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS();
|
||||
UnknownLHSList::const_iterator iterLHS;
|
||||
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
||||
const string &targetLHSStr = iterLHS->first;
|
||||
@ -91,8 +101,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
|
||||
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
|
||||
Word *targetLHS = new Word(true);
|
||||
|
||||
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
|
||||
targetLHSStr, true);
|
||||
targetLHS->CreateFromString(Output, options()->output.factor_order,
|
||||
targetLHSStr, true);
|
||||
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
||||
|
||||
// add to dictionary
|
||||
@ -108,9 +118,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
|
||||
targetPhrase->SetAlignmentInfo("0-0");
|
||||
targetPhrase->EvaluateInIsolation(*unksrc);
|
||||
|
||||
AllOptions const& opts = staticData.options();
|
||||
if (!opts.output.detailed_tree_transrep_filepath.empty() ||
|
||||
opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
||||
if (!options()->output.detailed_tree_transrep_filepath.empty() ||
|
||||
options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
||||
std::string prop = "[ ";
|
||||
prop += (*targetLHS)[0]->GetString().as_string() + " ";
|
||||
prop += sourceWord[0]->GetString().as_string() + " ]";
|
||||
@ -126,15 +135,15 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
|
||||
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
|
||||
// loop
|
||||
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
|
||||
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS();
|
||||
UnknownLHSList::const_iterator iterLHS;
|
||||
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
||||
const string &targetLHSStr = iterLHS->first;
|
||||
//float prob = iterLHS->second;
|
||||
|
||||
Word *targetLHS = new Word(true);
|
||||
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
|
||||
targetLHSStr, true);
|
||||
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
|
||||
targetLHSStr, true);
|
||||
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
||||
|
||||
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
|
||||
@ -214,9 +223,7 @@ void ChartParser::Create(const Range &range, ChartParserCallback &to)
|
||||
if (range.GetNumWordsCovered() == 1
|
||||
&& range.GetStartPos() != 0
|
||||
&& range.GetStartPos() != m_source.GetSize()-1) {
|
||||
bool always = m_ttask.lock()->options()->unk.always_create_direct_transopt;
|
||||
// bool alwaysCreateDirectTranslationOption
|
||||
// = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
|
||||
bool always = options()->unk.always_create_direct_transopt;
|
||||
if (to.Empty() || always) {
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
const Word &sourceWord = m_source.GetWord(range.GetStartPos());
|
||||
@ -291,4 +298,14 @@ long ChartParser::GetTranslationId() const
|
||||
{
|
||||
return m_source.GetTranslationId();
|
||||
}
|
||||
|
||||
|
||||
// Accessor for the decoder options used by this chart parser.
// Forwards to options() on the object obtained from m_ttask.lock() and
// hands that result back by const reference.
// NOTE(review): the result of lock() is dereferenced without a null check;
// presumably the translation task always outlives the parser -- confirm,
// because an expired m_ttask would make this call undefined behaviour.
AllOptions::ptr const&
|
||||
ChartParser::
|
||||
options() const
|
||||
{
|
||||
return m_ttask.lock()->options();
|
||||
}
|
||||
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -57,6 +57,7 @@ public:
|
||||
private:
|
||||
std::vector<Phrase*> m_unksrcs;
|
||||
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
|
||||
AllOptions::ptr const& options() const;
|
||||
};
|
||||
|
||||
class ChartParser
|
||||
@ -78,6 +79,8 @@ public:
|
||||
return m_unknown.GetUnknownSources();
|
||||
}
|
||||
|
||||
AllOptions::ptr const& options() const;
|
||||
|
||||
private:
|
||||
ChartParserUnknown m_unknown;
|
||||
std::vector <DecodeGraph*> m_decodeGraphList;
|
||||
|
@ -66,9 +66,8 @@ ConfusionNet(AllOptions::ptr const& opts) : InputType(opts)
|
||||
{
|
||||
stats.createOne();
|
||||
|
||||
const StaticData& SD = StaticData::Instance();
|
||||
if (SD.IsSyntax()) {
|
||||
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
|
||||
if (is_syntax(opts->search.algo)) {
|
||||
m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
|
||||
}
|
||||
UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified");
|
||||
}
|
||||
@ -92,14 +91,14 @@ ConfusionNet(Sentence const& s) : InputType(s.options())
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
|
||||
ReadF(std::istream& in, int format)
|
||||
{
|
||||
VERBOSE(2, "read confusion net with format "<<format<<"\n");
|
||||
switch(format) {
|
||||
case 0:
|
||||
return ReadFormat0(in,factorOrder);
|
||||
return ReadFormat0(in);
|
||||
case 1:
|
||||
return ReadFormat1(in,factorOrder);
|
||||
return ReadFormat1(in);
|
||||
default:
|
||||
std::cerr << "ERROR: unknown format '"<<format
|
||||
<<"' in ConfusionNet::Read";
|
||||
@ -109,22 +108,20 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
|
||||
|
||||
// Populates this confusion net from the stream `in` by calling ReadF with
// format 0. When ReadF reports success, the freshly read net is passed to
// stats.collect(). The ReadF result (declared bool) is returned as an int,
// so the return value is non-zero on success and 0 on failure.
int
|
||||
ConfusionNet::
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream& in)
|
||||
{
|
||||
int rv=ReadF(in,factorOrder,0);
|
||||
int rv=ReadF(in,0);
|
||||
if(rv) stats.collect(*this);
|
||||
return rv;
|
||||
}
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
ReadFormat0(std::istream& in)
|
||||
{
|
||||
Clear();
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
const InputFeature *inputFeature = InputFeature::InstancePtr();
|
||||
size_t numInputScores = inputFeature->GetNumInputScores();
|
||||
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
|
||||
@ -140,7 +137,6 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
Column col;
|
||||
while(is>>word) {
|
||||
Word w;
|
||||
// String2Word(word,w,factorOrder);
|
||||
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
||||
std::vector<float> probs(totalCount, 0.0);
|
||||
for(size_t i=0; i < numInputScores; i++) {
|
||||
@ -179,9 +175,10 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
ReadFormat1(std::istream& in)
|
||||
{
|
||||
Clear();
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
std::string line;
|
||||
if(!getline(in,line)) return 0;
|
||||
size_t s;
|
||||
|
@ -30,8 +30,8 @@ protected:
|
||||
std::vector<Column> data;
|
||||
NonTerminalSet m_defaultLabelSet;
|
||||
|
||||
bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
|
||||
bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
|
||||
bool ReadFormat0(std::istream&);
|
||||
bool ReadFormat1(std::istream&);
|
||||
void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
|
||||
|
||||
public:
|
||||
@ -46,7 +46,8 @@ public:
|
||||
|
||||
// Returns column i of the stored data by const reference.
// The index is validated first: UTIL_THROW_IF2 fires with an
// "Out of bounds" message when i >= data.size(), so data[i] is only
// reached for a valid index.
const Column& GetColumn(size_t i) const {
|
||||
UTIL_THROW_IF2(i >= data.size(),
|
||||
"Out of bounds. Trying to access " << i << " when vector only contains " << data.size());
|
||||
"Out of bounds. Trying to access " << i
|
||||
<< " when vector only contains " << data.size());
|
||||
return data[i];
|
||||
}
|
||||
const Column& operator[](size_t i) const {
|
||||
@ -64,11 +65,10 @@ public:
|
||||
data.clear();
|
||||
}
|
||||
|
||||
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
|
||||
bool ReadF(std::istream&, int format=0);
|
||||
virtual void Print(std::ostream&) const;
|
||||
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
int Read(std::istream& in);
|
||||
|
||||
Phrase GetSubString(const Range&) const; //TODO not defined
|
||||
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include "CountNonTerms.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
#include "moses/StaticData.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -21,8 +20,6 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedScores) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
vector<float> scores(m_numScoreComponents, 0);
|
||||
size_t indScore = 0;
|
||||
|
||||
@ -39,7 +36,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
|
||||
if (m_targetSyntax) {
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const Word &word = targetPhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
|
||||
if (word.IsNonTerminal() && word != m_output_default_nonterminal) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
@ -49,7 +46,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
|
||||
if (m_sourceSyntax) {
|
||||
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
|
||||
const Word &word = sourcePhrase.GetWord(i);
|
||||
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
|
||||
if (word.IsNonTerminal() && word != m_input_default_nonterminal) {
|
||||
++scores[indScore];
|
||||
}
|
||||
}
|
||||
@ -72,5 +69,13 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the default input and output non-terminal words from
// opts.syntax into members of this feature. EvaluateInIsolation compares
// phrase words against these cached members rather than reading them from
// StaticData.
void
|
||||
CountNonTerms::
|
||||
Load(AllOptions const& opts)
|
||||
{
|
||||
m_input_default_nonterminal = opts.syntax.input_default_non_terminal;
|
||||
m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -7,6 +7,8 @@ namespace Moses
|
||||
|
||||
class CountNonTerms : public StatelessFeatureFunction
|
||||
{
|
||||
Word m_input_default_nonterminal;
|
||||
Word m_output_default_nonterminal;
|
||||
public:
|
||||
CountNonTerms(const std::string &line);
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
@ -41,6 +43,7 @@ public:
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void Load(AllOptions const& opts);
|
||||
protected:
|
||||
bool m_all, m_sourceSyntax, m_targetSyntax;
|
||||
};
|
||||
|
@ -14,11 +14,11 @@ RuleScope::RuleScope(const std::string &line)
|
||||
{
|
||||
}
|
||||
|
||||
bool IsAmbiguous(const Word &word, bool sourceSyntax)
|
||||
{
|
||||
const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
|
||||
return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
|
||||
}
|
||||
// bool IsAmbiguous(const Word &word, bool sourceSyntax)
|
||||
// {
|
||||
// const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
|
||||
// return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
|
||||
// }
|
||||
|
||||
void RuleScope::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
|
@ -98,6 +98,7 @@ void SoftSourceSyntacticConstraintsFeature::Load(AllOptions const& opts)
|
||||
if (!m_targetSourceLHSJointCountFile.empty()) {
|
||||
LoadTargetSourceLeftHandSideJointCountFile();
|
||||
}
|
||||
m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
|
||||
}
|
||||
|
||||
void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
|
||||
@ -311,8 +312,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
|
||||
std::vector<float> newScores(m_numScoreComponents,0);
|
||||
|
||||
const TreeInput& treeInput = static_cast<const TreeInput&>(input);
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
|
||||
// const StaticData& staticData = StaticData::Instance();
|
||||
// const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
|
||||
|
||||
size_t nNTs = 1;
|
||||
bool treeInputMismatchLHSBinary = true;
|
||||
@ -365,7 +366,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
|
||||
|
||||
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
|
||||
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
|
||||
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
|
||||
if (*treeInputLabelsIt != m_output_default_nonterminal) {
|
||||
boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
|
||||
= m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
|
||||
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
|
||||
@ -387,7 +388,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
|
||||
|
||||
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
|
||||
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
|
||||
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
|
||||
if (*treeInputLabelsIt != m_output_default_nonterminal) {
|
||||
boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
|
||||
= m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
|
||||
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
|
||||
@ -568,7 +569,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
|
||||
}
|
||||
if ( treeInputLabelsLHS.size() == 0 ) {
|
||||
scoreBreakdown.PlusEquals(this,
|
||||
"LHSPAIR_" + targetLHS->GetString().as_string() + "_" + outputDefaultNonTerminal[0]->GetString().as_string(),
|
||||
"LHSPAIR_" + targetLHS->GetString().as_string() + "_"
|
||||
+ m_output_default_nonterminal[0]->GetString().as_string(),
|
||||
1);
|
||||
if (!m_targetSourceLHSJointCountFile.empty()) {
|
||||
t2sLabelsScore = TransformScore(m_floor);
|
||||
|
@ -101,6 +101,7 @@ protected:
|
||||
std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
|
||||
const size_t source) const;
|
||||
|
||||
Word m_output_default_nonterminal;
|
||||
};
|
||||
|
||||
|
||||
|
@ -47,11 +47,12 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType
|
||||
const Word& lhsLabel = targetPhrase.GetTargetLHS();
|
||||
|
||||
const StaticData& staticData = StaticData::Instance();
|
||||
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
|
||||
|
||||
std::vector<float> newScores(m_numScoreComponents,0.0); // m_numScoreComponents == 2 // first fires for matches, second for mismatches
|
||||
std::vector<float> newScores(m_numScoreComponents,0.0);
|
||||
// m_numScoreComponents == 2 // first fires for matches, second for mismatches
|
||||
|
||||
if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end()) && (lhsLabel != outputDefaultNonTerminal) ) {
|
||||
if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end())
|
||||
&& (lhsLabel != m_output_default_nonterminal) ) {
|
||||
// match
|
||||
newScores[0] = 1.0;
|
||||
} else {
|
||||
@ -62,6 +63,12 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
}
|
||||
|
||||
// Copies the default output non-terminal word from opts.syntax into
// m_output_default_nonterminal. EvaluateWithSourceContext compares the
// target LHS label against this cached member rather than reading it from
// StaticData.
void
|
||||
SourceGHKMTreeInputMatchFeature::
|
||||
Load(AllOptions const& opts)
|
||||
{
|
||||
m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "StatelessFeatureFunction.h"
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -8,6 +9,7 @@ namespace Moses
|
||||
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
|
||||
class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
|
||||
{
|
||||
Word m_output_default_nonterminal;
|
||||
public:
|
||||
SourceGHKMTreeInputMatchFeature(const std::string &line);
|
||||
|
||||
@ -40,6 +42,7 @@ public:
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const {};
|
||||
|
||||
void Load(AllOptions const& opts);
|
||||
};
|
||||
|
||||
|
||||
|
@ -18,9 +18,7 @@ namespace Moses
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
int ForestInput::
|
||||
Read(std::istream &in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream &in)
|
||||
{
|
||||
using Syntax::F2S::Forest;
|
||||
|
||||
@ -48,7 +46,7 @@ Read(std::istream &in,
|
||||
std::getline(in, line);
|
||||
} else {
|
||||
do {
|
||||
ParseHyperedgeLine(line, factorOrder);
|
||||
ParseHyperedgeLine(line);
|
||||
std::getline(in, line);
|
||||
} while (line != "");
|
||||
}
|
||||
@ -58,7 +56,7 @@ Read(std::istream &in,
|
||||
// not sure ForestInput needs to.
|
||||
std::stringstream strme;
|
||||
strme << "<s> " << sentence << " </s>" << std::endl;
|
||||
Sentence::Read(strme, factorOrder, opts);
|
||||
Sentence::Read(strme);
|
||||
|
||||
// Find the maximum end position of any vertex (0 if forest is empty).
|
||||
std::size_t maxEnd = FindMaxEnd(*m_forest);
|
||||
@ -70,6 +68,9 @@ Read(std::istream &in,
|
||||
assert(topVertices.size() >= 1);
|
||||
}
|
||||
|
||||
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
|
||||
// Add <s> vertex.
|
||||
Forest::Vertex *startSymbol = NULL;
|
||||
{
|
||||
@ -122,7 +123,9 @@ Read(std::istream &in,
|
||||
return 1;
|
||||
}
|
||||
|
||||
Syntax::F2S::Forest::Vertex *ForestInput::AddOrDeleteVertex(Forest::Vertex *v)
|
||||
Syntax::F2S::Forest::Vertex*
|
||||
ForestInput::
|
||||
AddOrDeleteVertex(Forest::Vertex *v)
|
||||
{
|
||||
std::pair<VertexSet::iterator, bool> ret = m_vertexSet.insert(v);
|
||||
if (ret.second) {
|
||||
@ -172,14 +175,16 @@ void ForestInput::FindTopVertices(Forest &forest,
|
||||
std::back_inserter(topVertices));
|
||||
}
|
||||
|
||||
void ForestInput::ParseHyperedgeLine(
|
||||
const std::string &line, const std::vector<FactorType>& factorOrder)
|
||||
void
|
||||
ForestInput::
|
||||
ParseHyperedgeLine(const std::string &line)
|
||||
{
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
using Syntax::F2S::Forest;
|
||||
|
||||
const util::AnyCharacter delimiter(" \t");
|
||||
util::TokenIter<util::AnyCharacter, true> p(line, delimiter);
|
||||
Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p, factorOrder));
|
||||
Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p));
|
||||
Forest::Hyperedge *e = new Forest::Hyperedge();
|
||||
e->head = v;
|
||||
++p;
|
||||
@ -188,7 +193,7 @@ void ForestInput::ParseHyperedgeLine(
|
||||
//throw Exception("");
|
||||
}
|
||||
for (++p; *p != "|||"; ++p) {
|
||||
v = ParseVertex(*p, factorOrder);
|
||||
v = ParseVertex(*p);
|
||||
if (!v->pvertex.symbol.IsNonTerminal()) {
|
||||
// Egret does not give start/end for terminals.
|
||||
v->pvertex.span = Range(e->head->pvertex.span.GetStartPos(),
|
||||
@ -203,11 +208,11 @@ void ForestInput::ParseHyperedgeLine(
|
||||
e->head->incoming.push_back(e);
|
||||
}
|
||||
|
||||
Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex(
|
||||
const StringPiece &s, const std::vector<FactorType>& factorOrder)
|
||||
Syntax::F2S::Forest::Vertex*
|
||||
ForestInput::ParseVertex(const StringPiece &s)
|
||||
{
|
||||
using Syntax::F2S::Forest;
|
||||
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
Word symbol;
|
||||
std::size_t pos = s.rfind('[');
|
||||
if (pos == std::string::npos) {
|
||||
|
@ -29,9 +29,7 @@ public:
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts);
|
||||
Read(std::istream& in);
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const;
|
||||
@ -76,11 +74,9 @@ private:
|
||||
|
||||
void FindTopVertices(Forest &, std::vector<Forest::Vertex *> &);
|
||||
|
||||
void ParseHyperedgeLine(const std::string &,
|
||||
const std::vector<FactorType> &);
|
||||
void ParseHyperedgeLine(const std::string &);
|
||||
|
||||
Forest::Vertex *ParseVertex(const StringPiece &,
|
||||
const std::vector<FactorType> &);
|
||||
Forest::Vertex *ParseVertex(const StringPiece &);
|
||||
|
||||
boost::shared_ptr<Forest> m_forest;
|
||||
Forest::Vertex *m_rootVertex;
|
||||
|
@ -228,13 +228,13 @@ BufferInput()
|
||||
m_buffered_ahead -= ret->GetSize();
|
||||
} else {
|
||||
source.reset(new itype(m_options));
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
|
||||
if (!source->Read(*m_inputStream))
|
||||
return ret;
|
||||
ret = source;
|
||||
}
|
||||
while (m_buffered_ahead < m_look_ahead) {
|
||||
source.reset(new itype(m_options));
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
|
||||
if (!source->Read(*m_inputStream))
|
||||
break;
|
||||
m_future_input.push_back(source);
|
||||
m_buffered_ahead += source->GetSize();
|
||||
|
@ -190,9 +190,10 @@ public:
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts) =0;
|
||||
Read(std::istream& in) = 0;
|
||||
// ,
|
||||
// std::vector<FactorType> const& factorOrder,
|
||||
// AllOptions const& opts) =0;
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const =0;
|
||||
|
@ -38,9 +38,8 @@ MockHypothesisGuard
|
||||
m_uwp("UnknownWordPenalty"), m_dist("Distortion")
|
||||
{
|
||||
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
|
||||
std::vector<Moses::FactorType> factors(1,0);
|
||||
AllOptions::ptr opts(new AllOptions(StaticData::Instance().options()));
|
||||
m_sentence.reset(new Sentence(opts,0, sourceSentence, &factors));
|
||||
m_sentence.reset(new Sentence(opts, 0, sourceSentence));
|
||||
m_ttask = TranslationTask::create(m_sentence);
|
||||
m_manager.reset(new Manager(m_ttask));
|
||||
|
||||
@ -59,16 +58,14 @@ MockHypothesisGuard
|
||||
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
|
||||
Hypothesis* prevHypo = m_hypothesis;
|
||||
Range range(ai->first,ai->second);
|
||||
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(),
|
||||
range);
|
||||
|
||||
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range);
|
||||
m_targetPhrases.push_back(TargetPhrase(NULL));
|
||||
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);
|
||||
vector<FactorType> const& factors = opts->output.factor_order;
|
||||
m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL);
|
||||
m_toptions.push_back(new TranslationOption
|
||||
(range,m_targetPhrases.back()));
|
||||
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap, m_manager->GetNextHypoId());
|
||||
|
||||
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap,
|
||||
m_manager->GetNextHypoId());
|
||||
}
|
||||
|
||||
|
||||
|
@ -43,9 +43,8 @@ namespace Moses
|
||||
// Constructs a Sentence from the given options: the Phrase base is
// initialised with 0 and the InputType base with opts.
// If decoding is syntax-based, the default input non-terminal is inserted
// into m_defaultLabelSet.
Sentence::
|
||||
Sentence(AllOptions::ptr const& opts) : Phrase(0) , InputType(opts)
|
||||
{
|
||||
const StaticData& SD = StaticData::Instance();
|
||||
if (SD.IsSyntax())
|
||||
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
|
||||
if (is_syntax(opts->search.algo))
|
||||
m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
|
||||
}
|
||||
|
||||
Sentence::
|
||||
@ -146,65 +145,59 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
|
||||
|
||||
// Handles XML markup embedded in the input line.
// Nothing is done when the configured xml_policy is XmlPassThrough.
// Otherwise the line is handed to ProcessAndStripXMLTags together with
// m_xmlOptions, m_reorderingConstraint, xmlWalls and placeholders;
// `line` is passed by non-const reference, so the callee may modify it.
// A false result from ProcessAndStripXMLTags is reported through
// UTIL_THROW_IF2 with the offending line in the message.
void
|
||||
Sentence::
|
||||
aux_interpret_xml(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
|
||||
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders)
|
||||
{
|
||||
// parse XML markup in translation line
|
||||
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
|
||||
using namespace std;
|
||||
if (opts.input.xml_policy != XmlPassThrough) {
|
||||
int offset = SD.IsSyntax() ? 1 : 0;
|
||||
bool OK = ProcessAndStripXMLTags(opts, line, m_xmlOptions,
|
||||
if (m_options->input.xml_policy != XmlPassThrough) {
|
||||
bool OK = ProcessAndStripXMLTags(*m_options, line,
|
||||
m_xmlOptions,
|
||||
m_reorderingConstraint,
|
||||
xmlWalls, placeholders, offset,
|
||||
SD.GetXmlBrackets().first,
|
||||
SD.GetXmlBrackets().second);
|
||||
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
|
||||
xmlWalls, placeholders);
|
||||
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sentence::
|
||||
init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& factorOrder)
|
||||
init(string line)
|
||||
{
|
||||
using namespace std;
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
|
||||
m_frontSpanCoveredLength = 0;
|
||||
m_sourceCompleted.resize(0);
|
||||
|
||||
if (SD.ContinuePartialTranslation())
|
||||
if (m_options->input.continue_partial_translation)
|
||||
aux_init_partial_translation(line);
|
||||
|
||||
|
||||
line = Trim(line);
|
||||
aux_interpret_sgml_markup(line); // for "<seg id=..." markup
|
||||
aux_interpret_dlt(line); // some poorly documented cache-based stuff
|
||||
|
||||
// if sentences is specified as "<passthrough tag1=""/>"
|
||||
if (SD.options().output.PrintPassThrough ||
|
||||
SD.options().nbest.include_passthrough) {
|
||||
if (m_options->output.PrintPassThrough ||m_options->nbest.include_passthrough) {
|
||||
string pthru = PassthroughSGML(line,"passthrough");
|
||||
this->SetPassthroughInformation(pthru);
|
||||
}
|
||||
|
||||
vector<size_t> xmlWalls;
|
||||
vector<pair<size_t, string> >placeholders;
|
||||
aux_interpret_xml(*opts, line, xmlWalls, placeholders);
|
||||
aux_interpret_xml(line, xmlWalls, placeholders);
|
||||
|
||||
Phrase::CreateFromString(Input, factorOrder, line, NULL);
|
||||
Phrase::CreateFromString(Input, m_options->input.factor_order, line, NULL);
|
||||
|
||||
ProcessPlaceholders(placeholders);
|
||||
|
||||
if (SD.IsSyntax()) InitStartEndWord();
|
||||
if (is_syntax(m_options->search.algo))
|
||||
InitStartEndWord();
|
||||
|
||||
// now that we have final word positions in phrase (from
|
||||
// CreateFromString), we can make input phrase objects to go with
|
||||
// our XmlOptions and create TranslationOptions
|
||||
|
||||
// only fill the vector if we are parsing XML
|
||||
if (opts->input.xml_policy != XmlPassThrough) {
|
||||
if (m_options->input.xml_policy != XmlPassThrough) {
|
||||
m_xmlCoverageMap.assign(GetSize(), false);
|
||||
BOOST_FOREACH(XmlOption const* o, m_xmlOptions) {
|
||||
Range const& r = o->range;
|
||||
@ -217,7 +210,7 @@ init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& fa
|
||||
m_reorderingConstraint.InitializeWalls(GetSize());
|
||||
|
||||
// set reordering walls, if "-monotone-at-punction" is set
|
||||
if (SD.UseReorderingConstraint() && GetSize()) {
|
||||
if (m_options->reordering.monotone_at_punct && GetSize()) {
|
||||
Range r(0, GetSize()-1);
|
||||
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
|
||||
}
|
||||
@ -232,14 +225,12 @@ init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& fa
|
||||
|
||||
// Reads one '\n'-delimited line from `in` and initialises this sentence
// from it via init(). Returns 1 after init() has been called and 0 when
// the stream reports end-of-file after the read attempt.
// NOTE(review): only eofbit is tested. std::getline also sets eofbit when
// it hits end-of-file after extracting characters, so a final line without
// a trailing newline is discarded. A stream that has failed for a reason
// other than EOF is not detected here and init() runs on an empty line.
// Confirm both are intended.
int
|
||||
Sentence::
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream& in)
|
||||
{
|
||||
std::string line;
|
||||
if (getline(in, line, '\n').eof())
|
||||
return 0;
|
||||
init(m_options, line, factorOrder);
|
||||
init(line);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -247,7 +238,7 @@ void
|
||||
Sentence::
|
||||
ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders)
|
||||
{
|
||||
FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor;
|
||||
FactorType placeholderFactor = m_options->input.placeholder_factor;
|
||||
if (placeholderFactor == NOT_FOUND) {
|
||||
return;
|
||||
}
|
||||
@ -325,7 +316,7 @@ void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list,
|
||||
|
||||
std::vector <ChartTranslationOptions*>
|
||||
Sentence::
|
||||
GetXmlChartTranslationOptions(AllOptions const& opts) const
|
||||
GetXmlChartTranslationOptions() const
|
||||
{
|
||||
std::vector <ChartTranslationOptions*> ret;
|
||||
|
||||
@ -333,7 +324,7 @@ GetXmlChartTranslationOptions(AllOptions const& opts) const
|
||||
// this code is a copy of the 1 in Sentence.
|
||||
|
||||
//only fill the vector if we are parsing XML
|
||||
if (opts.input.xml_policy != XmlPassThrough ) {
|
||||
if (m_options->input.xml_policy != XmlPassThrough ) {
|
||||
//TODO: needed to handle exclusive
|
||||
//for (size_t i=0; i<GetSize(); i++) {
|
||||
// m_xmlCoverageMap.push_back(false);
|
||||
@ -374,12 +365,10 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
|
||||
}
|
||||
|
||||
// Constructs a Sentence for translation id transId (forwarded with opts to
// the InputType base) and immediately initialises it from the text stext
// by calling init().
Sentence::
|
||||
Sentence(AllOptions::ptr const& opts, size_t const transId,
|
||||
string stext, vector<FactorType> const* IFO)
|
||||
Sentence(AllOptions::ptr const& opts, size_t const transId, string stext)
|
||||
: InputType(opts, transId)
|
||||
{
|
||||
if (IFO) init(opts,stext, *IFO);
|
||||
else init(opts, stext, opts->input.factor_order);
|
||||
init(stext);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -64,8 +64,8 @@ protected:
|
||||
|
||||
public:
|
||||
Sentence(AllOptions::ptr const& opts);
|
||||
Sentence(AllOptions::ptr const& opts, size_t const transId, std::string stext,
|
||||
std::vector<FactorType> const* IFO = NULL);
|
||||
Sentence(AllOptions::ptr const& opts, size_t const transId, std::string stext);
|
||||
// std::vector<FactorType> const* IFO = NULL);
|
||||
// Sentence(size_t const transId, std::string const& stext);
|
||||
~Sentence();
|
||||
|
||||
@ -94,11 +94,11 @@ public:
|
||||
//! populates vector argument with XML force translation options for the specific range passed
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions(AllOptions const& opts) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
|
||||
|
||||
virtual int
|
||||
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
Read(std::istream& in);
|
||||
// , const std::vector<FactorType>& factorOrder, AllOptions const& opts);
|
||||
|
||||
void Print(std::ostream& out) const;
|
||||
|
||||
@ -115,9 +115,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
init(AllOptions::ptr const& opts, std::string line,
|
||||
std::vector<FactorType> const& factorOrder);
|
||||
void init(std::string line);
|
||||
|
||||
std::vector<std::map<std::string,std::string> > const&
|
||||
GetDltMeta() const {
|
||||
@ -139,7 +137,7 @@ private:
|
||||
|
||||
void
|
||||
aux_interpret_xml
|
||||
(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
|
||||
(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
|
||||
void
|
||||
|
@ -219,6 +219,8 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
const PARAM_VEC *params;
|
||||
|
||||
m_options.init(*parameter);
|
||||
if (is_syntax(m_options.search.algo))
|
||||
m_options.syntax.LoadNonTerminals(*parameter, FactorCollection::Instance());
|
||||
|
||||
if (IsSyntax())
|
||||
LoadChartDecodingParameters();
|
||||
|
@ -337,33 +337,10 @@ public:
|
||||
return m_includeLHSInSearchGraph;
|
||||
}
|
||||
|
||||
std::pair<std::string,std::string> GetXmlBrackets() const {
|
||||
return m_xmlBrackets;
|
||||
}
|
||||
|
||||
// bool PrintTranslationOptions() const {
|
||||
// return m_printTranslationOptions;
|
||||
// }
|
||||
|
||||
// bool PrintAllDerivations() const {
|
||||
// return m_printAllDerivations;
|
||||
// }
|
||||
|
||||
const UnknownLHSList &GetUnknownLHS() const {
|
||||
return m_unknownLHS;
|
||||
}
|
||||
|
||||
const Word &GetInputDefaultNonTerminal() const {
|
||||
return m_inputDefaultNonTerminal;
|
||||
}
|
||||
const Word &GetOutputDefaultNonTerminal() const {
|
||||
return m_outputDefaultNonTerminal;
|
||||
}
|
||||
|
||||
SourceLabelOverlap GetSourceLabelOverlap() const {
|
||||
return m_sourceLabelOverlap;
|
||||
}
|
||||
|
||||
size_t GetRuleLimit() const {
|
||||
return m_ruleLimit;
|
||||
}
|
||||
|
@ -3,9 +3,8 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "util/string_stream.hh"
|
||||
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
namespace Moses
|
||||
{
|
||||
namespace Syntax
|
||||
@ -14,13 +13,13 @@ namespace F2S
|
||||
{
|
||||
|
||||
GlueRuleSynthesizer::
|
||||
GlueRuleSynthesizer(HyperTree &trie, const std::vector<FactorType> &iFactors)
|
||||
: m_hyperTree(trie)
|
||||
GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &trie)
|
||||
: m_input_default_nonterminal(opts.syntax.input_default_non_terminal)
|
||||
, m_output_default_nonterminal(opts.syntax.output_default_non_terminal)
|
||||
, m_hyperTree(trie)
|
||||
{
|
||||
// const std::vector<FactorType> &inputFactorOrder =
|
||||
// StaticData::Instance().GetInputFactorOrder();
|
||||
Word *lhs = NULL;
|
||||
m_dummySourcePhrase.CreateFromString(Input, iFactors, "hello", &lhs);
|
||||
m_dummySourcePhrase.CreateFromString(Input, opts.input.factor_order, "hello", &lhs);
|
||||
delete lhs;
|
||||
}
|
||||
|
||||
@ -47,11 +46,10 @@ void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
|
||||
}
|
||||
}
|
||||
|
||||
TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
const Forest::Hyperedge &e)
|
||||
TargetPhrase*
|
||||
GlueRuleSynthesizer::
|
||||
SynthesizeTargetPhrase(const Forest::Hyperedge &e)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
|
||||
UnknownWordPenaltyProducer::Instance();
|
||||
|
||||
@ -61,7 +59,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
for (std::size_t i = 0; i < e.tail.size(); ++i) {
|
||||
const Word &symbol = e.tail[i]->pvertex.symbol;
|
||||
if (symbol.IsNonTerminal()) {
|
||||
targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
|
||||
targetPhrase->AddWord(m_output_default_nonterminal);
|
||||
} else {
|
||||
// TODO Check this
|
||||
Word &targetWord = targetPhrase->AddWord();
|
||||
@ -75,7 +73,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
float score = LOWEST_SCORE;
|
||||
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
|
||||
targetPhrase->EvaluateInIsolation(m_dummySourcePhrase);
|
||||
Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
|
||||
Word *targetLhs = new Word(m_output_default_nonterminal);
|
||||
targetPhrase->SetTargetLHS(targetLhs);
|
||||
targetPhrase->SetAlignmentInfo(alignmentSS.str());
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class AllOptions;
|
||||
namespace Syntax
|
||||
{
|
||||
namespace F2S
|
||||
@ -16,9 +17,11 @@ namespace F2S
|
||||
|
||||
class GlueRuleSynthesizer : public HyperTreeCreator
|
||||
{
|
||||
Word m_input_default_nonterminal;
|
||||
Word m_output_default_nonterminal;
|
||||
public:
|
||||
GlueRuleSynthesizer(HyperTree &, std::vector<FactorType> const& iFactors);
|
||||
|
||||
GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &);
|
||||
|
||||
// Synthesize the minimal, monotone rule that can be applied to the given
|
||||
// hyperedge and add it to the rule trie.
|
||||
void SynthesizeRule(const Forest::Hyperedge &);
|
||||
|
@ -74,8 +74,7 @@ void Manager<RuleMatcher>::Decode()
|
||||
RuleMatcherCallback callback(m_stackMap, ruleLimit);
|
||||
|
||||
// Create a glue rule synthesizer.
|
||||
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie,
|
||||
options()->input.factor_order);
|
||||
GlueRuleSynthesizer glueRuleSynthesizer(*options(), *m_glueRuleTrie);
|
||||
|
||||
// Sort the input forest's vertices into bottom-up topological order.
|
||||
std::vector<const Forest::Vertex *> sortedVertices;
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -12,7 +12,9 @@ namespace Syntax
|
||||
namespace T2S
|
||||
{
|
||||
|
||||
void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
|
||||
void
|
||||
GlueRuleSynthesizer::
|
||||
SynthesizeRule(const InputTree::Node &node)
|
||||
{
|
||||
const Word &sourceLhs = node.pvertex.symbol;
|
||||
boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
|
||||
@ -22,7 +24,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
|
||||
tpc->Add(tp);
|
||||
}
|
||||
|
||||
Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
|
||||
Phrase*
|
||||
GlueRuleSynthesizer::
|
||||
SynthesizeSourcePhrase(const InputTree::Node &node)
|
||||
{
|
||||
Phrase *phrase = new Phrase(node.children.size());
|
||||
for (std::vector<InputTree::Node*>::const_iterator p = node.children.begin();
|
||||
@ -37,11 +41,10 @@ Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
|
||||
return phrase;
|
||||
}
|
||||
|
||||
TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
const InputTree::Node &node, const Phrase &sourceRhs)
|
||||
TargetPhrase*
|
||||
GlueRuleSynthesizer::
|
||||
SynthesizeTargetPhrase(const InputTree::Node &node, const Phrase &sourceRhs)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
|
||||
UnknownWordPenaltyProducer::Instance();
|
||||
|
||||
@ -51,7 +54,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
for (std::size_t i = 0; i < node.children.size(); ++i) {
|
||||
const Word &symbol = node.children[i]->pvertex.symbol;
|
||||
if (symbol.IsNonTerminal()) {
|
||||
targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
|
||||
targetPhrase->AddWord(m_output_default_nonterminal);
|
||||
} else {
|
||||
// TODO Check this
|
||||
Word &targetWord = targetPhrase->AddWord();
|
||||
@ -65,7 +68,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
|
||||
float score = LOWEST_SCORE;
|
||||
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
|
||||
targetPhrase->EvaluateInIsolation(sourceRhs);
|
||||
Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
|
||||
Word *targetLhs = new Word(m_output_default_nonterminal);
|
||||
targetPhrase->SetTargetLHS(targetLhs);
|
||||
targetPhrase->SetAlignmentInfo(alignmentSS.str());
|
||||
|
||||
|
@ -16,9 +16,13 @@ namespace T2S
|
||||
|
||||
class GlueRuleSynthesizer : public RuleTrieCreator
|
||||
{
|
||||
Word m_output_default_nonterminal;
|
||||
public:
|
||||
GlueRuleSynthesizer(RuleTrie &trie) : m_ruleTrie(trie) {}
|
||||
|
||||
GlueRuleSynthesizer(RuleTrie &trie, Word dflt_nonterm)
|
||||
: m_ruleTrie(trie)
|
||||
, m_output_default_nonterminal(dflt_nonterm)
|
||||
{}
|
||||
|
||||
// Synthesize the minimal, monotone rule that can be applied to the given node
|
||||
// and add it to the rule trie.
|
||||
void SynthesizeRule(const InputTree::Node &);
|
||||
|
@ -111,7 +111,8 @@ void Manager<RuleMatcher>::Decode()
|
||||
F2S::RuleMatcherCallback callback(m_stackMap, ruleLimit);
|
||||
|
||||
// Create a glue rule synthesizer.
|
||||
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie);
|
||||
Word dflt_nonterm = options()->syntax.output_default_non_terminal;
|
||||
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie, dflt_nonterm);
|
||||
|
||||
// Visit each node of the input tree in post-order.
|
||||
for (std::vector<InputTree::Node>::const_iterator p =
|
||||
|
@ -47,9 +47,7 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
|
||||
int
|
||||
TabbedSentence::
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream& in)
|
||||
{
|
||||
TabbedColumns allColumns;
|
||||
|
||||
@ -60,17 +58,14 @@ Read(std::istream& in,
|
||||
boost::split(allColumns, line, boost::is_any_of("\t"));
|
||||
|
||||
if(allColumns.size() < 2) {
|
||||
std::stringstream dummyStream;
|
||||
dummyStream << line << std::endl;
|
||||
return Sentence::Read(dummyStream, factorOrder, opts);
|
||||
Sentence::init(line);
|
||||
} else {
|
||||
m_columns.resize(allColumns.size() - 1);
|
||||
std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
|
||||
|
||||
std::stringstream dummyStream;
|
||||
dummyStream << allColumns[0] << std::endl;
|
||||
return Sentence::Read(dummyStream, factorOrder, opts);
|
||||
Sentence::init(allColumns[0]);
|
||||
}
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -68,8 +68,7 @@ public:
|
||||
, const std::string &tabbedString);
|
||||
|
||||
virtual int
|
||||
Read(std::istream& in,const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
Read(std::istream& in);
|
||||
|
||||
const TabbedColumns& GetColumns() const {
|
||||
return m_columns;
|
||||
|
@ -51,6 +51,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
|
||||
|
||||
size_t sourceSize = parser.GetSize();
|
||||
m_expandableDottedRuleListVec.resize(sourceSize);
|
||||
m_input_default_nonterminal = parser.options()->syntax.input_default_non_terminal;
|
||||
|
||||
for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
|
||||
DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
|
||||
@ -81,7 +82,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
||||
ChartParserCallback &outColl)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
|
||||
// const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
|
||||
const Range &range = inputPath.GetWordsRange();
|
||||
|
||||
size_t relEndPos = range.GetEndPos() - range.GetStartPos();
|
||||
@ -178,7 +179,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
||||
if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
|
||||
// for Hieu's source syntax
|
||||
|
||||
bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
|
||||
bool isSourceSyntaxNonTerm = sourceLHS != m_input_default_nonterminal; // defaultSourceNonTerm;
|
||||
size_t nonTermNumWordsCovered = endPos - startPos + 1;
|
||||
|
||||
doSearch = isSourceSyntaxNonTerm ?
|
||||
|
@ -57,6 +57,7 @@ private:
|
||||
std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
|
||||
std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
|
||||
std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
|
||||
Word m_input_default_nonterminal;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -240,25 +240,20 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line,
|
||||
//! populate this InputType with data from in stream
|
||||
int
|
||||
TreeInput::
|
||||
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream& in)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
string line;
|
||||
if (getline(in, line, '\n').eof())
|
||||
return 0;
|
||||
// remove extra spaces
|
||||
//line = Trim(line);
|
||||
|
||||
|
||||
m_labelledSpans.clear();
|
||||
ProcessAndStripXMLTags(opts, line, m_labelledSpans, m_xmlOptions);
|
||||
ProcessAndStripXMLTags(*m_options, line, m_labelledSpans, m_xmlOptions);
|
||||
|
||||
// do words 1st - hack
|
||||
stringstream strme;
|
||||
strme << line << endl;
|
||||
|
||||
Sentence::Read(strme, factorOrder, opts);
|
||||
Sentence::Read(strme);
|
||||
|
||||
// size input chart
|
||||
size_t sourceSize = GetSize();
|
||||
@ -270,19 +265,21 @@ Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
|
||||
// do source labels
|
||||
vector<XMLParseOutput>::const_iterator iterLabel;
|
||||
for (iterLabel = m_labelledSpans.begin(); iterLabel != m_labelledSpans.end(); ++iterLabel) {
|
||||
for (iterLabel = m_labelledSpans.begin();
|
||||
iterLabel != m_labelledSpans.end(); ++iterLabel) {
|
||||
const XMLParseOutput &labelItem = *iterLabel;
|
||||
const Range &range = labelItem.m_range;
|
||||
const string &label = labelItem.m_label;
|
||||
AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label, factorOrder);
|
||||
AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label);
|
||||
}
|
||||
|
||||
// default label
|
||||
bool only4empty = m_options->syntax.default_non_term_only_for_empty_range;
|
||||
for (size_t startPos = 0; startPos < sourceSize; ++startPos) {
|
||||
for (size_t endPos = startPos; endPos < sourceSize; ++endPos) {
|
||||
NonTerminalSet &list = GetLabelSet(startPos, endPos);
|
||||
if (list.size() == 0 || !staticData.GetDefaultNonTermOnlyForEmptyRange()) {
|
||||
AddChartLabel(startPos, endPos, staticData.GetInputDefaultNonTerminal(), factorOrder);
|
||||
if (list.size() == 0 || ! only4empty ) {
|
||||
AddChartLabel(startPos, endPos, m_options->syntax.input_default_non_terminal);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -303,13 +300,13 @@ TranslationOptionCollection* TreeInput::CreateTranslationOptionCollection() cons
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
|
||||
, const std::vector<FactorType>& /* factorOrder */)
|
||||
void
|
||||
TreeInput::
|
||||
AddChartLabel(size_t startPos, size_t endPos, const Word &label)
|
||||
{
|
||||
UTIL_THROW_IF2(!label.IsNonTerminal(),
|
||||
"Label must be a non-terminal");
|
||||
|
||||
SourceLabelOverlap overlapType = StaticData::Instance().GetSourceLabelOverlap();
|
||||
SourceLabelOverlap overlapType = m_options->syntax.source_label_overlap;
|
||||
NonTerminalSet &list = GetLabelSet(startPos, endPos);
|
||||
switch (overlapType) {
|
||||
case SourceLabelOverlapAdd:
|
||||
@ -327,14 +324,17 @@ void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
|
||||
}
|
||||
}
|
||||
|
||||
void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const string &label
|
||||
, const std::vector<FactorType>& factorOrder)
|
||||
void
|
||||
TreeInput::
|
||||
AddChartLabel(size_t startPos, size_t endPos, const string &label)
|
||||
{
|
||||
const std::vector<FactorType>& fOrder = m_options->input.factor_order;
|
||||
Word word(true);
|
||||
const Factor *factor = FactorCollection::Instance().AddFactor(Input, factorOrder[0], label, true); // TODO - no factors
|
||||
const Factor *factor
|
||||
= FactorCollection::Instance().AddFactor(Input, fOrder[0], label, true);
|
||||
// TODO - no factors
|
||||
word.SetFactor(0, factor);
|
||||
|
||||
AddChartLabel(startPos, endPos, word, factorOrder);
|
||||
AddChartLabel(startPos, endPos, word);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const TreeInput &input)
|
||||
|
@ -35,10 +35,9 @@ protected:
|
||||
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
|
||||
std::vector<XMLParseOutput> m_labelledSpans;
|
||||
|
||||
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
|
||||
,const std::vector<FactorType>& factorOrder);
|
||||
void AddChartLabel(size_t startPos, size_t endPos, const Word &label
|
||||
,const std::vector<FactorType>& factorOrder);
|
||||
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label);
|
||||
void AddChartLabel(size_t startPos, size_t endPos, const Word &label);
|
||||
|
||||
NonTerminalSet &GetLabelSet(size_t startPos, size_t endPos) {
|
||||
return m_sourceChart[startPos][endPos - startPos];
|
||||
}
|
||||
@ -56,9 +55,7 @@ public:
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
Read(std::istream& in);
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const;
|
||||
|
@ -52,18 +52,15 @@ void WordLattice::Print(std::ostream& out) const
|
||||
|
||||
int
|
||||
WordLattice::
|
||||
InitializeFromPCNDataType
|
||||
(const PCN::CN& cn, size_t const maxPhraseLength,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
const std::string& debug_line)
|
||||
InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line)
|
||||
{
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
||||
size_t const maxPhraseLength = m_options->search.max_phrase_length;
|
||||
|
||||
const InputFeature *inputFeature = InputFeature::InstancePtr();
|
||||
size_t numInputScores = inputFeature->GetNumInputScores();
|
||||
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
|
||||
|
||||
// size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||
|
||||
bool addRealWordCount = (numRealWordCount > 0);
|
||||
|
||||
//when we have one more weight than params, we add a word count feature
|
||||
@ -150,9 +147,7 @@ InitializeFromPCNDataType
|
||||
|
||||
int
|
||||
WordLattice::
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
Read(std::istream& in)
|
||||
{
|
||||
Clear();
|
||||
std::string line;
|
||||
@ -163,8 +158,7 @@ Read(std::istream& in,
|
||||
}
|
||||
|
||||
PCN::CN cn = PCN::parsePCN(line);
|
||||
return InitializeFromPCNDataType(cn, opts.search.max_phrase_length,
|
||||
factorOrder, line);
|
||||
return InitializeFromPCNDataType(cn, line);
|
||||
}
|
||||
|
||||
void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
|
||||
@ -228,17 +222,10 @@ TranslationOptionCollection*
|
||||
WordLattice
|
||||
::CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
||||
{
|
||||
// size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
// float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold();
|
||||
|
||||
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
|
||||
// StaticData::Instance().GetMaxNoTransOptPerCoverage();
|
||||
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
|
||||
float translationOptionThreshold = ttask->options()->search.trans_opt_threshold;
|
||||
// StaticData::Instance().GetTranslationOptionThreshold();
|
||||
|
||||
|
||||
TranslationOptionCollection *rv = NULL;
|
||||
//rv = new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
|
||||
if (StaticData::Instance().GetUseLegacyPT()) {
|
||||
rv = new TranslationOptionCollectionConfusionNet(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||
|
@ -40,14 +40,11 @@ public:
|
||||
/** Given a lattice represented using the PCN::CN data type (topologically sorted adjacency list
|
||||
* representation), initialize the WordLattice object
|
||||
*/
|
||||
int InitializeFromPCNDataType(const PCN::CN& cn, size_t const maxPhraseLength,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
const std::string& debug_line = "");
|
||||
int InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line = "");
|
||||
|
||||
/** Read from PLF format (1 lattice per line)
|
||||
*/
|
||||
int Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts);
|
||||
int Read(std::istream& in);
|
||||
|
||||
/** Convert internal representation into an edge matrix
|
||||
* @note edges[1][2] means there is an edge from 1 to 2
|
||||
|
@ -159,16 +159,19 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
|
||||
* \param rbrackStr xml tag's right bracket string, typically ">"
|
||||
*/
|
||||
bool
|
||||
ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption const*> &res,
|
||||
ProcessAndStripXMLTags(AllOptions const& opts, string &line,
|
||||
vector<XmlOption const*> &res,
|
||||
ReorderingConstraint &reorderingConstraint,
|
||||
vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders,
|
||||
int offset, const std::string& lbrackStr,
|
||||
const std::string& rbrackStr)
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders)
|
||||
{
|
||||
//parse XML markup in translation line
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::string& lbrackStr = opts.input.xml_brackets.first;
|
||||
const std::string& rbrackStr = opts.input.xml_brackets.second;
|
||||
int offset = is_syntax(opts.search.algo) ? 1 : 0;
|
||||
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// hack. What pt should XML trans opt be assigned to?
|
||||
PhraseDictionary *firstPt = NULL;
|
||||
@ -177,7 +180,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
|
||||
}
|
||||
|
||||
// no xml tag? we're done.
|
||||
//if (line.find_first_of('<') == string::npos) {
|
||||
if (line.find(lbrackStr) == string::npos) {
|
||||
return true;
|
||||
}
|
||||
@ -195,7 +197,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
|
||||
size_t wordPos = 0; // position in sentence (in terms of number of words)
|
||||
|
||||
const vector<FactorType> &outputFactorOrder = opts.output.factor_order;
|
||||
// const string &factorDelimiter = staticData.GetFactorDelimiter();
|
||||
|
||||
// loop through the tokens
|
||||
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
|
||||
@ -459,7 +460,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
|
||||
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
|
||||
|
||||
// lhs
|
||||
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
|
||||
const UnknownLHSList &lhsList = opts.syntax.unknown_lhs; // staticData.GetUnknownLHS();
|
||||
if (!lhsList.empty()) {
|
||||
const Factor *factor = FactorCollection::Instance().AddFactor(lhsList[0].first, true);
|
||||
Word *targetLHS = new Word(true);
|
||||
|
@ -32,10 +32,10 @@ std::vector<std::string> TokenizeXml(const std::string& str, const std::string&
|
||||
|
||||
bool ProcessAndStripXMLTags(AllOptions const& opts,
|
||||
std::string &line, std::vector<XmlOption const*> &res,
|
||||
ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders,
|
||||
int offset,
|
||||
const std::string& lbrackStr="<", const std::string& rbrackStr=">");
|
||||
ReorderingConstraint &reorderingConstraint,
|
||||
std::vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -327,7 +327,7 @@ run_chart_decoder()
|
||||
{
|
||||
Moses::TreeInput tinput(m_options);
|
||||
istringstream buf(m_source_string + "\n");
|
||||
tinput.Read(buf, options()->input.factor_order, *m_options);
|
||||
tinput.Read(buf);
|
||||
|
||||
Moses::ChartManager manager(this->self());
|
||||
manager.Decode();
|
||||
|
Loading…
Reference in New Issue
Block a user