Code cleanup and refactoring.

This commit is contained in:
Ulrich Germann 2015-12-10 03:17:36 +00:00
parent 240b88c683
commit 831dc83778
37 changed files with 255 additions and 266 deletions

View File

@ -47,14 +47,24 @@ ChartParserUnknown::~ChartParserUnknown()
// RemoveAllInColl(m_cacheTargetPhraseCollection);
}
void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
// Accessor for the options object held by the task referenced through
// m_ttask; used by Process() instead of going through StaticData.
// NOTE(review): the result of m_ttask.lock() is dereferenced without a null
// check -- presumably the task is guaranteed to outlive this object; confirm.
AllOptions::ptr const& ChartParserUnknown::options() const
{
  return (*m_ttask.lock()).options();
}
void
ChartParserUnknown::
Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
{
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer
= UnknownWordPenaltyProducer::Instance();
size_t isDigit = 0;
if (staticData.options().unk.drop) {
if (options()->unk.drop) {
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const StringPiece s = f->GetString();
isDigit = s.find_first_of("0123456789");
@ -79,9 +89,9 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
}
//TranslationOption *transOpt;
if (! staticData.options().unk.drop || isDigit) {
if (! options()->unk.drop || isDigit) {
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
@ -91,8 +101,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
targetLHSStr, true);
targetLHS->CreateFromString(Output, options()->output.factor_order,
targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
@ -108,9 +118,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
AllOptions const& opts = staticData.options();
if (!opts.output.detailed_tree_transrep_filepath.empty() ||
opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) {
if (!options()->output.detailed_tree_transrep_filepath.empty() ||
options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) {
std::string prop = "[ ";
prop += (*targetLHS)[0]->GetString().as_string() + " ";
prop += sourceWord[0]->GetString().as_string() + " ]";
@ -126,15 +135,15 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
//float prob = iterLHS->second;
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
targetLHSStr, true);
targetLHS->CreateFromString(Output, staticData.options().output.factor_order,
targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
@ -214,9 +223,7 @@ void ChartParser::Create(const Range &range, ChartParserCallback &to)
if (range.GetNumWordsCovered() == 1
&& range.GetStartPos() != 0
&& range.GetStartPos() != m_source.GetSize()-1) {
bool always = m_ttask.lock()->options()->unk.always_create_direct_transopt;
// bool alwaysCreateDirectTranslationOption
// = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
bool always = options()->unk.always_create_direct_transopt;
if (to.Empty() || always) {
// create unknown words for 1 word coverage where we don't have any trans options
const Word &sourceWord = m_source.GetWord(range.GetStartPos());
@ -291,4 +298,14 @@ long ChartParser::GetTranslationId() const
{
return m_source.GetTranslationId();
}
// Accessor for the options object held by the task referenced through
// m_ttask.
// NOTE(review): the result of m_ttask.lock() is dereferenced without a null
// check -- presumably the task is guaranteed to outlive this parser; confirm.
AllOptions::ptr const& ChartParser::options() const
{
  return (*m_ttask.lock()).options();
}
} // namespace Moses

View File

@ -57,6 +57,7 @@ public:
private:
std::vector<Phrase*> m_unksrcs;
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
AllOptions::ptr const& options() const;
};
class ChartParser
@ -78,6 +79,8 @@ public:
return m_unknown.GetUnknownSources();
}
AllOptions::ptr const& options() const;
private:
ChartParserUnknown m_unknown;
std::vector <DecodeGraph*> m_decodeGraphList;

View File

@ -66,9 +66,8 @@ ConfusionNet(AllOptions::ptr const& opts) : InputType(opts)
{
stats.createOne();
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax()) {
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
if (is_syntax(opts->search.algo)) {
m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
}
UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified");
}
@ -92,14 +91,14 @@ ConfusionNet(Sentence const& s) : InputType(s.options())
bool
ConfusionNet::
ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
ReadF(std::istream& in, int format)
{
VERBOSE(2, "read confusion net with format "<<format<<"\n");
switch(format) {
case 0:
return ReadFormat0(in,factorOrder);
return ReadFormat0(in);
case 1:
return ReadFormat1(in,factorOrder);
return ReadFormat1(in);
default:
std::cerr << "ERROR: unknown format '"<<format
<<"' in ConfusionNet::Read";
@ -109,22 +108,20 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
int
ConfusionNet::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
Read(std::istream& in)
{
int rv=ReadF(in,factorOrder,0);
int rv=ReadF(in,0);
if(rv) stats.collect(*this);
return rv;
}
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
ReadFormat0(std::istream& in)
{
Clear();
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
// const StaticData &staticData = StaticData::Instance();
const InputFeature *inputFeature = InputFeature::InstancePtr();
size_t numInputScores = inputFeature->GetNumInputScores();
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
@ -140,7 +137,6 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
Column col;
while(is>>word) {
Word w;
// String2Word(word,w,factorOrder);
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
std::vector<float> probs(totalCount, 0.0);
for(size_t i=0; i < numInputScores; i++) {
@ -179,9 +175,10 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
bool
ConfusionNet::
ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder)
ReadFormat1(std::istream& in)
{
Clear();
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
std::string line;
if(!getline(in,line)) return 0;
size_t s;

View File

@ -30,8 +30,8 @@ protected:
std::vector<Column> data;
NonTerminalSet m_defaultLabelSet;
bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
bool ReadFormat0(std::istream&);
bool ReadFormat1(std::istream&);
void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
public:
@ -46,7 +46,8 @@ public:
const Column& GetColumn(size_t i) const {
UTIL_THROW_IF2(i >= data.size(),
"Out of bounds. Trying to access " << i << " when vector only contains " << data.size());
"Out of bounds. Trying to access " << i
<< " when vector only contains " << data.size());
return data[i];
}
const Column& operator[](size_t i) const {
@ -64,11 +65,10 @@ public:
data.clear();
}
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
bool ReadF(std::istream&, int format=0);
virtual void Print(std::ostream&) const;
int Read(std::istream& in,const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
int Read(std::istream& in);
Phrase GetSubString(const Range&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined

View File

@ -1,7 +1,6 @@
#include "CountNonTerms.h"
#include "moses/Util.h"
#include "moses/TargetPhrase.h"
#include "moses/StaticData.h"
using namespace std;
@ -21,8 +20,6 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{
const StaticData &staticData = StaticData::Instance();
vector<float> scores(m_numScoreComponents, 0);
size_t indScore = 0;
@ -39,7 +36,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
if (m_targetSyntax) {
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Word &word = targetPhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) {
if (word.IsNonTerminal() && word != m_output_default_nonterminal) {
++scores[indScore];
}
}
@ -49,7 +46,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
if (m_sourceSyntax) {
for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) {
const Word &word = sourcePhrase.GetWord(i);
if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) {
if (word.IsNonTerminal() && word != m_input_default_nonterminal) {
++scores[indScore];
}
}
@ -72,5 +69,13 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu
}
}
// Copies the default input and output non-terminal symbols out of the
// syntax options into member variables, so that EvaluateInIsolation() can
// compare words against them.
void CountNonTerms::Load(AllOptions const& opts)
{
  m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
  m_input_default_nonterminal  = opts.syntax.input_default_non_terminal;
}
}

View File

@ -7,6 +7,8 @@ namespace Moses
class CountNonTerms : public StatelessFeatureFunction
{
Word m_input_default_nonterminal;
Word m_output_default_nonterminal;
public:
CountNonTerms(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
@ -41,6 +43,7 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void Load(AllOptions const& opts);
protected:
bool m_all, m_sourceSyntax, m_targetSyntax;
};

View File

@ -14,11 +14,11 @@ RuleScope::RuleScope(const std::string &line)
{
}
bool IsAmbiguous(const Word &word, bool sourceSyntax)
{
const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
}
// bool IsAmbiguous(const Word &word, bool sourceSyntax)
// {
// const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal();
// return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
// }
void RuleScope::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase

View File

@ -98,6 +98,7 @@ void SoftSourceSyntacticConstraintsFeature::Load(AllOptions const& opts)
if (!m_targetSourceLHSJointCountFile.empty()) {
LoadTargetSourceLeftHandSideJointCountFile();
}
m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
}
void SoftSourceSyntacticConstraintsFeature::LoadSourceLabelSet()
@ -311,8 +312,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
std::vector<float> newScores(m_numScoreComponents,0);
const TreeInput& treeInput = static_cast<const TreeInput&>(input);
const StaticData& staticData = StaticData::Instance();
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
// const StaticData& staticData = StaticData::Instance();
// const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
size_t nNTs = 1;
bool treeInputMismatchLHSBinary = true;
@ -365,7 +366,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
if (*treeInputLabelsIt != m_output_default_nonterminal) {
boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
= m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
@ -387,7 +388,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin();
treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) {
if (*treeInputLabelsIt != outputDefaultNonTerminal) {
if (*treeInputLabelsIt != m_output_default_nonterminal) {
boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel
= m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]);
if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) {
@ -568,7 +569,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
}
if ( treeInputLabelsLHS.size() == 0 ) {
scoreBreakdown.PlusEquals(this,
"LHSPAIR_" + targetLHS->GetString().as_string() + "_" + outputDefaultNonTerminal[0]->GetString().as_string(),
"LHSPAIR_" + targetLHS->GetString().as_string() + "_"
+ m_output_default_nonterminal[0]->GetString().as_string(),
1);
if (!m_targetSourceLHSJointCountFile.empty()) {
t2sLabelsScore = TransformScore(m_floor);

View File

@ -101,6 +101,7 @@ protected:
std::pair<float,float> GetLabelPairProbabilities(const Factor* target,
const size_t source) const;
Word m_output_default_nonterminal;
};

View File

@ -47,11 +47,12 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType
const Word& lhsLabel = targetPhrase.GetTargetLHS();
const StaticData& staticData = StaticData::Instance();
const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal();
std::vector<float> newScores(m_numScoreComponents,0.0); // m_numScoreComponents == 2 // first fires for matches, second for mismatches
std::vector<float> newScores(m_numScoreComponents,0.0);
// m_numScoreComponents == 2 // first fires for matches, second for mismatches
if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end()) && (lhsLabel != outputDefaultNonTerminal) ) {
if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end())
&& (lhsLabel != m_output_default_nonterminal) ) {
// match
newScores[0] = 1.0;
} else {
@ -62,6 +63,12 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType
scoreBreakdown.PlusEquals(this, newScores);
}
// Stores a copy of the default output non-terminal from the syntax options
// in m_output_default_nonterminal.
void SourceGHKMTreeInputMatchFeature::Load(AllOptions const& opts)
{
  m_output_default_nonterminal = opts.syntax.output_default_non_terminal;
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "StatelessFeatureFunction.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
@ -8,6 +9,7 @@ namespace Moses
// assumes that source-side syntax labels are stored in the target non-terminal field of the rules
class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
{
Word m_output_default_nonterminal;
public:
SourceGHKMTreeInputMatchFeature(const std::string &line);
@ -40,6 +42,7 @@ public:
void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const {};
void Load(AllOptions const& opts);
};

View File

@ -18,9 +18,7 @@ namespace Moses
//! populate this InputType with data from in stream
int ForestInput::
Read(std::istream &in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
Read(std::istream &in)
{
using Syntax::F2S::Forest;
@ -48,7 +46,7 @@ Read(std::istream &in,
std::getline(in, line);
} else {
do {
ParseHyperedgeLine(line, factorOrder);
ParseHyperedgeLine(line);
std::getline(in, line);
} while (line != "");
}
@ -58,7 +56,7 @@ Read(std::istream &in,
// not sure ForestInput needs to.
std::stringstream strme;
strme << "<s> " << sentence << " </s>" << std::endl;
Sentence::Read(strme, factorOrder, opts);
Sentence::Read(strme);
// Find the maximum end position of any vertex (0 if forest is empty).
std::size_t maxEnd = FindMaxEnd(*m_forest);
@ -70,6 +68,9 @@ Read(std::istream &in,
assert(topVertices.size() >= 1);
}
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
// Add <s> vertex.
Forest::Vertex *startSymbol = NULL;
{
@ -122,7 +123,9 @@ Read(std::istream &in,
return 1;
}
Syntax::F2S::Forest::Vertex *ForestInput::AddOrDeleteVertex(Forest::Vertex *v)
Syntax::F2S::Forest::Vertex*
ForestInput::
AddOrDeleteVertex(Forest::Vertex *v)
{
std::pair<VertexSet::iterator, bool> ret = m_vertexSet.insert(v);
if (ret.second) {
@ -172,14 +175,16 @@ void ForestInput::FindTopVertices(Forest &forest,
std::back_inserter(topVertices));
}
void ForestInput::ParseHyperedgeLine(
const std::string &line, const std::vector<FactorType>& factorOrder)
void
ForestInput::
ParseHyperedgeLine(const std::string &line)
{
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
using Syntax::F2S::Forest;
const util::AnyCharacter delimiter(" \t");
util::TokenIter<util::AnyCharacter, true> p(line, delimiter);
Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p, factorOrder));
Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p));
Forest::Hyperedge *e = new Forest::Hyperedge();
e->head = v;
++p;
@ -188,7 +193,7 @@ void ForestInput::ParseHyperedgeLine(
//throw Exception("");
}
for (++p; *p != "|||"; ++p) {
v = ParseVertex(*p, factorOrder);
v = ParseVertex(*p);
if (!v->pvertex.symbol.IsNonTerminal()) {
// Egret does not give start/end for terminals.
v->pvertex.span = Range(e->head->pvertex.span.GetStartPos(),
@ -203,11 +208,11 @@ void ForestInput::ParseHyperedgeLine(
e->head->incoming.push_back(e);
}
Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex(
const StringPiece &s, const std::vector<FactorType>& factorOrder)
Syntax::F2S::Forest::Vertex*
ForestInput::ParseVertex(const StringPiece &s)
{
using Syntax::F2S::Forest;
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
Word symbol;
std::size_t pos = s.rfind('[');
if (pos == std::string::npos) {

View File

@ -29,9 +29,7 @@ public:
//! populate this InputType with data from in stream
virtual int
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
Read(std::istream& in);
//! Output debugging info to stream out
virtual void Print(std::ostream&) const;
@ -76,11 +74,9 @@ private:
void FindTopVertices(Forest &, std::vector<Forest::Vertex *> &);
void ParseHyperedgeLine(const std::string &,
const std::vector<FactorType> &);
void ParseHyperedgeLine(const std::string &);
Forest::Vertex *ParseVertex(const StringPiece &,
const std::vector<FactorType> &);
Forest::Vertex *ParseVertex(const StringPiece &);
boost::shared_ptr<Forest> m_forest;
Forest::Vertex *m_rootVertex;

View File

@ -228,13 +228,13 @@ BufferInput()
m_buffered_ahead -= ret->GetSize();
} else {
source.reset(new itype(m_options));
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
if (!source->Read(*m_inputStream))
return ret;
ret = source;
}
while (m_buffered_ahead < m_look_ahead) {
source.reset(new itype(m_options));
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
if (!source->Read(*m_inputStream))
break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();

View File

@ -190,9 +190,10 @@ public:
//! populate this InputType with data from in stream
virtual int
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts) =0;
Read(std::istream& in) = 0;
// ,
// std::vector<FactorType> const& factorOrder,
// AllOptions const& opts) =0;
//! Output debugging info to stream out
virtual void Print(std::ostream&) const =0;

View File

@ -38,9 +38,8 @@ MockHypothesisGuard
m_uwp("UnknownWordPenalty"), m_dist("Distortion")
{
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
std::vector<Moses::FactorType> factors(1,0);
AllOptions::ptr opts(new AllOptions(StaticData::Instance().options()));
m_sentence.reset(new Sentence(opts,0, sourceSentence, &factors));
m_sentence.reset(new Sentence(opts, 0, sourceSentence));
m_ttask = TranslationTask::create(m_sentence);
m_manager.reset(new Manager(m_ttask));
@ -59,16 +58,14 @@ MockHypothesisGuard
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
Hypothesis* prevHypo = m_hypothesis;
Range range(ai->first,ai->second);
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(),
range);
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range);
m_targetPhrases.push_back(TargetPhrase(NULL));
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);
vector<FactorType> const& factors = opts->output.factor_order;
m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL);
m_toptions.push_back(new TranslationOption
(range,m_targetPhrases.back()));
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap, m_manager->GetNextHypoId());
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap,
m_manager->GetNextHypoId());
}

View File

@ -43,9 +43,8 @@ namespace Moses
Sentence::
Sentence(AllOptions::ptr const& opts) : Phrase(0) , InputType(opts)
{
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax())
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
if (is_syntax(opts->search.algo))
m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
}
Sentence::
@ -146,65 +145,59 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
void
Sentence::
aux_interpret_xml(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
std::vector<std::pair<size_t, std::string> >& placeholders)
{
// parse XML markup in translation line
const StaticData &SD = StaticData::Instance();
using namespace std;
if (opts.input.xml_policy != XmlPassThrough) {
int offset = SD.IsSyntax() ? 1 : 0;
bool OK = ProcessAndStripXMLTags(opts, line, m_xmlOptions,
if (m_options->input.xml_policy != XmlPassThrough) {
bool OK = ProcessAndStripXMLTags(*m_options, line,
m_xmlOptions,
m_reorderingConstraint,
xmlWalls, placeholders, offset,
SD.GetXmlBrackets().first,
SD.GetXmlBrackets().second);
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
xmlWalls, placeholders);
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
}
}
void
Sentence::
init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& factorOrder)
init(string line)
{
using namespace std;
const StaticData &SD = StaticData::Instance();
m_frontSpanCoveredLength = 0;
m_sourceCompleted.resize(0);
if (SD.ContinuePartialTranslation())
if (m_options->input.continue_partial_translation)
aux_init_partial_translation(line);
line = Trim(line);
aux_interpret_sgml_markup(line); // for "<seg id=..." markup
aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>"
if (SD.options().output.PrintPassThrough ||
SD.options().nbest.include_passthrough) {
if (m_options->output.PrintPassThrough ||m_options->nbest.include_passthrough) {
string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru);
}
vector<size_t> xmlWalls;
vector<pair<size_t, string> >placeholders;
aux_interpret_xml(*opts, line, xmlWalls, placeholders);
aux_interpret_xml(line, xmlWalls, placeholders);
Phrase::CreateFromString(Input, factorOrder, line, NULL);
Phrase::CreateFromString(Input, m_options->input.factor_order, line, NULL);
ProcessPlaceholders(placeholders);
if (SD.IsSyntax()) InitStartEndWord();
if (is_syntax(m_options->search.algo))
InitStartEndWord();
// now that we have final word positions in phrase (from
// CreateFromString), we can make input phrase objects to go with
// our XmlOptions and create TranslationOptions
// only fill the vector if we are parsing XML
if (opts->input.xml_policy != XmlPassThrough) {
if (m_options->input.xml_policy != XmlPassThrough) {
m_xmlCoverageMap.assign(GetSize(), false);
BOOST_FOREACH(XmlOption const* o, m_xmlOptions) {
Range const& r = o->range;
@ -217,7 +210,7 @@ init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& fa
m_reorderingConstraint.InitializeWalls(GetSize());
// set reordering walls, if "-monotone-at-punction" is set
if (SD.UseReorderingConstraint() && GetSize()) {
if (m_options->reordering.monotone_at_punct && GetSize()) {
Range r(0, GetSize()-1);
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
}
@ -232,14 +225,12 @@ init(AllOptions::ptr const& opts, string line, std::vector<FactorType> const& fa
int
Sentence::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
Read(std::istream& in)
{
std::string line;
if (getline(in, line, '\n').eof())
return 0;
init(m_options, line, factorOrder);
init(line);
return 1;
}
@ -247,7 +238,7 @@ void
Sentence::
ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders)
{
FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor;
FactorType placeholderFactor = m_options->input.placeholder_factor;
if (placeholderFactor == NOT_FOUND) {
return;
}
@ -325,7 +316,7 @@ void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list,
std::vector <ChartTranslationOptions*>
Sentence::
GetXmlChartTranslationOptions(AllOptions const& opts) const
GetXmlChartTranslationOptions() const
{
std::vector <ChartTranslationOptions*> ret;
@ -333,7 +324,7 @@ GetXmlChartTranslationOptions(AllOptions const& opts) const
// this code is a copy of the 1 in Sentence.
//only fill the vector if we are parsing XML
if (opts.input.xml_policy != XmlPassThrough ) {
if (m_options->input.xml_policy != XmlPassThrough ) {
//TODO: needed to handle exclusive
//for (size_t i=0; i<GetSize(); i++) {
// m_xmlCoverageMap.push_back(false);
@ -374,12 +365,10 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
}
Sentence::
Sentence(AllOptions::ptr const& opts, size_t const transId,
string stext, vector<FactorType> const* IFO)
Sentence(AllOptions::ptr const& opts, size_t const transId, string stext)
: InputType(opts, transId)
{
if (IFO) init(opts,stext, *IFO);
else init(opts, stext, opts->input.factor_order);
init(stext);
}
}

View File

@ -64,8 +64,8 @@ protected:
public:
Sentence(AllOptions::ptr const& opts);
Sentence(AllOptions::ptr const& opts, size_t const transId, std::string stext,
std::vector<FactorType> const* IFO = NULL);
Sentence(AllOptions::ptr const& opts, size_t const transId, std::string stext);
// std::vector<FactorType> const* IFO = NULL);
// Sentence(size_t const transId, std::string const& stext);
~Sentence();
@ -94,11 +94,11 @@ public:
//! populates vector argument with XML force translation options for the specific range passed
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions(AllOptions const& opts) const;
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
virtual int
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
Read(std::istream& in);
// , const std::vector<FactorType>& factorOrder, AllOptions const& opts);
void Print(std::ostream& out) const;
@ -115,9 +115,7 @@ public:
}
void
init(AllOptions::ptr const& opts, std::string line,
std::vector<FactorType> const& factorOrder);
void init(std::string line);
std::vector<std::map<std::string,std::string> > const&
GetDltMeta() const {
@ -139,7 +137,7 @@ private:
void
aux_interpret_xml
(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
(std::string& line, std::vector<size_t> & xmlWalls,
std::vector<std::pair<size_t, std::string> >& placeholders);
void

View File

@ -219,6 +219,8 @@ bool StaticData::LoadData(Parameter *parameter)
const PARAM_VEC *params;
m_options.init(*parameter);
if (is_syntax(m_options.search.algo))
m_options.syntax.LoadNonTerminals(*parameter, FactorCollection::Instance());
if (IsSyntax())
LoadChartDecodingParameters();

View File

@ -337,33 +337,10 @@ public:
return m_includeLHSInSearchGraph;
}
std::pair<std::string,std::string> GetXmlBrackets() const {
return m_xmlBrackets;
}
// bool PrintTranslationOptions() const {
// return m_printTranslationOptions;
// }
// bool PrintAllDerivations() const {
// return m_printAllDerivations;
// }
const UnknownLHSList &GetUnknownLHS() const {
return m_unknownLHS;
}
const Word &GetInputDefaultNonTerminal() const {
return m_inputDefaultNonTerminal;
}
const Word &GetOutputDefaultNonTerminal() const {
return m_outputDefaultNonTerminal;
}
SourceLabelOverlap GetSourceLabelOverlap() const {
return m_sourceLabelOverlap;
}
size_t GetRuleLimit() const {
return m_ruleLimit;
}

View File

@ -3,9 +3,8 @@
#include <sstream>
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/StaticData.h"
#include "util/string_stream.hh"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
namespace Syntax
@ -14,13 +13,13 @@ namespace F2S
{
GlueRuleSynthesizer::
GlueRuleSynthesizer(HyperTree &trie, const std::vector<FactorType> &iFactors)
: m_hyperTree(trie)
GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &trie)
: m_input_default_nonterminal(opts.syntax.input_default_non_terminal)
, m_output_default_nonterminal(opts.syntax.output_default_non_terminal)
, m_hyperTree(trie)
{
// const std::vector<FactorType> &inputFactorOrder =
// StaticData::Instance().GetInputFactorOrder();
Word *lhs = NULL;
m_dummySourcePhrase.CreateFromString(Input, iFactors, "hello", &lhs);
m_dummySourcePhrase.CreateFromString(Input, opts.input.factor_order, "hello", &lhs);
delete lhs;
}
@ -47,11 +46,10 @@ void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
}
}
TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
const Forest::Hyperedge &e)
TargetPhrase*
GlueRuleSynthesizer::
SynthesizeTargetPhrase(const Forest::Hyperedge &e)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
UnknownWordPenaltyProducer::Instance();
@ -61,7 +59,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
for (std::size_t i = 0; i < e.tail.size(); ++i) {
const Word &symbol = e.tail[i]->pvertex.symbol;
if (symbol.IsNonTerminal()) {
targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
targetPhrase->AddWord(m_output_default_nonterminal);
} else {
// TODO Check this
Word &targetWord = targetPhrase->AddWord();
@ -75,7 +73,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
float score = LOWEST_SCORE;
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
targetPhrase->EvaluateInIsolation(m_dummySourcePhrase);
Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
Word *targetLhs = new Word(m_output_default_nonterminal);
targetPhrase->SetTargetLHS(targetLhs);
targetPhrase->SetAlignmentInfo(alignmentSS.str());

View File

@ -9,6 +9,7 @@
namespace Moses
{
class AllOptions;
namespace Syntax
{
namespace F2S
@ -16,9 +17,11 @@ namespace F2S
class GlueRuleSynthesizer : public HyperTreeCreator
{
Word m_input_default_nonterminal;
Word m_output_default_nonterminal;
public:
GlueRuleSynthesizer(HyperTree &, std::vector<FactorType> const& iFactors);
GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &);
// Synthesize the minimal, monotone rule that can be applied to the given
// hyperedge and add it to the rule trie.
void SynthesizeRule(const Forest::Hyperedge &);

View File

@ -74,8 +74,7 @@ void Manager<RuleMatcher>::Decode()
RuleMatcherCallback callback(m_stackMap, ruleLimit);
// Create a glue rule synthesizer.
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie,
options()->input.factor_order);
GlueRuleSynthesizer glueRuleSynthesizer(*options(), *m_glueRuleTrie);
// Sort the input forest's vertices into bottom-up topological order.
std::vector<const Forest::Vertex *> sortedVertices;

View File

@ -3,7 +3,7 @@
#include <sstream>
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/StaticData.h"
#include <boost/scoped_ptr.hpp>
namespace Moses
{
@ -12,7 +12,9 @@ namespace Syntax
namespace T2S
{
void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
void
GlueRuleSynthesizer::
SynthesizeRule(const InputTree::Node &node)
{
const Word &sourceLhs = node.pvertex.symbol;
boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
@ -22,7 +24,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
tpc->Add(tp);
}
Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
Phrase*
GlueRuleSynthesizer::
SynthesizeSourcePhrase(const InputTree::Node &node)
{
Phrase *phrase = new Phrase(node.children.size());
for (std::vector<InputTree::Node*>::const_iterator p = node.children.begin();
@ -37,11 +41,10 @@ Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
return phrase;
}
TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
const InputTree::Node &node, const Phrase &sourceRhs)
TargetPhrase*
GlueRuleSynthesizer::
SynthesizeTargetPhrase(const InputTree::Node &node, const Phrase &sourceRhs)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
UnknownWordPenaltyProducer::Instance();
@ -51,7 +54,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
for (std::size_t i = 0; i < node.children.size(); ++i) {
const Word &symbol = node.children[i]->pvertex.symbol;
if (symbol.IsNonTerminal()) {
targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal());
targetPhrase->AddWord(m_output_default_nonterminal);
} else {
// TODO Check this
Word &targetWord = targetPhrase->AddWord();
@ -65,7 +68,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase(
float score = LOWEST_SCORE;
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
targetPhrase->EvaluateInIsolation(sourceRhs);
Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal());
Word *targetLhs = new Word(m_output_default_nonterminal);
targetPhrase->SetTargetLHS(targetLhs);
targetPhrase->SetAlignmentInfo(alignmentSS.str());

View File

@ -16,9 +16,13 @@ namespace T2S
class GlueRuleSynthesizer : public RuleTrieCreator
{
Word m_output_default_nonterminal;
public:
GlueRuleSynthesizer(RuleTrie &trie) : m_ruleTrie(trie) {}
GlueRuleSynthesizer(RuleTrie &trie, Word dflt_nonterm)
: m_ruleTrie(trie)
, m_output_default_nonterminal(dflt_nonterm)
{}
// Synthesize the minimal, monotone rule that can be applied to the given node
// and add it to the rule trie.
void SynthesizeRule(const InputTree::Node &);

View File

@ -111,7 +111,8 @@ void Manager<RuleMatcher>::Decode()
F2S::RuleMatcherCallback callback(m_stackMap, ruleLimit);
// Create a glue rule synthesizer.
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie);
Word dflt_nonterm = options()->syntax.output_default_non_terminal;
GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie, dflt_nonterm);
// Visit each node of the input tree in post-order.
for (std::vector<InputTree::Node>::const_iterator p =

View File

@ -47,9 +47,7 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
int
TabbedSentence::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
Read(std::istream& in)
{
TabbedColumns allColumns;
@ -60,17 +58,14 @@ Read(std::istream& in,
boost::split(allColumns, line, boost::is_any_of("\t"));
if(allColumns.size() < 2) {
std::stringstream dummyStream;
dummyStream << line << std::endl;
return Sentence::Read(dummyStream, factorOrder, opts);
Sentence::init(line);
} else {
m_columns.resize(allColumns.size() - 1);
std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
std::stringstream dummyStream;
dummyStream << allColumns[0] << std::endl;
return Sentence::Read(dummyStream, factorOrder, opts);
Sentence::init(allColumns[0]);
}
return 1;
}
}

View File

@ -68,8 +68,7 @@ public:
, const std::string &tabbedString);
virtual int
Read(std::istream& in,const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
Read(std::istream& in);
const TabbedColumns& GetColumns() const {
return m_columns;

View File

@ -51,6 +51,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
size_t sourceSize = parser.GetSize();
m_expandableDottedRuleListVec.resize(sourceSize);
m_input_default_nonterminal = parser.options()->syntax.input_default_non_terminal;
for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
@ -81,7 +82,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
ChartParserCallback &outColl)
{
const StaticData &staticData = StaticData::Instance();
const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
// const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal();
const Range &range = inputPath.GetWordsRange();
size_t relEndPos = range.GetEndPos() - range.GetStartPos();
@ -178,7 +179,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
// for Hieu's source syntax
bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm;
bool isSourceSyntaxNonTerm = sourceLHS != m_input_default_nonterminal; // defaultSourceNonTerm;
size_t nonTermNumWordsCovered = endPos - startPos + 1;
doSearch = isSourceSyntaxNonTerm ?

View File

@ -57,6 +57,7 @@ private:
std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
Word m_input_default_nonterminal;
};
} // namespace Moses

View File

@ -240,25 +240,20 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line,
//! populate this InputType with data from in stream
int
TreeInput::
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
Read(std::istream& in)
{
const StaticData &staticData = StaticData::Instance();
string line;
if (getline(in, line, '\n').eof())
return 0;
// remove extra spaces
//line = Trim(line);
m_labelledSpans.clear();
ProcessAndStripXMLTags(opts, line, m_labelledSpans, m_xmlOptions);
ProcessAndStripXMLTags(*m_options, line, m_labelledSpans, m_xmlOptions);
// do words 1st - hack
stringstream strme;
strme << line << endl;
Sentence::Read(strme, factorOrder, opts);
Sentence::Read(strme);
// size input chart
size_t sourceSize = GetSize();
@ -270,19 +265,21 @@ Read(std::istream& in, const std::vector<FactorType>& factorOrder,
// do source labels
vector<XMLParseOutput>::const_iterator iterLabel;
for (iterLabel = m_labelledSpans.begin(); iterLabel != m_labelledSpans.end(); ++iterLabel) {
for (iterLabel = m_labelledSpans.begin();
iterLabel != m_labelledSpans.end(); ++iterLabel) {
const XMLParseOutput &labelItem = *iterLabel;
const Range &range = labelItem.m_range;
const string &label = labelItem.m_label;
AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label, factorOrder);
AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label);
}
// default label
bool only4empty = m_options->syntax.default_non_term_only_for_empty_range;
for (size_t startPos = 0; startPos < sourceSize; ++startPos) {
for (size_t endPos = startPos; endPos < sourceSize; ++endPos) {
NonTerminalSet &list = GetLabelSet(startPos, endPos);
if (list.size() == 0 || !staticData.GetDefaultNonTermOnlyForEmptyRange()) {
AddChartLabel(startPos, endPos, staticData.GetInputDefaultNonTerminal(), factorOrder);
if (list.size() == 0 || ! only4empty ) {
AddChartLabel(startPos, endPos, m_options->syntax.input_default_non_terminal);
}
}
}
@ -303,13 +300,13 @@ TranslationOptionCollection* TreeInput::CreateTranslationOptionCollection() cons
return NULL;
}
void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
, const std::vector<FactorType>& /* factorOrder */)
void
TreeInput::
AddChartLabel(size_t startPos, size_t endPos, const Word &label)
{
UTIL_THROW_IF2(!label.IsNonTerminal(),
"Label must be a non-terminal");
SourceLabelOverlap overlapType = StaticData::Instance().GetSourceLabelOverlap();
SourceLabelOverlap overlapType = m_options->syntax.source_label_overlap;
NonTerminalSet &list = GetLabelSet(startPos, endPos);
switch (overlapType) {
case SourceLabelOverlapAdd:
@ -327,14 +324,17 @@ void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label
}
}
void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const string &label
, const std::vector<FactorType>& factorOrder)
void
TreeInput::
AddChartLabel(size_t startPos, size_t endPos, const string &label)
{
const std::vector<FactorType>& fOrder = m_options->input.factor_order;
Word word(true);
const Factor *factor = FactorCollection::Instance().AddFactor(Input, factorOrder[0], label, true); // TODO - no factors
const Factor *factor
= FactorCollection::Instance().AddFactor(Input, fOrder[0], label, true);
// TODO - no factors
word.SetFactor(0, factor);
AddChartLabel(startPos, endPos, word, factorOrder);
AddChartLabel(startPos, endPos, word);
}
std::ostream& operator<<(std::ostream &out, const TreeInput &input)

View File

@ -35,10 +35,9 @@ protected:
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
std::vector<XMLParseOutput> m_labelledSpans;
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
,const std::vector<FactorType>& factorOrder);
void AddChartLabel(size_t startPos, size_t endPos, const Word &label
,const std::vector<FactorType>& factorOrder);
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label);
void AddChartLabel(size_t startPos, size_t endPos, const Word &label);
NonTerminalSet &GetLabelSet(size_t startPos, size_t endPos) {
return m_sourceChart[startPos][endPos - startPos];
}
@ -56,9 +55,7 @@ public:
//! populate this InputType with data from in stream
virtual int
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
Read(std::istream& in);
//! Output debugging info to stream out
virtual void Print(std::ostream&) const;

View File

@ -52,18 +52,15 @@ void WordLattice::Print(std::ostream& out) const
int
WordLattice::
InitializeFromPCNDataType
(const PCN::CN& cn, size_t const maxPhraseLength,
const std::vector<FactorType>& factorOrder,
const std::string& debug_line)
InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line)
{
// const StaticData &staticData = StaticData::Instance();
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
size_t const maxPhraseLength = m_options->search.max_phrase_length;
const InputFeature *inputFeature = InputFeature::InstancePtr();
size_t numInputScores = inputFeature->GetNumInputScores();
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
// size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
bool addRealWordCount = (numRealWordCount > 0);
//when we have one more weight than params, we add a word count feature
@ -150,9 +147,7 @@ InitializeFromPCNDataType
int
WordLattice::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
Read(std::istream& in)
{
Clear();
std::string line;
@ -163,8 +158,7 @@ Read(std::istream& in,
}
PCN::CN cn = PCN::parsePCN(line);
return InitializeFromPCNDataType(cn, opts.search.max_phrase_length,
factorOrder, line);
return InitializeFromPCNDataType(cn, line);
}
void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
@ -228,17 +222,10 @@ TranslationOptionCollection*
WordLattice
::CreateTranslationOptionCollection(ttasksptr const& ttask) const
{
// size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
// float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold();
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
// StaticData::Instance().GetMaxNoTransOptPerCoverage();
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
float translationOptionThreshold = ttask->options()->search.trans_opt_threshold;
// StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv = NULL;
//rv = new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
if (StaticData::Instance().GetUseLegacyPT()) {
rv = new TranslationOptionCollectionConfusionNet(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);

View File

@ -40,14 +40,11 @@ public:
/** Given a lattice represented using the PCN::CN data type (topologically sorted adjacency list
* representation), initialize the WordLattice object
*/
int InitializeFromPCNDataType(const PCN::CN& cn, size_t const maxPhraseLength,
const std::vector<FactorType>& factorOrder,
const std::string& debug_line = "");
int InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line = "");
/** Read from PLF format (1 lattice per line)
*/
int Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
int Read(std::istream& in);
/** Convert internal representation into an edge matrix
* @note edges[1][2] means there is an edge from 1 to 2

View File

@ -159,16 +159,19 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
* \param rbrackStr xml tag's right bracket string, typically ">"
*/
bool
ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption const*> &res,
ProcessAndStripXMLTags(AllOptions const& opts, string &line,
vector<XmlOption const*> &res,
ReorderingConstraint &reorderingConstraint,
vector< size_t > &walls,
std::vector< std::pair<size_t, std::string> > &placeholders,
int offset, const std::string& lbrackStr,
const std::string& rbrackStr)
std::vector< std::pair<size_t, std::string> > &placeholders)
{
//parse XML markup in translation line
const StaticData &staticData = StaticData::Instance();
const std::string& lbrackStr = opts.input.xml_brackets.first;
const std::string& rbrackStr = opts.input.xml_brackets.second;
int offset = is_syntax(opts.search.algo) ? 1 : 0;
// const StaticData &staticData = StaticData::Instance();
// hack. What pt should XML trans opt be assigned to?
PhraseDictionary *firstPt = NULL;
@ -177,7 +180,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
}
// no xml tag? we're done.
//if (line.find_first_of('<') == string::npos) {
if (line.find(lbrackStr) == string::npos) {
return true;
}
@ -195,7 +197,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
size_t wordPos = 0; // position in sentence (in terms of number of words)
const vector<FactorType> &outputFactorOrder = opts.output.factor_order;
// const string &factorDelimiter = staticData.GetFactorDelimiter();
// loop through the tokens
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
@ -459,7 +460,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
// lhs
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
const UnknownLHSList &lhsList = opts.syntax.unknown_lhs; // staticData.GetUnknownLHS();
if (!lhsList.empty()) {
const Factor *factor = FactorCollection::Instance().AddFactor(lhsList[0].first, true);
Word *targetLHS = new Word(true);

View File

@ -32,10 +32,10 @@ std::vector<std::string> TokenizeXml(const std::string& str, const std::string&
bool ProcessAndStripXMLTags(AllOptions const& opts,
std::string &line, std::vector<XmlOption const*> &res,
ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
std::vector< std::pair<size_t, std::string> > &placeholders,
int offset,
const std::string& lbrackStr="<", const std::string& rbrackStr=">");
ReorderingConstraint &reorderingConstraint,
std::vector< size_t > &walls,
std::vector< std::pair<size_t, std::string> > &placeholders);
}

View File

@ -327,7 +327,7 @@ run_chart_decoder()
{
Moses::TreeInput tinput(m_options);
istringstream buf(m_source_string + "\n");
tinput.Read(buf, options()->input.factor_order, *m_options);
tinput.Read(buf);
Moses::ChartManager manager(this->self());
manager.Decode();