Merge branch 'mmt-dev'

Ulrich Germann 2015-10-31 13:36:40 +00:00
commit ff1977c29e
81 changed files with 1122 additions and 969 deletions

View File

@@ -1,5 +1,5 @@
 #BUILDING MOSES
 #
 #PACKAGES
 #Language models (optional):
 #--with-irstlm=/path/to/irstlm
@@ -245,7 +245,7 @@ if [ option.get "with-mm" : : "yes" ]
 moses/TranslationModel/UG//ptable-describe-features
 moses/TranslationModel/UG//count-ptable-features
 moses/TranslationModel/UG//ptable-lookup
-# moses/TranslationModel/UG//spe-check-coverage
+moses/TranslationModel/UG//check-coverage
 moses/TranslationModel/UG/mm//mtt-demo1
 moses/TranslationModel/UG/mm//mtt-build
 moses/TranslationModel/UG/mm//mtt-dump
@@ -256,6 +256,7 @@ if [ option.get "with-mm" : : "yes" ]
 moses/TranslationModel/UG/mm//mmlex-lookup
 moses/TranslationModel/UG/mm//mtt-count-words
 moses/TranslationModel/UG/mm//calc-coverage
+moses/TranslationModel/UG//check-coverage
 moses/TranslationModel/UG//try-align
 ;
 }

View File

@@ -257,9 +257,9 @@ public:
   const StaticData &staticData = StaticData::Instance();
   //Make sure alternative paths are retained, if necessary
-  if (addGraphInfo || nbest_size>0) {
-    (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
-  }
+  // if (addGraphInfo || nbest_size>0) {
+  //   (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
+  // }
   stringstream out, graphInfo, transCollOpts;
@@ -269,7 +269,7 @@ public:
   boost::shared_ptr<TreeInput> tinput(new TreeInput);
   const vector<FactorType>& IFO = staticData.GetInputFactorOrder();
   istringstream in(source + "\n");
-  tinput->Read(in,IFO);
+  tinput->Read(in,IFO,staticData.options());
   ttasksptr task = Moses::TranslationTask::create(tinput);
   ChartManager manager(task);
   manager.Decode();
@@ -285,7 +285,8 @@ public:
   else
   {
     // size_t lineNumber = 0; // TODO: Include sentence request number here?
-    boost::shared_ptr<Sentence> sentence(new Sentence(0,source));
+    boost::shared_ptr<Sentence> sentence;
+    sentence.reset(new Sentence(0,source,staticData.options()));
     ttasksptr task = Moses::TranslationTask::create(sentence);
     Manager manager(task);
     manager.Decode();
@@ -320,7 +321,7 @@ public:
     outputNBest(manager, m_retData, nbest_size, nbest_distinct,
                 reportAllFactors, addAlignInfo, addScoreBreakdown);
   }
-  (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
+  // (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
 }
 m_retData["text"] = value_string(out.str());
 XVERBOSE(1,"Output: " << out.str() << endl);
@@ -479,7 +480,9 @@ public:
 {
   // should the score breakdown be reported in a more structured manner?
   ostringstream buf;
-  path.GetScoreBreakdown()->OutputAllFeatureScores(buf);
+  bool with_labels
+    = StaticData::Instance().options().nbest.include_feature_labels;
+  path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
   nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str());
 }
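The hunks above follow the pattern that runs through this whole merge: instead of flipping switches on the global StaticData singleton, the per-request options object (staticData.options()) is handed to Read() and to the Sentence constructor. A minimal, self-contained sketch of that pattern, not Moses code and with hypothetical names throughout:

    #include <iostream>
    #include <sstream>
    #include <string>

    struct AllOptions {          // stand-in for Moses' AllOptions
      bool output_search_graph;
    };

    struct SentenceLike {        // stand-in for Sentence / TreeInput
      std::string text;
      // options travel with the call; no global state is read or mutated
      int Read(std::istream& in, const AllOptions& /*opts*/) {
        if (std::getline(in, text)) return 1;
        return 0;
      }
    };

    int main() {
      AllOptions opts = { false };        // owned by the caller, per request
      std::istringstream in("ein haus\n");
      SentenceLike s;
      if (s.Read(in, opts)) std::cout << s.text << "\n";
    }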

View File

@@ -202,8 +202,9 @@ int main(int argc, char* argv[])
       << " ||| ";
   vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
   manager.OutputBestHypo(mbrBestHypo, lineCount,
-                         SD.GetReportSegmentation(),
-                         SD.GetReportAllFactors(),cout);
+                         manager.options().output.ReportSegmentation,
+                         manager.options().output.ReportAllFactors,
+                         cout);
   }
 }
 }

View File

@@ -106,7 +106,9 @@ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
 }
-bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b)
+bool
+compare_target(std::pair<size_t,size_t> const* a,
+               std::pair<size_t,size_t> const* b)
 {
   if(a->second < b->second) return true;
   if(a->second == b->second) return (a->first < b->first);
@@ -114,29 +116,29 @@ bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,si
 }
-std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignments() const
+std::vector< const std::pair<size_t,size_t>* >
+AlignmentInfo::
+GetSortedAlignments(WordAlignmentSort SortOrder) const
 {
   std::vector< const std::pair<size_t,size_t>* > ret;
   CollType::const_iterator iter;
   for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
     const std::pair<size_t,size_t> &alignPair = *iter;
     ret.push_back(&alignPair);
   }
-  const StaticData &staticData = StaticData::Instance();
-  WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort();
-  switch (wordAlignmentSort) {
+  switch (SortOrder) {
   case NoSort:
     break;
   case TargetOrder:
     std::sort(ret.begin(), ret.end(), compare_target);
     break;
   default:
-    UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort);
+    UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
+               << SortOrder);
   }
   return ret;
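GetSortedAlignments() now takes the sort order from its caller instead of asking StaticData. A small, self-contained illustration of the same comparator-plus-enum arrangement (hypothetical names, not the Moses classes):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    enum WordAlignmentSort { NoSort, TargetOrder };

    typedef std::pair<std::size_t, std::size_t> AlignPoint;  // (source, target)

    static bool compare_target(const AlignPoint* a, const AlignPoint* b) {
      if (a->second != b->second) return a->second < b->second;
      return a->first < b->first;
    }

    std::vector<const AlignPoint*>
    sorted_alignments(const std::vector<AlignPoint>& coll, WordAlignmentSort order) {
      std::vector<const AlignPoint*> ret;
      for (std::size_t i = 0; i < coll.size(); ++i) ret.push_back(&coll[i]);
      if (order == TargetOrder)
        std::sort(ret.begin(), ret.end(), compare_target);   // sort by target index
      return ret;
    }

    int main() {
      std::vector<AlignPoint> a;
      a.push_back(AlignPoint(2, 0));
      a.push_back(AlignPoint(0, 1));
      std::vector<const AlignPoint*> s = sorted_alignments(a, TargetOrder);
      for (std::size_t i = 0; i < s.size(); ++i)
        std::cout << s[i]->first << "-" << s[i]->second << " ";
      std::cout << "\n";
    }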

View File

@@ -26,7 +26,7 @@
 #include <cstdlib>
 #include <boost/functional/hash.hpp>
+#include "TypeDef.h"
 namespace Moses
 {
@@ -83,7 +83,8 @@ public:
   return m_collection.size();
 }
-std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
+std::vector< const std::pair<size_t,size_t>* >
+GetSortedAlignments(WordAlignmentSort SortOrder) const;
 std::vector<size_t> GetSourceIndex2PosMap() const;

View File

@@ -27,7 +27,6 @@
 #include "RuleCube.h"
 #include "Range.h"
 #include "Util.h"
-#include "StaticData.h"
 #include "ChartTranslationOptions.h"
 #include "ChartTranslationOptionList.h"
 #include "ChartManager.h"
@@ -52,8 +51,7 @@ ChartCellBase::~ChartCellBase() {}
 ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
   ChartCellBase(startPos, endPos), m_manager(manager)
 {
-  const StaticData &staticData = StaticData::Instance();
-  m_nBestIsEnabled = staticData.options().nbest.enabled;
+  m_nBestIsEnabled = manager.options().nbest.enabled;
 }
 ChartCell::~ChartCell() {}
@@ -66,7 +64,14 @@ ChartCell::~ChartCell() {}
 bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
 {
   const Word &targetLHS = hypo->GetTargetLHS();
-  return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager);
+  MapType::iterator m = m_hypoColl.find(targetLHS);
+  if (m == m_hypoColl.end())
+    {
+      std::pair<Word, ChartHypothesisCollection>
+        e(targetLHS, ChartHypothesisCollection(m_manager.options()));
+      m = m_hypoColl.insert(e).first;
+    }
+  return m->second.AddHypothesis(hypo, m_manager);
 }
 /** Prune each collection in this cell to a particular size */
@@ -87,8 +92,6 @@ void ChartCell::PruneToSize()
 void ChartCell::Decode(const ChartTranslationOptionList &transOptList
                        , const ChartCellCollection &allChartCells)
 {
-  const StaticData &staticData = StaticData::Instance();
   // priority queue for applicable rules with selected hypotheses
   RuleCubeQueue queue(m_manager);
@@ -100,7 +103,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
   }
   // pluck things out of queue and add to hypo collection
-  const size_t popLimit = staticData.options().cube.pop_limit;
+  const size_t popLimit = m_manager.options().cube.pop_limit;
   for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
     ChartHypothesis *hypo = queue.Pop();
     AddHypothesis(hypo);
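The AddHypothesis() change above is forced by the new ChartHypothesisCollection constructor: once the mapped type needs an AllOptions argument it has no default constructor, so operator[] can no longer create entries on demand and the cell has to find-or-insert an explicitly constructed value. A minimal stand-alone sketch of that idiom, with hypothetical stand-in types:

    #include <iostream>
    #include <map>
    #include <string>

    struct Options { int stack_size; };

    class HypoColl {                     // stand-in for ChartHypothesisCollection
    public:
      explicit HypoColl(const Options& o) : m_limit(o.stack_size) {}
      int limit() const { return m_limit; }
    private:
      int m_limit;                       // no default constructor on purpose
    };

    int main() {
      Options opts = { 100 };
      std::map<std::string, HypoColl> coll;

      const std::string key = "NP";
      std::map<std::string, HypoColl>::iterator m = coll.find(key);
      if (m == coll.end()) {
        // coll[key] would need HypoColl(); insert a fully constructed value instead
        m = coll.insert(std::make_pair(key, HypoColl(opts))).first;
      }
      std::cout << key << " -> limit " << m->second.limit() << "\n";
    }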

View File

@@ -256,12 +256,13 @@ void ChartHypothesis::CleanupArcList()
  * However, may not be enough if only unique candidates are needed,
  * so we'll keep all of arc list if nedd distinct n-best list
  */
+  AllOptions const& opts = StaticData::Instance().options();
   const StaticData &staticData = StaticData::Instance();
-  size_t nBestSize = staticData.options().nbest.nbest_size;
-  bool distinctNBest = (staticData.options().nbest.only_distinct
-                        || staticData.options().mbr.enabled
-                        || staticData.GetOutputSearchGraph()
-                        || staticData.GetOutputSearchGraphHypergraph());
+  size_t nBestSize = opts.nbest.nbest_size;
+  bool distinctNBest = (opts.nbest.only_distinct
+                        || opts.mbr.enabled
+                        || opts.output.NeedSearchGraph()
+                        || !opts.output.SearchGraphHG.empty());
   if (!distinctNBest && m_arcList->size() > nBestSize) {
     // prune arc list only if there too many arcs

View File

@@ -26,6 +26,7 @@
 #include "ChartManager.h"
 #include "HypergraphOutput.h"
 #include "util/exception.hh"
+#include "parameters/AllOptions.h"
 using namespace std;
 using namespace Moses;
@@ -33,13 +34,13 @@ using namespace Moses;
 namespace Moses
 {
-ChartHypothesisCollection::ChartHypothesisCollection()
+ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts)
 {
-  const StaticData &staticData = StaticData::Instance();
-  m_beamWidth = staticData.GetBeamWidth();
-  m_maxHypoStackSize = staticData.options().search.stack_size;
-  m_nBestIsEnabled = staticData.options().nbest.enabled;
+  // const StaticData &staticData = StaticData::Instance();
+  m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth();
+  m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size;
+  m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled;
   m_bestScore = -std::numeric_limits<float>::infinity();
 }
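The constructor now receives everything it needs once, at construction time, rather than reading the StaticData singleton; the old accesses are kept as trailing comments. A compact, self-contained sketch of this constructor-injection pattern (hypothetical types, not the Moses ones):

    #include <cstddef>
    #include <iostream>
    #include <limits>

    struct SearchOptions { float beam_width; std::size_t stack_size; };
    struct NBestOptions  { bool enabled; };
    struct AllOptions    { SearchOptions search; NBestOptions nbest; };

    class HypoCollection {
    public:
      explicit HypoCollection(const AllOptions& opts)
        : m_beamWidth(opts.search.beam_width),
          m_maxStackSize(opts.search.stack_size),
          m_nBestIsEnabled(opts.nbest.enabled),
          m_bestScore(-std::numeric_limits<float>::infinity()) {}
      std::size_t max_stack_size() const { return m_maxStackSize; }
    private:
      float m_beamWidth;
      std::size_t m_maxStackSize;
      bool m_nBestIsEnabled;
      float m_bestScore;
    };

    int main() {
      AllOptions opts = { { 0.0001f, 200 }, { true } };
      HypoCollection coll(opts);
      std::cout << "stack size: " << coll.max_stack_size() << "\n";
    }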

View File

@@ -29,6 +29,7 @@ namespace Moses
 {
 class ChartSearchGraphWriter;
+class AllOptions;
 //! functor to compare (chart) hypotheses by (descending) score
 class ChartHypothesisScoreOrderer
@@ -70,7 +71,7 @@ public:
   return m_hypos.end();
 }
-ChartHypothesisCollection();
+ChartHypothesisCollection(AllOptions const& opts);
 ~ChartHypothesisCollection();
 bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);

View File

@@ -371,7 +371,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
   OutputSurface(out, outputPhrase, outputFactorOrder, false);
   out << " ||| ";
   boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
-  scoreBreakdown->OutputAllFeatureScores(out);
+  bool with_labels = options().nbest.include_feature_labels;
+  scoreBreakdown->OutputAllFeatureScores(out, with_labels);
   out << " ||| " << derivation.score;
   // optionally, print word alignments
@@ -618,7 +619,7 @@ void ChartManager::OutputDetailedTranslationReport(
   //DIMw
   const StaticData &staticData = StaticData::Instance();
-  if (staticData.IsDetailedAllTranslationReportingEnabled()) {
+  if (options().output.detailed_all_transrep_filepath.size()) {
     const Sentence &sentence = static_cast<const Sentence &>(m_source);
     size_t nBestSize = staticData.options().nbest.nbest_size;
     std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
@@ -835,11 +836,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
   Backtrack(hypo);
   VERBOSE(3,"0" << std::endl);
-  if (StaticData::Instance().GetOutputHypoScore()) {
+  if (options().output.ReportHypoScore) {
     out << hypo->GetTotalScore() << " ";
   }
-  if (StaticData::Instance().IsPathRecoveryEnabled()) {
+  if (options().output.RecoverPath) {
     out << "||| ";
   }
   Phrase outPhrase(ARRAY_SIZE_INCR);
@@ -858,7 +859,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
 } else {
   VERBOSE(1, "NO BEST TRANSLATION" << endl);
-  if (StaticData::Instance().GetOutputHypoScore()) {
+  if (options().output.ReportHypoScore) {
     out << "0 ";
   }

View File

@@ -107,8 +107,13 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
   targetPhrase->SetAlignmentInfo("0-0");
   targetPhrase->EvaluateInIsolation(*unksrc);
-  if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
-    targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
+  AllOptions const& opts = staticData.options();
+  if (!opts.output.detailed_tree_transrep_filepath.empty() ||
+      opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) {
+    std::string prop = "[ ";
+    prop += (*targetLHS)[0]->GetString().as_string() + " ";
+    prop += sourceWord[0]->GetString().as_string() + " ]";
+    targetPhrase->SetProperty("Tree", prop);
   }
   // chart rule

View File

@@ -110,29 +110,14 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
 int
 ConfusionNet::
 Read(std::istream& in,
-     const std::vector<FactorType>& factorOrder)
+     const std::vector<FactorType>& factorOrder,
+     AllOptions const& opts)
 {
   int rv=ReadF(in,factorOrder,0);
   if(rv) stats.collect(*this);
   return rv;
 }
-#if 0
-// Deprecated due to code duplication;
-// use Word::CreateFromString() instead
-void
-ConfusionNet::
-String2Word(const std::string& s,Word& w,
-            const std::vector<FactorType>& factorOrder)
-{
-  std::vector<std::string> factorStrVector = Tokenize(s, "|");
-  for(size_t i=0; i<factorOrder.size(); ++i)
-    w.SetFactor(factorOrder[i],
-                FactorCollection::Instance().AddFactor
-                (Input,factorOrder[i], factorStrVector[i]));
-}
-#endif
 bool
 ConfusionNet::
 ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
@@ -161,7 +146,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
   for(size_t i=0; i < numInputScores; i++) {
     double prob;
     if (!(is>>prob)) {
-      TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
+      TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
+                << "or wrong number of scores\n");
       return false;
     }
     if(prob<0.0) {
@@ -174,7 +160,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
     probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
   }
-  //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
+  // store 'real' word count in last feature if we have one more
+  // weight than we do arc scores and not epsilon
   if (addRealWordCount && word!=EPSILON && word!="")
     probs.back() = -1.0;

View File

@@ -67,7 +67,8 @@ public:
   bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
   virtual void Print(std::ostream&) const;
-  int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+  int Read(std::istream& in,const std::vector<FactorType>& factorOrder,
+           AllOptions const& opts);
   Phrase GetSubString(const Range&) const; //TODO not defined
   std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined

View File

@@ -100,12 +100,14 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
   }
 }
-void DecodeStepTranslation::ProcessInitialTranslation(
-  const InputType &source
-  ,PartialTranslOptColl &outputPartialTranslOptColl
-  , size_t startPos, size_t endPos, bool adhereTableLimit
-  , const InputPath &inputPath
-  , TargetPhraseCollection::shared_ptr phraseColl) const
+void
+DecodeStepTranslation::
+ProcessInitialTranslation(InputType const& source,
+                          PartialTranslOptColl &outputPartialTranslOptColl,
+                          size_t startPos, size_t endPos,
+                          bool adhereTableLimit,
+                          InputPath const& inputPath,
+                          TargetPhraseCollection::shared_ptr phraseColl) const
 {
   const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
   const size_t tableLimit = phraseDictionary->GetTableLimit();
@@ -114,12 +116,13 @@ void DecodeStepTranslation::ProcessInitialTranslation(
   if (phraseColl != NULL) {
     IFVERBOSE(3) {
-      if(StaticData::Instance().GetInputType() == SentenceInput)
-        TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
+      if(source.GetType() == SentenceInput)
+        TRACE_ERR("[" << source.GetSubString(range) << "; "
+                  << startPos << "-" << endPos << "]\n");
       else
         TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
     }
     TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
     iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
@@ -137,11 +140,13 @@ void DecodeStepTranslation::ProcessInitialTranslation(
   }
 }
-void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
-  const InputType &source
-  ,PartialTranslOptColl &outputPartialTranslOptColl
-  , size_t startPos, size_t endPos, bool adhereTableLimit
-  , const InputPathList &inputPathList) const
+void
+DecodeStepTranslation::
+ProcessInitialTransLEGACY(InputType const& source,
+                          PartialTranslOptColl &outputPartialTranslOptColl,
+                          size_t startPos, size_t endPos,
+                          bool adhereTableLimit,
+                          InputPathList const& inputPathList) const
 {
   const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
   const size_t tableLimit = phraseDictionary->GetTableLimit();
@@ -152,12 +157,13 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
   if (phraseColl != NULL) {
     IFVERBOSE(3) {
-      if(StaticData::Instance().GetInputType() == SentenceInput)
-        TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
+      if(source.GetType() == SentenceInput)
+        TRACE_ERR("[" << source.GetSubString(range) << "; "
+                  << startPos << "-" << endPos << "]\n");
       else
         TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
     }
     const std::vector<Phrase> &sourcePhrases = phraseColl->GetSourcePhrases();
     TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;

View File

@@ -61,10 +61,13 @@ public:
     , TargetPhraseCollection::shared_ptr phraseColl) const;
   // legacy
-  void ProcessInitialTranslationLEGACY(const InputType &source
-                                       , PartialTranslOptColl &outputPartialTranslOptColl
-                                       , size_t startPos, size_t endPos, bool adhereTableLimit
-                                       , const InputPathList &inputPathList) const;
+  void
+  ProcessInitialTransLEGACY(InputType const& source,
+                            PartialTranslOptColl &outputPartialTranslOptColl,
+                            size_t startPos, size_t endPos,
+                            bool adhereTableLimit,
+                            InputPathList const& inputPathList) const;
   void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
                      , const DecodeStep &decodeStep
                      , PartialTranslOptColl &outputPartialTranslOptColl

View File

@@ -17,8 +17,10 @@ namespace Moses
 {
 //! populate this InputType with data from in stream
-int ForestInput::Read(std::istream &in,
-                      const std::vector<FactorType>& factorOrder)
+int ForestInput::
+Read(std::istream &in,
+     std::vector<FactorType> const& factorOrder,
+     AllOptions const& opts)
 {
   using Syntax::F2S::Forest;
@@ -56,7 +58,7 @@ int ForestInput::Read(std::istream &in,
   // not sure ForestInput needs to.
   std::stringstream strme;
   strme << "<s> " << sentence << " </s>" << std::endl;
-  Sentence::Read(strme, factorOrder);
+  Sentence::Read(strme, factorOrder, opts);
   // Find the maximum end position of any vertex (0 if forest is empty).
   std::size_t maxEnd = FindMaxEnd(*m_forest);

View File

@@ -28,7 +28,10 @@ public:
   }
   //! populate this InputType with data from in stream
-  virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+  virtual int
+  Read(std::istream& in,
+       std::vector<FactorType> const& factorOrder,
+       AllOptions const& opts);
   //! Output debugging info to stream out
   virtual void Print(std::ostream&) const;

View File

@@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
   ChartHypothesisCollection::const_iterator iter;
   for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
     ChartHypothesis &mainHypo = **iter;
-    if (StaticData::Instance().GetUnprunedSearchGraph() ||
+    if (StaticData::Instance().options().output.DontPruneSearchGraph ||
         reachable.find(mainHypo.GetId()) != reachable.end()) {
       (*m_out) << m_lineNumber << " " << mainHypo << endl;
     }
@@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
   ChartHypothesisCollection::const_iterator iter;
   for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
     const ChartHypothesis* mainHypo = *iter;
-    if (!StaticData::Instance().GetUnprunedSearchGraph() &&
+    if (!StaticData::Instance().options().output.DontPruneSearchGraph &&
         reachable.find(mainHypo->GetId()) == reachable.end()) {
       //Ignore non reachable nodes
       continue;

View File

@@ -195,9 +195,8 @@ EvaluateWhenApplied(float futureScore)
     const StatefulFeatureFunction &ff = *ffs[i];
     const StaticData &staticData = StaticData::Instance();
     if (! staticData.IsFeatureFunctionIgnored(ff)) {
-      m_ffStates[i] = ff.EvaluateWhenApplied(*this,
-                                             m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
-                                             &m_currScoreBreakdown);
+      FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
+      m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
     }
   }
@@ -276,15 +275,11 @@ CleanupArcList()
  * However, may not be enough if only unique candidates are needed,
  * so we'll keep all of arc list if nedd distinct n-best list
  */
   const StaticData &staticData = StaticData::Instance();
-  size_t nBestSize = staticData.options().nbest.nbest_size;
-  bool distinctNBest = (m_manager.options().nbest.only_distinct ||
-                        staticData.GetLatticeSamplesSize() ||
-                        m_manager.options().mbr.enabled ||
-                        staticData.GetOutputSearchGraph() ||
-                        staticData.GetOutputSearchGraphSLF() ||
-                        staticData.GetOutputSearchGraphHypergraph() ||
-                        m_manager.options().lmbr.enabled);
+  AllOptions const& opts = m_manager.options();
+  size_t nBestSize = opts.nbest.nbest_size;
+  bool distinctNBest = opts.NBestDistinct();
   if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
     // prune arc list only if there too many arcs
@@ -292,9 +287,8 @@ CleanupArcList()
              m_arcList->end(), CompareHypothesisTotalScore());
   // delete bad ones
-  ArcList::iterator iter;
-  for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
-    delete *iter;
+  ArcList::iterator i = m_arcList->begin() + nBestSize;
+  while (i != m_arcList->end()) delete *i++;
   m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
 }
@@ -386,14 +380,16 @@ OutputAlignment(std::ostream &out) const
     edges.push_back(currentHypo);
     currentHypo = currentHypo->GetPrevHypo();
   }
-  OutputAlignment(out, edges);
+  OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder);
 }
 void
 Hypothesis::
-OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
+OutputAlignment(ostream &out,
+                vector<const Hypothesis *> const& edges,
+                WordAlignmentSort waso)
 {
   size_t targetOffset = 0;
@@ -402,7 +398,7 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
     const TargetPhrase &tp = edge.GetCurrTargetPhrase();
     size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
-    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso);
     targetOffset += tp.GetSize();
   }
@@ -412,15 +408,17 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
 void
 Hypothesis::
 OutputAlignment(ostream &out, const AlignmentInfo &ai,
-                size_t sourceOffset, size_t targetOffset)
+                size_t sourceOffset, size_t targetOffset,
+                WordAlignmentSort waso)
 {
   typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
-  AlignVec alignments = ai.GetSortedAlignments();
+  AlignVec alignments = ai.GetSortedAlignments(waso);
   AlignVec::const_iterator it;
   for (it = alignments.begin(); it != alignments.end(); ++it) {
     const std::pair<size_t,size_t> &alignment = **it;
-    out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
+    out << alignment.first + sourceOffset << "-"
+        << alignment.second + targetOffset << " ";
   }
 }
@@ -526,15 +524,17 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
   const int sourceEnd = sourceRange.GetEndPos();
   out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
   if (reportSegmentation == 2) {
+    WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
     out << ",wa=";
     const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
-    Hypothesis::OutputAlignment(out, ai, 0, 0);
+    Hypothesis::OutputAlignment(out, ai, 0, 0, waso);
     out << ",total=";
     out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
    out << ",";
     ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
     scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
-    scoreBreakdown.OutputAllFeatureScores(out);
+    bool with_labels = m_manager.options().nbest.include_feature_labels;
+    scoreBreakdown.OutputAllFeatureScores(out, with_labels);
   }
   out << "| ";
 }
@@ -608,9 +608,10 @@ OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
   using namespace std;
   Range const& src = this->GetCurrSourceWordsRange();
   Range const& trg = this->GetCurrTargetWordsRange();
+  WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
   vector<pair<size_t,size_t> const* > a
-    = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
+    = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
   typedef pair<size_t,size_t> item;
   map<string, xmlrpc_c::value> M;
   BOOST_FOREACH(item const* p, a) {
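CleanupArcList() above collapses the long distinct-n-best condition into opts.NBestDistinct() and rewrites the pruning loop. The pruning itself, sort by score, delete the owned pointers past the cutoff, then erase the tail, can be sketched in isolation roughly as follows (hypothetical types; the real code keeps 5x the n-best size before pruning):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct Arc { float score; };

    struct ByScoreDesc {
      bool operator()(const Arc* a, const Arc* b) const { return a->score > b->score; }
    };

    void prune_arcs(std::vector<Arc*>& arcs, std::size_t nBestSize) {
      if (arcs.size() <= nBestSize) return;
      std::sort(arcs.begin(), arcs.end(), ByScoreDesc());
      for (std::vector<Arc*>::iterator i = arcs.begin() + nBestSize; i != arcs.end(); ++i)
        delete *i;                       // free the pruned hypotheses
      arcs.erase(arcs.begin() + nBestSize, arcs.end());
    }

    int main() {
      std::vector<Arc*> arcs;
      for (int i = 0; i < 10; ++i) arcs.push_back(new Arc{static_cast<float>(i)});
      prune_arcs(arcs, 3);
      std::cout << arcs.size() << " arcs kept\n";
      for (std::size_t i = 0; i < arcs.size(); ++i) delete arcs[i];
    }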

View File

@@ -251,9 +251,18 @@ public:
   return m_transOpt;
 }
-void OutputAlignment(std::ostream &out) const;
-static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
-static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
+void
+OutputAlignment(std::ostream &out) const;
+static void
+OutputAlignment(std::ostream &out,
+                const std::vector<const Hypothesis *> &edges,
+                WordAlignmentSort waso);
+static void
+OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai,
+                size_t sourceOffset, size_t targetOffset,
+                WordAlignmentSort waso);
 void OutputInput(std::ostream& os) const;
 static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);

View File

@@ -36,7 +36,7 @@ namespace Moses
 HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
   HypothesisStack(manager)
 {
-  m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
+  m_nBestIsEnabled = manager.options().nbest.enabled;
   m_bestScore = -std::numeric_limits<float>::infinity();
   m_worstScore = -std::numeric_limits<float>::infinity();
 }

View File

@@ -79,12 +79,6 @@ namespace Moses
 IOWrapper::IOWrapper()
   : m_nBestStream(NULL)
-  // , m_outputWordGraphStream(NULL)
-  // , m_outputSearchGraphStream(NULL)
-  // , m_detailedTranslationReportingStream(NULL)
-  // , m_unknownsStream(NULL)
-  // , m_alignmentInfoStream(NULL)
-  // , m_latticeSamplesStream(NULL)
   , m_surpressSingleBestOutput(false)
   , m_look_ahead(0)
   , m_look_back(0)
@@ -100,8 +94,8 @@ IOWrapper::IOWrapper()
   m_look_ahead = staticData.options().context.look_ahead;
   m_look_back = staticData.options().context.look_back;
-  m_inputType = staticData.GetInputType();
+  m_inputType = staticData.options().input.input_type;
   UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
                  "Context-sensitive decoding currently works only with sentence input.");

View File

@@ -216,6 +216,7 @@ boost::shared_ptr<InputType>
 IOWrapper::
 BufferInput()
 {
+  AllOptions const& opts = StaticData::Instance().options();
   boost::shared_ptr<itype> source;
   boost::shared_ptr<InputType> ret;
   if (m_future_input.size()) {
@@ -224,13 +225,13 @@ BufferInput()
     m_buffered_ahead -= ret->GetSize();
   } else {
     source.reset(new itype);
-    if (!source->Read(*m_inputStream, *m_inputFactorOrder))
+    if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
       return ret;
     ret = source;
   }
   while (m_buffered_ahead < m_look_ahead) {
     source.reset(new itype);
-    if (!source->Read(*m_inputStream, *m_inputFactorOrder))
+    if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
       break;
     m_future_input.push_back(source);
     m_buffered_ahead += source->GetSize();
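BufferInput() serves sentences from a read-ahead queue and keeps reading until enough context is buffered, now passing the options object into every Read() call. A rough, self-contained sketch of that look-ahead buffering (simplified, hypothetical types; the real code buffers InputType objects and counts their sizes):

    #include <cstddef>
    #include <deque>
    #include <iostream>
    #include <sstream>
    #include <string>

    struct Reader {
      std::istream& in;
      std::deque<std::string> future;   // read-ahead buffer
      std::size_t buffered_words;
      std::size_t look_ahead;           // how much context to keep ready

      Reader(std::istream& s, std::size_t la)
        : in(s), buffered_words(0), look_ahead(la) {}

      static std::size_t words(const std::string& line) {
        std::istringstream tok(line);
        std::string w; std::size_t n = 0;
        while (tok >> w) ++n;
        return n;
      }

      bool next(std::string& out) {
        if (!future.empty()) {          // serve buffered input first
          out = future.front(); future.pop_front();
          buffered_words -= words(out);
        } else if (!std::getline(in, out)) {
          return false;
        }
        std::string ahead;              // then top the buffer back up
        while (buffered_words < look_ahead && std::getline(in, ahead)) {
          future.push_back(ahead);
          buffered_words += words(ahead);
        }
        return true;
      }
    };

    int main() {
      std::istringstream in("a b c\nd e\nf\n");
      Reader r(in, 2);
      std::string line;
      while (r.next(line)) std::cout << line << "\n";
    }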

View File

@@ -320,10 +320,15 @@ void Manager::OutputNBest(OutputCollector *collector) const
   OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
 }
-void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
+void
+Manager::
+OutputNBestList(OutputCollector *collector,
+                std::vector<search::Applied> const& nbest,
+                long translationId) const
 {
   const StaticData &staticData = StaticData::Instance();
-  const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+  const std::vector<Moses::FactorType> &outputFactorOrder
+    = staticData.GetOutputFactorOrder();
   std::ostringstream out;
   // wtf? copied from the original OutputNBestList
@@ -332,18 +337,21 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
   }
   Phrase outputPhrase;
   ScoreComponentCollection features;
-  for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
+  for (std::vector<search::Applied>::const_iterator i = nbest.begin();
+       i != nbest.end(); ++i) {
     Incremental::PhraseAndFeatures(*i, outputPhrase, features);
     // <s> and </s>
     UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
-                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+                   "Output phrase should have contained at least 2 words "
+                   << "(beginning and end-of-sentence)");
     outputPhrase.RemoveWord(0);
     outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
     out << translationId << " ||| ";
     OutputSurface(out, outputPhrase, outputFactorOrder, false);
     out << " ||| ";
-    features.OutputAllFeatureScores(out);
+    bool with_labels = options().nbest.include_feature_labels;
+    features.OutputAllFeatureScores(out, with_labels);
     out << " ||| " << i->GetScore() << '\n';
   }
   out << std::flush;
@@ -351,7 +359,9 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
   collector->Write(translationId, out.str());
 }
-void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
+void
+Manager::
+OutputDetailedTranslationReport(OutputCollector *collector) const
 {
   if (collector && !completed_nbest_->empty()) {
     const search::Applied &applied = completed_nbest_->at(0);
@@ -498,7 +508,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
   if (collector == NULL) return;
   std::ostringstream out;
   FixPrecision(out);
-  if (StaticData::Instance().GetOutputHypoScore()) {
+  if (options().output.ReportHypoScore) {
     out << applied.GetScore() << ' ';
   }
   Phrase outPhrase;
@@ -515,10 +525,12 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
   VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
 }
-void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
+void
+Manager::
+OutputBestNone(OutputCollector *collector, long translationId) const
 {
   if (collector == NULL) return;
-  if (StaticData::Instance().GetOutputHypoScore()) {
+  if (options().output.ReportHypoScore) {
     collector->Write(translationId, "0 \n");
   } else {
     collector->Write(translationId, "\n");
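Several output paths in this merge gain a with_labels flag taken from options().nbest.include_feature_labels and forward it to OutputAllFeatureScores(). A tiny, self-contained sketch of what such a switch does to n-best feature output (hypothetical names and output format):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::pair<std::string, std::vector<float> > FeatureScores;

    void output_scores(std::ostream& out,
                       const std::vector<FeatureScores>& all,
                       bool with_labels) {
      for (std::size_t i = 0; i < all.size(); ++i) {
        if (with_labels) out << all[i].first << "= ";   // e.g. "LM0= -12.5"
        for (std::size_t j = 0; j < all[i].second.size(); ++j)
          out << all[i].second[j] << " ";
      }
    }

    int main() {
      std::vector<FeatureScores> f;
      f.push_back(FeatureScores("LM0", std::vector<float>(1, -12.5f)));
      f.push_back(FeatureScores("TM0", std::vector<float>(2, -3.0f)));
      output_scores(std::cout, f, true);   std::cout << "\n";
      output_scores(std::cout, f, false);  std::cout << "\n";
    }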

View File

@ -1,5 +1,4 @@
// -*- c++ -*- // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
// vim:tabstop=2 // vim:tabstop=2
/*********************************************************************** /***********************************************************************
@ -31,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ReorderingConstraint.h" #include "ReorderingConstraint.h"
#include "NonTerminal.h" #include "NonTerminal.h"
#include "Range.h" #include "Range.h"
#include "parameters/AllOptions.h"
namespace Moses namespace Moses
{ {
@ -184,7 +184,10 @@ public:
} }
//! populate this InputType with data from in stream //! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0; virtual int
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts) =0;
//! Output debugging info to stream out //! Output debugging info to stream out
virtual void Print(std::ostream&) const =0; virtual void Print(std::ostream&) const =0;

View File

@@ -1,4 +1,5 @@
-// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
+// $Id$
 /***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2006 University of Edinburgh
@@ -25,14 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #include "n_gram.h"
 #include "lmContainer.h"
-// should be defined in lmContainer.h, if the version of IRSTLM used provides
-// context-dependent functionality
-#ifndef _IRSTLM_LMCONTEXTDEPENDENT
-#define _IRSTLM_LMCONTEXTDEPENDENT 5
-#else
-#define IRSTLM_CONTEXT_DEPENDENT
-#endif
 using namespace irstlm;
 #include "IRST.h"
@@ -67,10 +60,9 @@ public:
   }
 };
-LanguageModelIRST::
-LanguageModelIRST(const std::string &line)
-  : LanguageModelSingleFactor(line)
-  , m_lmtb_dub(0), m_lmtb_size(0)
+LanguageModelIRST::LanguageModelIRST(const std::string &line)
+  :LanguageModelSingleFactor(line)
+  ,m_lmtb_dub(0), m_lmtb_size(0)
 {
   const StaticData &staticData = StaticData::Instance();
   int threadCount = staticData.ThreadCount();
@@ -86,9 +78,9 @@ LanguageModelIRST(const std::string &line)
   VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
 }
-LanguageModelIRST::
-~LanguageModelIRST()
+LanguageModelIRST::~LanguageModelIRST()
 {
 #ifndef WIN32
   TRACE_ERR( "reset mmap\n");
   if (m_lmtb) m_lmtb->reset_mmap();
@@ -98,17 +90,13 @@ LanguageModelIRST::
 }
-bool
-LanguageModelIRST::
-IsUseable(const FactorMask &mask) const
+bool LanguageModelIRST::IsUseable(const FactorMask &mask) const
 {
   bool ret = mask[m_factorType];
   return ret;
 }
-void
-LanguageModelIRST::
-Load()
+void LanguageModelIRST::Load()
 {
   FactorCollection &factorCollection = FactorCollection::Instance();
@@ -135,9 +123,7 @@ Load()
   if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
 }
-void
-LanguageModelIRST::
-CreateFactors(FactorCollection &factorCollection)
+void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
 {
   // add factors which have srilm id
   // code copied & paste from SRI LM class. should do template function
@@ -179,23 +165,17 @@ CreateFactors(FactorCollection &factorCollection)
   }
 }
-int
-LanguageModelIRST::
-GetLmID( const std::string &str ) const
+int LanguageModelIRST::GetLmID( const std::string &str ) const
 {
   return d->encode( str.c_str() ); // at the level of micro tags
 }
-int
-LanguageModelIRST::
-GetLmID( const Word &word ) const
+int LanguageModelIRST::GetLmID( const Word &word ) const
 {
   return GetLmID( word.GetFactor(m_factorType) );
 }
-int
-LanguageModelIRST::
-GetLmID( const Factor *factor ) const
+int LanguageModelIRST::GetLmID( const Factor *factor ) const
 {
   size_t factorId = factor->GetId();
@@ -216,21 +196,21 @@ GetLmID( const Factor *factor ) const
 ///////////
 ///OLD PROBLEM - SOLVED
 ////////////
 /// THE PROBLEM WAS HERE
 /// m_lmIdLookup.push_back(code);
 /// BECAUSE BY USING PUSH_BACK WE WERE ACTUALLY INSERTING THE NEW ELEMENT
 /// AT POSITION (factorID-1) instead of at position factorID, where we later read it back (see case C)
 /// This way it works ....
 /// I am unsure what sits in the first positions of m_lmIdLookup
 /// so
 /// I find that every other entry stays empty
 /// because factorID grows in steps of two (it encodes both source and target), leaving position (factorID-1) "empty"
 /// no correctness problem, just some "wasted" memory
 /// we could make m_lmIdLookup a std::map instead of a std::vector,
 /// but access would be less efficient because the random access of vectors is no longer possible
 /// the choice is yours!!!!
 ////////////////
 if (factorId >= m_lmIdLookup.size()) {
@@ -251,34 +231,21 @@ GetLmID( const Factor *factor ) const
   }
 }
-FFState const*
-LanguageModelIRST::
-EmptyHypothesisState(const InputType &/*input*/) const
+const FFState* LanguageModelIRST::EmptyHypothesisState(const InputType &/*input*/) const
 {
   std::auto_ptr<IRSTLMState> ret(new IRSTLMState());
   return ret.release();
 }
-void
-LanguageModelIRST::
-CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
+void LanguageModelIRST::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
 {
-  bool isContextAdaptive
-    = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
   fullScore = 0;
   ngramScore = 0;
   oovCount = 0;
   if ( !phrase.GetSize() ) return;
-  //get the context_weight map here
-  SPTR<std::map<std::string, float> const> CW;
-  if (isContextAdaptive && phrase.HasScope()) {
-    CW = phrase.GetScope()->GetContextWeights();
-  }
   int _min = min(m_lmtb_size - 1, (int) phrase.GetSize());
   int codes[m_lmtb_size];
@@ -289,78 +256,36 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
   char* msp = NULL;
   float before_boundary = 0.0;
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  if (CW) {
-    for (; position < _min; ++position) {
-      codes[idx] = GetLmID(phrase.GetWord(position));
-      if (codes[idx] == m_unknownId) ++oovCount;
-      before_boundary += m_lmtb->clprob(codes,idx+1,*CW,NULL,NULL,&msp);
-      ++idx;
-    }
-  } else {
-#endif
-    for (; position < _min; ++position) {
-      codes[idx] = GetLmID(phrase.GetWord(position));
-      if (codes[idx] == m_unknownId) ++oovCount;
-      before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
-      ++idx;
-    }
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  }
-#endif
+  for (; position < _min; ++position) {
+    codes[idx] = GetLmID(phrase.GetWord(position));
+    if (codes[idx] == m_unknownId) ++oovCount;
+    before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
+    ++idx;
+  }
   ngramScore = 0.0;
   int end_loop = (int) phrase.GetSize();
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  if (CW) {
-    for (; position < end_loop; ++position) {
-      for (idx = 1; idx < m_lmtb_size; ++idx) {
-        codes[idx-1] = codes[idx];
-      }
-      codes[idx-1] = GetLmID(phrase.GetWord(position));
-      if (codes[idx-1] == m_unknownId) ++oovCount;
-      ngramScore += m_lmtb->clprob(codes,idx,*CW,NULL,NULL,&msp);
-    }
-  } else {
-#endif
-    for (; position < end_loop; ++position) {
-      for (idx = 1; idx < m_lmtb_size; ++idx) {
-        codes[idx-1] = codes[idx];
-      }
-      codes[idx-1] = GetLmID(phrase.GetWord(position));
-      if (codes[idx-1] == m_unknownId) ++oovCount;
-      ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
-    }
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  }
-#endif
+  for (; position < end_loop; ++position) {
+    for (idx = 1; idx < m_lmtb_size; ++idx) {
+      codes[idx-1] = codes[idx];
+    }
+    codes[idx-1] = GetLmID(phrase.GetWord(position));
+    if (codes[idx-1] == m_unknownId) ++oovCount;
+    ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
+  }
   before_boundary = TransformLMScore(before_boundary);
   ngramScore = TransformLMScore(ngramScore);
   fullScore = ngramScore + before_boundary;
 }
-FFState*
-LanguageModelIRST::
-EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
-                    ScoreComponentCollection *out) const
+FFState* LanguageModelIRST::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
 {
-  bool isContextAdaptive
-    = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
   if (!hypo.GetCurrTargetLength()) {
     std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
     return ret.release();
   }
-  //get the context_weight map here
-  SPTR<std::map<std::string, float> const> CW;
-  if (isContextAdaptive) {
-    ttasksptr ttask = hypo.GetManager().GetTtask();
-    if (ttask) CW = ttask->GetScope()->GetContextWeights();
-  }
   //[begin, end) in STL-like fashion.
   const int begin = (const int) hypo.GetCurrTargetWordsRange().GetStartPos();
   const int end = (const int) hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
@@ -383,34 +308,18 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
   }
   char* msp = NULL;
+  float score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
   position = (const int) begin+1;
-  float score;
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  if (CW) {
-    score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
-    while (position < adjust_end) {
-      for (idx=1; idx<m_lmtb_size; idx++) {
-        codes[idx-1] = codes[idx];
-      }
-      codes[idx-1] = GetLmID(hypo.GetWord(position));
-      score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
-      ++position;
-    }
-  } else {
-#endif
-    score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
-    position = (const int) begin+1;
-    while (position < adjust_end) {
-      for (idx=1; idx<m_lmtb_size; idx++) {
-        codes[idx-1] = codes[idx];
-      }
-      codes[idx-1] = GetLmID(hypo.GetWord(position));
-      score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
-      ++position;
-    }
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  }
-#endif
+  while (position < adjust_end) {
+    for (idx=1; idx<m_lmtb_size; idx++) {
+      codes[idx-1] = codes[idx];
+    }
+    codes[idx-1] = GetLmID(hypo.GetWord(position));
+    score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
+    ++position;
+  }
   //adding probability of having sentenceEnd symbol, after this phrase;
   //this could happen only when all source words are covered
   if (hypo.IsSourceCompleted()) {
@@ -427,13 +336,8 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
     codes[idx] = m_lmtb_sentenceStart;
     --idx;
   }
-#ifdef IRSTLM_CONTEXT_DEPENDENT
-  if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
-  else
-#else
   score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
-#endif
 } else {
   // need to set the LM state
   if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
@@ -454,9 +358,7 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
   return ret.release();
 }
-LMResult
-LanguageModelIRST::
-GetValue(const vector<const Word*> &contextFactor, State* finalState) const
+LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
 {
   // set up context
   size_t count = contextFactor.size();
@@ -492,8 +394,7 @@ GetValue(const vector<const Word*> &contextFactor, State* finalState) const
   return result;
 }
-bool
-LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
+bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
 {
   if (sentences_done==-1) return true;
   if (m_lmcache_cleanup_threshold)
@@ -510,9 +411,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask)
 #endif
 }
-void
-LanguageModelIRST::
-CleanUpAfterSentenceProcessing(const InputType& source)
+void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source)
 {
   const StaticData &staticData = StaticData::Instance();
   static int sentenceCount = 0;
@@ -526,9 +425,7 @@ CleanUpAfterSentenceProcessing(const InputType& source)
   }
 }
-void
-LanguageModelIRST::
-SetParameter(const std::string& key, const std::string& value)
+void LanguageModelIRST::SetParameter(const std::string& key, const std::string& value)
 {
   if (key == "dub") {
     m_lmtb_dub = Scan<unsigned int>(value);
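With the context-dependent IRSTLM branches removed, what remains in CalcScore() and EvaluateWhenApplied() is a plain sliding-window n-gram scoring loop over word ids. A stand-alone sketch of that loop shape, with a dummy scoring function in place of m_lmtb->clprob() (not IRSTLM code):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // stand-in for the wrapped LM: score an n-gram given as word ids
    static double lm_logprob(const std::vector<int>& ngram) {
      return -0.5 * ngram.size();        // dummy value for illustration
    }

    double score_sentence(const std::vector<int>& ids, std::size_t order) {
      std::vector<int> window(order, 0); // 0 = sentence-start / padding id
      double total = 0.0;
      for (std::size_t pos = 0; pos < ids.size(); ++pos) {
        // shift the history left by one, as the codes[] loop above does
        for (std::size_t i = 1; i < order; ++i) window[i - 1] = window[i];
        window[order - 1] = ids[pos];
        total += lm_logprob(window);
      }
      return total;
    }

    int main() {
      std::vector<int> ids;
      ids.push_back(17); ids.push_back(42); ids.push_back(7);
      std::cout << score_sentence(ids, 3) << "\n";
    }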

View File

@ -1,4 +1,3 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// $Id$ // $Id$
/*********************************************************************** /***********************************************************************
@ -92,20 +91,17 @@ public:
void Load(); void Load();
const FFState *EmptyHypothesisState(const InputType &/*input*/) const; const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
protected:
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const; virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
public:
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
/* virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; /*
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
*/
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
*/
void InitializeForInput(ttasksptr const& ttask);
void CleanUpAfterSentenceProcessing(const InputType& source);

View File

@ -1,6 +1,5 @@
// $Id$ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -49,6 +48,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/mbr.h" #include "moses/mbr.h"
#include "moses/LatticeMBR.h" #include "moses/LatticeMBR.h"
#include <boost/foreach.hpp>
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
#include "rule.pb.h"
@ -98,6 +99,10 @@ Manager::GetSource() const
*/
void Manager::Decode()
{
std::cerr << options().nbest.nbest_size << " "
<< options().nbest.enabled << " " << std::endl;
// initialize statistics
ResetSentenceStats(m_source);
IFVERBOSE(2) {
@ -123,7 +128,8 @@ void Manager::Decode()
// some reporting on how long this took
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();
TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took " TRACE_ERR("Line "<< m_source.GetTranslationId()
<< ": Collecting options took "
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
<< __FILE__ << ":" << __LINE__ << endl);
}
@ -1112,11 +1118,13 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea
} }
void OutputSearchNode(long translationId, std::ostream &outputSearchGraphStream, void
const SearchGraphNode& searchNode) OutputSearchNode(AllOptions const& opts, long translationId,
std::ostream &outputSearchGraphStream,
SearchGraphNode const& searchNode)
{ {
const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended(); bool extendedFormat = opts.output.SearchGraphExtended.size();
outputSearchGraphStream << translationId;
// special case: initial hypothesis
@ -1369,24 +1377,32 @@ void Manager::SerializeSearchGraphPB(
}
#endif
void Manager::OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const void
Manager::
OutputSearchGraph(long translationId, std::ostream &out) const
{
vector<SearchGraphNode> searchGraph;
GetSearchGraph(searchGraph);
for (size_t i = 0; i < searchGraph.size(); ++i) {
OutputSearchNode(translationId,outputSearchGraphStream,searchGraph[i]); OutputSearchNode(options(),translationId,out,searchGraph[i]);
}
}
void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, void
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const Manager::
GetForwardBackwardSearchGraph
( std::map< int, bool >* pConnected,
std::vector<Hypothesis const* >* pConnectedList,
std::map<Hypothesis const*, set<Hypothesis const*> >* pOutgoingHyps,
vector< float>* pFwdBwdScores) const
{
std::map < int, bool > &connected = *pConnected;
std::vector< const Hypothesis *>& connectedList = *pConnectedList;
std::map < int, int > forward;
std::map < int, double > forwardScore;
std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps = *pOutgoingHyps; std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps
= *pOutgoingHyps;
vector< float> & estimatedScores = *pFwdBwdScores;
// *** find connected hypotheses ***
@ -1395,7 +1411,8 @@ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
// ** compute best forward path for each hypothesis *** //
// forward cost of hypotheses on final stack is 0
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks(); const std::vector < HypothesisStack* > &hypoStackColl
= m_search->GetHypothesisStacks();
const HypothesisStack &finalStack = *hypoStackColl.back();
HypothesisStack::const_iterator iterHypo;
for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
@ -1504,34 +1521,34 @@ void Manager::OutputBest(OutputCollector *collector) const
if (!options().mbr.enabled) {
bestHypo = GetBestHypothesis();
if (bestHypo) {
if (StaticData::Instance().GetOutputHypoScore()) { if (options().output.ReportHypoScore) {
out << bestHypo->GetTotalScore() << ' ';
}
if (staticData.IsPathRecoveryEnabled()) { if (options().output.RecoverPath) {
bestHypo->OutputInput(out);
out << "||| ";
}
const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); // const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
if (params && params->size() && Scan<bool>(params->at(0)) ) { if (options().output.PrintID) {
out << translationId << " "; out << translationId << " ";
} }
// VN : I put back the code for OutputPassthroughInformation // VN : I put back the code for OutputPassthroughInformation
if (staticData.IsPassthroughEnabled()) { if (options().output.PrintPassThrough) {
OutputPassthroughInformation(out, bestHypo);
}
// end of add back
if (staticData.GetReportSegmentation() == 2) { if (options().output.ReportSegmentation == 2) {
GetOutputLanguageModelOrder(out, bestHypo);
}
bestHypo->OutputBestSurface(
out,
staticData.GetOutputFactorOrder(),
staticData.GetReportSegmentation(), options().output.ReportSegmentation,
staticData.GetReportAllFactors()); options().output.ReportAllFactors);
if (staticData.PrintAlignmentInfo()) { if (options().output.PrintAlignmentInfo) {
out << "||| "; out << "||| ";
bestHypo->OutputAlignment(out); bestHypo->OutputAlignment(out);
} }
@ -1572,8 +1589,9 @@ void Manager::OutputBest(OutputCollector *collector) const
} else {
//Lattice MBR decoding
vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(), OutputBestHypo(mbrBestHypo, translationId,
staticData.GetReportAllFactors(),out); options().output.ReportSegmentation,
options().output.ReportAllFactors, out);
IFVERBOSE(2) {
PrintUserTime("finished Lattice MBR decoding");
}
@ -1584,8 +1602,8 @@ void Manager::OutputBest(OutputCollector *collector) const
else if (options().search.consensus) {
const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
OutputBestHypo(conBestHypo, translationId,
staticData.GetReportSegmentation(), options().output.ReportSegmentation,
staticData.GetReportAllFactors(),out); options().output.ReportAllFactors, out);
OutputAlignment(m_alignmentOut, conBestHypo);
IFVERBOSE(2) {
PrintUserTime("finished Consensus decoding");
@ -1596,8 +1614,8 @@ void Manager::OutputBest(OutputCollector *collector) const
else {
const TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, translationId,
staticData.GetReportSegmentation(), options().output.ReportSegmentation,
staticData.GetReportAllFactors(),out); options().output.ReportAllFactors, out);
OutputAlignment(m_alignmentOut, mbrBestHypo);
IFVERBOSE(2) {
PrintUserTime("finished MBR decoding");
@ -1624,7 +1642,7 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();
if (options().lmbr.enabled) {
if (staticData.options().nbest.enabled) { if (options().nbest.enabled) {
collector->Write(translationId, m_latticeNBestOut.str());
}
} else {
@ -1632,22 +1650,24 @@ void Manager::OutputNBest(OutputCollector *collector) const
ostringstream out;
CalcNBest(options().nbest.nbest_size, nBestList,
options().nbest.only_distinct);
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), OutputNBest(out, nBestList,
staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(), m_source.GetTranslationId(),
staticData.GetReportSegmentation()); options().output.ReportSegmentation);
collector->Write(m_source.GetTranslationId(), out.str());
}
}
void Manager::OutputNBest(std::ostream& out void
, const Moses::TrellisPathList &nBestList Manager::
, const std::vector<Moses::FactorType>& outputFactorOrder OutputNBest(std::ostream& out,
, long translationId const Moses::TrellisPathList &nBestList,
, char reportSegmentation) const const std::vector<Moses::FactorType>& outputFactorOrder,
long translationId, char reportSegmentation) const
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
NBestOptions const& nbo = staticData.options().nbest; NBestOptions const& nbo = options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
bool includeWordAlignment = nbo.include_alignment_info;
@ -1661,12 +1681,14 @@ void Manager::OutputNBest(std::ostream& out
out << translationId << " ||| "; out << translationId << " ||| ";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge]; const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors); OutputSurface(out, edge, outputFactorOrder, reportSegmentation,
reportAllFactors);
}
out << " |||";
// print scores with feature names
path.GetScoreBreakdown()->OutputAllFeatureScores(out); bool with_labels = options().nbest.include_feature_labels;
path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels);
// total // total
out << " ||| " << path.GetTotalScore(); out << " ||| " << path.GetTotalScore();
@ -1704,7 +1726,7 @@ void Manager::OutputNBest(std::ostream& out
} }
} }
if (StaticData::Instance().IsPathRecoveryEnabled()) { if (options().output.RecoverPath) {
out << " ||| "; out << " ||| ";
OutputInput(out, edges[0]); OutputInput(out, edges[0]);
} }
@ -1719,8 +1741,11 @@ void Manager::OutputNBest(std::ostream& out
/***
* print surface factor only for the given phrase
*/
void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, void
char reportSegmentation, bool reportAllFactors) const Manager::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
@ -1788,26 +1813,33 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
out << ","; out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
scoreBreakdown.OutputAllFeatureScores(out); bool with_labels = options().nbest.include_feature_labels;
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
}
out << "| ";
}
}
void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const void
Manager::
OutputAlignment(ostream &out, const AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset) const
{ {
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec; typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments(); AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder);
AlignVec::const_iterator it; AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) { for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it; const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; out << alignment.first + sourceOffset << "-"
<< alignment.second + targetOffset << " ";
} }
} }
void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const void
Manager::
OutputInput(std::ostream& os, const Hypothesis* hypo) const
{
size_t len = hypo->GetInput().GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
@ -1851,8 +1883,10 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
TrellisPathList latticeSamples; TrellisPathList latticeSamples;
ostringstream out; ostringstream out;
CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(), OutputNBest(out,latticeSamples,
staticData.GetReportSegmentation()); staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(),
options().output.ReportSegmentation);
collector->Write(m_source.GetTranslationId(), out.str()); collector->Write(m_source.GetTranslationId(), out.str());
} }
@ -1970,14 +2004,10 @@ void Manager::OutputSearchGraphSLF() const
long translationId = m_source.GetTranslationId(); long translationId = m_source.GetTranslationId();
// Output search graph in HTK standard lattice format (SLF) // Output search graph in HTK standard lattice format (SLF)
bool slf = staticData.GetOutputSearchGraphSLF(); std::string const& slf = options().output.SearchGraphSLF;
if (slf) { if (slf.size()) {
util::StringStream fileName; util::StringStream fileName;
fileName << slf << "/" << translationId << ".slf";
string dir;
staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
fileName << dir << "/" << translationId << ".slf";
ofstream *file = new ofstream; ofstream *file = new ofstream;
file->open(fileName.str().c_str()); file->open(fileName.str().c_str());
if (file->is_open() && file->good()) { if (file->is_open() && file->good()) {
@ -2045,7 +2075,11 @@ void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*trans
out << endl; out << endl;
} }
void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const void
Manager::
OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
char reportSegmentation, bool reportAllFactors,
std::ostream &out) const
{ {
const std::vector<const Hypothesis *> &edges = path.GetEdges(); const std::vector<const Hypothesis *> &edges = path.GetEdges();
@ -2056,9 +2090,12 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI
out << endl; out << endl;
} }
void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const void
Manager::
OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
{ {
Hypothesis::OutputAlignment(out, path.GetEdges()); WordAlignmentSort waso = options().output.WA_SortOrder;
Hypothesis::OutputAlignment(out, path.GetEdges(), waso);
// Used by --alignment-output-file so requires endl
out << std::endl;
}
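A recurring change throughout this file: per-sentence output decisions (hypothesis score, path recovery, segmentation reporting, alignment printing) are read from the manager's options() snapshot instead of ad-hoc getters on the global StaticData singleton. A minimal standalone sketch of that design, using invented stand-in types rather than the actual Moses classes:

#include <iostream>
#include <string>

// Invented stand-ins for AllOptions/Manager, just to show the pattern:
// decoder components consult an options object handed to them per task
// instead of querying a global singleton for each flag.
struct ReportingOptions {
  bool ReportHypoScore    = false;
  bool RecoverPath        = false;
  int  ReportSegmentation = 0;   // 0 = off, 1 = plain, 2 = enriched
};

struct AllOptions {
  ReportingOptions output;
};

class Manager {
public:
  explicit Manager(AllOptions const& opts) : m_options(opts) {}
  AllOptions const& options() const { return m_options; }

  void OutputBest(std::string const& hypo, float score, std::ostream& out) const {
    if (options().output.ReportHypoScore) out << score << ' ';
    out << hypo;
    if (options().output.ReportSegmentation) out << " |0-0|";
    out << '\n';
  }

private:
  AllOptions m_options;   // per-task snapshot, not a global
};

int main() {
  AllOptions opts;
  opts.output.ReportHypoScore = true;
  Manager m(opts);
  m.OutputBest("hello world", -3.25f, std::cout);  // prints "-3.25 hello world"
}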

View File

@ -131,7 +131,7 @@ protected:
// nbest // nbest
mutable std::ostringstream m_latticeNBestOut; mutable std::ostringstream m_latticeNBestOut;
mutable std::ostringstream m_alignmentOut; mutable std::ostringstream m_alignmentOut;
public:
void OutputNBest(std::ostream& out
, const Moses::TrellisPathList &nBestList
, const std::vector<Moses::FactorType>& outputFactorOrder

View File

@ -39,16 +39,19 @@ MockHypothesisGuard
{
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
std::vector<Moses::FactorType> factors(1,0);
m_sentence.reset(new Sentence(0, sourceSentence, &factors)); AllOptions const& opts = StaticData::Instance().options();
m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors));
m_ttask = TranslationTask::create(m_sentence);
m_manager.reset(new Manager(m_ttask));
//Initial empty hypothesis
Bitmaps bitmaps(m_sentence.get()->GetSize(), m_sentence.get()->m_sourceCompleted); Bitmaps bitmaps(m_sentence.get()->GetSize(),
m_sentence.get()->m_sourceCompleted);
m_manager->ResetSentenceStats(*m_sentence); m_manager->ResetSentenceStats(*m_sentence);
const Bitmap &initBitmap = bitmaps.GetInitialBitmap(); const Bitmap &initBitmap = bitmaps.GetInitialBitmap();
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, initBitmap); m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt,
initBitmap);
//create the chain //create the chain
vector<Alignment>::const_iterator ai = alignments.begin(); vector<Alignment>::const_iterator ai = alignments.begin();
@ -56,7 +59,8 @@ MockHypothesisGuard
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
Hypothesis* prevHypo = m_hypothesis;
Range range(ai->first,ai->second);
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range); const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(),
range);
m_targetPhrases.push_back(TargetPhrase(NULL));
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);

View File

@ -1620,6 +1620,13 @@ SetParameter<bool>(bool &parameter, std::string const& parameterName,
} }
} }
void
Parameter::
SetParameter(bool& var, std::string const& name)
{
SetParameter(var,name,false);
}
} // namespace

View File

@ -149,6 +149,20 @@ public:
} }
} }
void SetParameter(bool& var, std::string const& name);
bool SetBooleanSwitch(bool& val, std::string const name) {
// issues a warning if format is wrong
const PARAM_VEC *params = GetParam(name);
val = (params && params->size());
if (val && params->size() != 1)
{
TRACE_ERR("ERROR: wrong format for switch -" << name);
return false;
}
return true;
}
};
template<>
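The Parameter additions above (SetParameter(bool&, name) and SetBooleanSwitch) give callers a one-liner for plain on/off switches: the flag becomes true when the option is present with a value, and a warning is issued if it carries an unexpected value list. A standalone sketch of the same presence-based switch parsing, with a toy parameter store rather than the Moses Parameter class:

#include <iostream>
#include <map>
#include <string>
#include <vector>

typedef std::vector<std::string> PARAM_VEC;

// Toy command-line store: option name -> list of values.
struct ToyParameter {
  std::map<std::string, PARAM_VEC> params;

  const PARAM_VEC* GetParam(const std::string& name) const {
    std::map<std::string, PARAM_VEC>::const_iterator it = params.find(name);
    return it == params.end() ? nullptr : &it->second;
  }

  // A switch counts as set when the option appears with at least one
  // value; more than one value triggers a warning and a false return.
  bool SetBooleanSwitch(bool& val, const std::string& name) const {
    const PARAM_VEC* p = GetParam(name);
    val = (p && p->size());
    if (val && p->size() != 1) {
      std::cerr << "ERROR: wrong format for switch -" << name << std::endl;
      return false;
    }
    return true;
  }
};

int main() {
  ToyParameter param;
  param.params["print-id"] = PARAM_VEC(1, "true");
  bool printId = false, recoverPath = false;
  param.SetBooleanSwitch(printId, "print-id");         // -> true
  param.SetBooleanSwitch(recoverPath, "recover-path"); // -> false (absent)
  std::cout << printId << " " << recoverPath << std::endl;
}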

View File

@ -305,35 +305,38 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score
} }
} }
void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const void
ScoreComponentCollection::
OutputAllFeatureScores(std::ostream &out, bool with_labels) const
{
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName ); OutputFeatureScores(out, ff, lastName, with_labels);
}
}
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ ) {
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName ); OutputFeatureScores(out, ff, lastName, with_labels);
} }
} }
} }
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out void
, const FeatureFunction *ff ScoreComponentCollection::
, std::string &lastName ) const OutputFeatureScores(std::ostream& out, FeatureFunction const* ff,
std::string &lastName, bool with_labels) const
{ {
const StaticData &staticData = StaticData::Instance(); // const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.options().nbest.include_feature_labels; // bool labeledOutput = staticData.options().nbest.include_feature_labels;
// regular features (not sparse)
if (ff->HasTuneableComponents()) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) { if( with_labels && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
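OutputAllFeatureScores and OutputFeatureScores now receive the labelled-output decision as an explicit with_labels argument (taken from options().nbest.include_feature_labels by the callers) instead of consulting StaticData themselves. A small standalone illustration of printing a score vector with optional, once-per-feature labels (invented types, not the Moses ScoreComponentCollection):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// One (feature name, score) pair per component, as a stand-in for the
// real score collection.
typedef std::vector<std::pair<std::string, float> > Scores;

// Print " name= s1 s2 ..." when with_labels is true, just the numbers
// otherwise; the label is emitted once per feature name, mirroring the
// lastName bookkeeping visible above.
void OutputAllFeatureScores(std::ostream& out, const Scores& scores,
                            bool with_labels)
{
  std::string lastName;
  for (size_t i = 0; i < scores.size(); ++i) {
    if (with_labels && scores[i].first != lastName) {
      lastName = scores[i].first;
      out << " " << lastName << "=";
    }
    out << " " << scores[i].second;
  }
}

int main()
{
  Scores s;
  s.push_back(std::make_pair("LM0", -12.5f));
  s.push_back(std::make_pair("TM0", -3.1f));
  s.push_back(std::make_pair("TM0", -0.7f));
  OutputAllFeatureScores(std::cout, s, true);   //  LM0= -12.5 TM0= -3.1 -0.7
  std::cout << "\n";
  OutputAllFeatureScores(std::cout, s, false);  //  -12.5 -3.1 -0.7
  std::cout << "\n";
}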

View File

@ -433,10 +433,9 @@ public:
m_scores.merge(other.m_scores); m_scores.merge(other.m_scores);
} }
void OutputAllFeatureScores(std::ostream &out) const; void OutputAllFeatureScores(std::ostream &out, bool with_labels) const;
void OutputFeatureScores( std::ostream& out void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff,
, const Moses::FeatureFunction *ff std::string &lastName, bool with_labels) const;
, std::string &lastName ) const;
#ifdef MPI_ENABLE #ifdef MPI_ENABLE
public: public:

View File

@ -166,7 +166,8 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
void void
Sentence:: Sentence::
init(string line, std::vector<FactorType> const& factorOrder) init(string line, std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
using namespace std;
const StaticData &SD = StaticData::Instance();
@ -182,7 +183,8 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>"
if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) { if (SD.options().output.PrintPassThrough ||
SD.options().nbest.include_passthrough) {
string pthru = PassthroughSGML(line,"passthrough"); string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru); this->SetPassthroughInformation(pthru);
} }
@ -230,12 +232,14 @@ init(string line, std::vector<FactorType> const& factorOrder)
int int
Sentence:: Sentence::
Read(std::istream& in,const std::vector<FactorType>& factorOrder) Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
{ {
std::string line;
if (getline(in, line, '\n').eof())
return 0;
init(line, factorOrder); init(line, factorOrder, opts);
return 1;
}
@ -366,12 +370,14 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
} }
Sentence:: Sentence::
Sentence(size_t const transId, string const& stext, Sentence(size_t const transId,
string const& stext,
AllOptions const& opts,
vector<FactorType> const* IFO) vector<FactorType> const* IFO)
: InputType(transId)
{
if (IFO) init(stext, *IFO); if (IFO) init(stext, *IFO, opts);
else init(stext, StaticData::Instance().GetInputFactorOrder()); else init(stext, StaticData::Instance().GetInputFactorOrder(), opts);
} }
} }
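Sentence::Read keeps its simple contract, returning 0 once the input stream is exhausted and 1 after a line has been parsed, but now threads the AllOptions object through to init() so that input-time decisions such as pass-through handling no longer rely on the StaticData singleton alone. A standalone sketch of that read-loop contract with toy types, not the Moses InputType hierarchy:

#include <iostream>
#include <sstream>
#include <string>

struct ToyOptions { bool print_passthrough = false; };

// Toy sentence: init() is where option-dependent preprocessing would go.
struct ToySentence {
  std::string text;

  void init(const std::string& line, const ToyOptions& opts) {
    text = line;
    if (opts.print_passthrough) { /* strip/record <passthrough .../> here */ }
  }

  // 0 = end of input, 1 = one sentence read -- same convention as above.
  int Read(std::istream& in, const ToyOptions& opts) {
    std::string line;
    if (std::getline(in, line).eof() && line.empty()) return 0;
    init(line, opts);
    return 1;
  }
};

int main() {
  std::istringstream in("first sentence\nsecond sentence\n");
  ToyOptions opts;
  ToySentence s;
  while (s.Read(in, opts)) std::cout << "read: " << s.text << "\n";
}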

View File

@ -1,6 +1,4 @@
// -*- c++ -*- // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -28,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Word.h" #include "Word.h"
#include "Phrase.h" #include "Phrase.h"
#include "InputType.h" #include "InputType.h"
#include "parameters/AllOptions.h"
namespace Moses
{
@ -66,7 +65,8 @@ protected:
public: public:
Sentence(); Sentence();
Sentence(size_t const transId, std::string const& stext, Sentence(size_t const transId, std::string const& stext,
std::vector<FactorType> const* IFO = NULL); AllOptions const& opts,
std::vector<FactorType> const* IFO = NULL);
// Sentence(size_t const transId, std::string const& stext); // Sentence(size_t const transId, std::string const& stext);
~Sentence(); ~Sentence();
@ -97,7 +97,10 @@ public:
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder); virtual int
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
void Print(std::ostream& out) const; void Print(std::ostream& out) const;
TranslationOptionCollection* TranslationOptionCollection*
@ -114,7 +117,8 @@ public:
void void
init(std::string line, std::vector<FactorType> const& factorOrder); init(std::string line, std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
std::vector<std::map<std::string,std::string> > const& std::vector<std::map<std::string,std::string> > const&
GetDltMeta() const { GetDltMeta() const {

View File

@ -63,7 +63,7 @@ StaticData StaticData::s_instance;
StaticData::StaticData() StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false) : m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false) , m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput) // , m_inputType(SentenceInput)
, m_lmEnableOOVFeature(false) , m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false) , m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default") , m_currentWeightSetting("default")
@ -132,23 +132,11 @@ StaticData
const PARAM_VEC *params;
// input type has to be specified BEFORE loading the phrase tables!
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); // m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
m_parameter->SetParameter(m_continuePartialTranslation, m_parameter->SetParameter(m_continuePartialTranslation,
"continue-partial-translation", false ); "continue-partial-translation", false );
std::string s_it = "text input";
if (m_inputType == 1) {
s_it = "confusion net";
}
if (m_inputType == 2) {
s_it = "word lattice";
}
if (m_inputType == 3) {
s_it = "tree";
}
VERBOSE(2,"input type is: "<<s_it<<"\n");
// use of xml in input
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
@ -181,119 +169,30 @@ StaticData
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1); m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
m_parameter->SetParameter(m_recoverPath, "recover-input-path", false); m_parameter->SetParameter(m_includeLHSInSearchGraph,
if (m_recoverPath && m_inputType == SentenceInput) { "include-lhs-in-search-graph", false );
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
m_recoverPath = false;
}
m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false ); m_parameter->SetParameter<string>(m_outputUnknownsFile,
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false ); "output-unknowns", "");
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
}
m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
params = m_parameter->GetParam("output-word-graph");
m_outputWordGraph = (params && params->size() == 2);
params = m_parameter->GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
m_outputSearchGraph = true;
}
// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
m_outputSearchGraph = true;
m_outputSearchGraphExtended = true;
} else {
m_outputSearchGraph = false;
}
params = m_parameter->GetParam("output-search-graph-slf");
if (params && params->size()) {
m_outputSearchGraphSLF = true;
} else {
m_outputSearchGraphSLF = false;
}
params = m_parameter->GetParam("output-search-graph-hypergraph");
if (params && params->size()) {
m_outputSearchGraphHypergraph = true;
} else {
m_outputSearchGraphHypergraph = false;
}
#ifdef HAVE_PROTOBUF
params = m_parameter->GetParam("output-search-graph-pb");
if (params && params->size()) {
if (params->size() != 1) {
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
return false;
}
m_outputSearchGraphPB = true;
} else
m_outputSearchGraphPB = false;
#endif
m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false );
m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", "");
// printing source phrase spans
m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false );
m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false );
// print all factors of output translations
m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false );
//Print Translation Options //Print Translation Options
m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false ); m_parameter->SetParameter(m_printTranslationOptions,
"print-translation-option", false );
//Print All Derivations //Print All Derivations
m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false ); m_parameter->SetParameter(m_printAllDerivations ,
"print-all-derivations", false );
// additional output
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath, m_parameter->SetParameter<long>(m_startTranslationId,
"translation-details", ""); "start-translation-id", 0);
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath,
"tree-translation-details", "");
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath,
"translation-all-details", "");
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
//lattice samples //lattice samples
params = m_parameter->GetParam("lattice-samples");
if (params) {
if (params->size() ==2 ) {
m_latticeSamplesFilePath = params->at(0);
m_latticeSamplesSize = Scan<size_t>(params->at(1));
} else {
std::cerr <<"wrong format for switch -lattice-samples file size";
return false;
}
} else {
m_latticeSamplesSize = 0;
}
return true;
}
void void
StaticData StaticData::
::ini_compact_table_options() ini_compact_table_options()
{ {
// Compact phrase table and reordering model // Compact phrase table and reordering model
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false ); m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
@ -301,8 +200,8 @@ StaticData
} }
void void
StaticData StaticData::
::ini_lm_options() ini_lm_options()
{ {
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1); m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
} }
@ -349,8 +248,8 @@ StaticData
} }
void void
StaticData StaticData::
::ini_factor_maps() ini_factor_maps()
{ {
const PARAM_VEC *params; const PARAM_VEC *params;
// factor delimiter // factor delimiter
@ -380,8 +279,8 @@ StaticData
} }
void void
StaticData StaticData::
::ini_oov_options() ini_oov_options()
{ {
// unknown word processing // unknown word processing
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
@ -398,8 +297,8 @@ StaticData
} }
void void
StaticData StaticData::
::ini_zombie_options() ini_zombie_options()
{ {
//Disable discarding //Disable discarding
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false); m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
@ -434,20 +333,6 @@ bool StaticData::LoadData(Parameter *parameter)
// search // search
ini_oov_options(); ini_oov_options();
// set m_nbest_options.enabled = true if necessary:
if (m_options.mbr.enabled
|| m_options.mira
|| m_options.search.consensus
|| m_outputSearchGraph
|| m_outputSearchGraphSLF
|| m_outputSearchGraphHypergraph
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size()) {
m_options.nbest.enabled = true;
}
// S2T decoder // S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus); RecursiveCYKPlus);
@ -455,8 +340,9 @@ bool StaticData::LoadData(Parameter *parameter)
ini_zombie_options(); // probably dead, or maybe not ini_zombie_options(); // probably dead, or maybe not
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND); m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor",
NOT_FOUND);
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE =============================== // FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
initialize_features(); initialize_features();
@ -507,7 +393,8 @@ void StaticData::SetWeight(const FeatureFunction* sp, float weight)
m_allWeights.Assign(sp,weight); m_allWeights.Assign(sp,weight);
} }
void StaticData::SetWeights(const FeatureFunction* sp, const std::vector<float>& weights) void StaticData::SetWeights(const FeatureFunction* sp,
const std::vector<float>& weights)
{ {
m_allWeights.Resize(); m_allWeights.Resize();
m_allWeights.Assign(sp,weights); m_allWeights.Assign(sp,weights);
@ -557,8 +444,10 @@ void StaticData::LoadChartDecodingParameters()
LoadNonTerminals(); LoadNonTerminals();
// source label overlap // source label overlap
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd); m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE); SourceLabelOverlapAdd);
m_parameter->SetParameter(m_ruleLimit, "rule-limit",
DEFAULT_MAX_TRANS_OPT_SIZE);
} }
@ -596,12 +485,16 @@ void StaticData::LoadDecodeGraphs()
} }
} }
void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const vector<size_t> &maxChartSpans) void
StaticData::
LoadDecodeGraphsOld(const vector<string> &mappingVector,
const vector<size_t> &maxChartSpans)
{
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions(); const std::vector<FeatureFunction*> *featuresRemaining
= &FeatureFunction::GetFeatureFunctions();
DecodeStep *prev = 0; DecodeStep *prev = 0;
size_t prevDecodeGraphInd = 0; size_t prevDecodeGraphInd = 0;
@ -620,7 +513,8 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
// For specifying multiple translation model // For specifying multiple translation model
decodeGraphInd = Scan<size_t>(token[0]); decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one //the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
&& decodeGraphInd != prevDecodeGraphInd + 1,
"Malformed mapping"); "Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) { if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL; prev = NULL;
@ -707,7 +601,8 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
decodeGraphInd = Scan<size_t>(token[0]); decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one //the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
&& decodeGraphInd != prevDecodeGraphInd + 1,
"Malformed mapping"); "Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) { if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL; prev = NULL;
@ -783,17 +678,6 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
void StaticData::SetExecPath(const std::string &path) void StaticData::SetExecPath(const std::string &path)
{ {
/*
namespace fs = boost::filesystem;
fs::path full_path( fs::initial_path<fs::path>() );
full_path = fs::system_complete( fs::path( path ) );
//Without file name
m_binPath = full_path.parent_path().string();
*/
// NOT TESTED // NOT TESTED
size_t pos = path.rfind("/"); size_t pos = path.rfind("/");
if (pos != string::npos) { if (pos != string::npos) {
@ -810,34 +694,33 @@ const string &StaticData::GetBinDirectory() const
float StaticData::GetWeightWordPenalty() const float StaticData::GetWeightWordPenalty() const
{ {
float weightWP = GetWeight(&WordPenaltyProducer::Instance()); float weightWP = GetWeight(&WordPenaltyProducer::Instance());
//VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl);
return weightWP; return weightWP;
} }
void void
StaticData StaticData::
::InitializeForInput(ttasksptr const& ttask) const InitializeForInput(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {
Timer iTime;
iTime.start();
ff.InitializeForInput(ttask);
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )" VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription()
<< "= " << iTime << endl); << " )" << "= " << iTime << endl);
} }
} }
} }
void void
StaticData StaticData::
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {

View File

@ -82,9 +82,6 @@ protected:
// Initial = 0 = can be used when creating poss trans // Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed // Other = 1 = used to calculate LM score once all steps have been processed
float float
// m_beamWidth,
// m_earlyDiscardingThreshold,
// m_translationOptionThreshold,
m_wordDeletionWeight; m_wordDeletionWeight;
@ -94,15 +91,9 @@ protected:
// -ve = no limit on distortion // -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh) // 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints bool m_reorderingConstraint; //! use additional reordering constraints
// bool m_useEarlyDistortionCost;
// size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
// size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
BookkeepingOptions m_bookkeeping_options; BookkeepingOptions m_bookkeeping_options;
size_t m_latticeSamplesSize; size_t m_latticeSamplesSize;
// size_t m_maxNoTransOptPerCoverage;
// size_t m_maxNoPartTransOpt;
// size_t m_maxPhraseLength;
std::string m_latticeSamplesFilePath; std::string m_latticeSamplesFilePath;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
@ -116,48 +107,31 @@ protected:
bool m_printTranslationOptions; bool m_printTranslationOptions;
bool m_sourceStartPosMattersForRecombination; bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath; // bool m_recoverPath;
bool m_outputHypoScore; // bool m_outputHypoScore;
bool m_requireSortingAfterSourceContext; bool m_requireSortingAfterSourceContext;
// SearchAlgorithm m_searchAlgorithm; // SearchAlgorithm m_searchAlgorithm;
InputTypeEnum m_inputType; // InputTypeEnum m_inputType;
mutable size_t m_verboseLevel; mutable size_t m_verboseLevel;
bool m_reportSegmentation; // bool m_reportSegmentation;
bool m_reportSegmentationEnriched; // bool m_reportSegmentationEnriched;
bool m_reportAllFactors; // bool m_reportAllFactors;
std::string m_detailedTranslationReportingFilePath; // std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath; // std::string m_detailedTreeFragmentsTranslationReportingFilePath;
// std::string m_detailedAllTranslationReportingFilePath;
std::string m_detailedAllTranslationReportingFilePath; // bool m_PrintAlignmentInfo;
// bool m_PrintID;
bool m_PrintAlignmentInfo; // bool m_PrintPassthroughInformation;
// std::string m_alignmentOutputFile;
bool m_PrintID;
bool m_PrintPassthroughInformation;
std::string m_alignmentOutputFile;
std::string m_factorDelimiter; //! by default, |, but it can be changed std::string m_factorDelimiter; //! by default, |, but it can be changed
XmlInputType m_xmlInputType; //! method for handling sentence XML input XmlInputType m_xmlInputType; //! method for handling sentence XML input
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">" std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
// bool m_mbr; //! use MBR decoder
// bool m_useLatticeMBR; //! use MBR decoder
// bool m_mira; // do mira training
// bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009)
// size_t m_mbrSize; //! number of translation candidates considered
// float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
// size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice
// std::vector<float> m_lmbrThetas; //! theta(s) for lattice mbr calculation
// bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR
// float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details
// float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
// float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1) size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature; bool m_lmEnableOOVFeature;
@ -167,15 +141,15 @@ protected:
bool m_isAlwaysCreateDirectTranslationOption; bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created //! constructor. only the 1 static variable can be created
bool m_outputWordGraph; //! whether to output word graph // bool m_outputWordGraph; //! whether to output word graph
bool m_outputSearchGraph; //! whether to output search graph // bool m_outputSearchGraph; //! whether to output search graph
bool m_outputSearchGraphExtended; //! ... in extended format // bool m_outputSearchGraphExtended; //! ... in extended format
bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) // bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph // bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf // bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
#endif #endif
bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) // bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
std::string m_outputUnknownsFile; //! output unknowns in this file std::string m_outputUnknownsFile; //! output unknowns in this file
@ -190,7 +164,7 @@ protected:
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal; Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
SourceLabelOverlap m_sourceLabelOverlap; SourceLabelOverlap m_sourceLabelOverlap;
UnknownLHSList m_unknownLHS; UnknownLHSList m_unknownLHS;
WordAlignmentSort m_wordAlignmentSort; // WordAlignmentSort m_wordAlignmentSort;
int m_threadCount; int m_threadCount;
long m_startTranslationId; long m_startTranslationId;
@ -229,10 +203,6 @@ protected:
const StatefulFeatureFunction* m_treeStructure; const StatefulFeatureFunction* m_treeStructure;
// number of nonterminal labels
// size_t m_nonTerminalSize;
void ini_compact_table_options(); void ini_compact_table_options();
void ini_consensus_decoding_options(); void ini_consensus_decoding_options();
void ini_cube_pruning_options(); void ini_cube_pruning_options();
@ -278,7 +248,8 @@ public:
} }
#endif #endif
//! Load data into static instance. This function is required as LoadData() is not const //! Load data into static instance. This function is required as
// LoadData() is not const
static bool LoadDataStatic(Parameter *parameter, const std::string &execPath); static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);
//! Main function to load everything. Also initialize the Parameter object //! Main function to load everything. Also initialize the Parameter object
@ -336,22 +307,6 @@ public:
bool IsWordDeletionEnabled() const { bool IsWordDeletionEnabled() const {
return m_wordDeletionEnabled; return m_wordDeletionEnabled;
} }
// size_t GetMaxHypoStackSize() const {
// return m_options.search.stack_size;
// }
// size_t GetMinHypoStackDiversity() const {
// return m_options.search.stack_diversity;
// }
size_t IsPathRecoveryEnabled() const {
return m_recoverPath;
}
bool IsIDEnabled() const {
return m_PrintID;
}
bool IsPassthroughEnabled() const {
return m_PrintPassthroughInformation;
}
int GetMaxDistortion() const { int GetMaxDistortion() const {
return m_options.reordering.max_distortion; return m_options.reordering.max_distortion;
@ -384,47 +339,6 @@ public:
void SetVerboseLevel(int x) const { void SetVerboseLevel(int x) const {
m_verboseLevel = x; m_verboseLevel = x;
} }
char GetReportSegmentation() const {
if (m_reportSegmentation) return 1;
if (m_reportSegmentationEnriched) return 2;
return 0;
}
void SetReportSegmentation(const int &val) {
if (val == 0)
m_reportSegmentation = m_reportSegmentationEnriched = false;
else if (val == 1)
m_reportSegmentation = true;
else if (val == 2)
m_reportSegmentationEnriched = true;
else
std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring";
}
bool GetReportAllFactors() const {
return m_reportAllFactors;
}
bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
}
bool IsDetailedAllTranslationReportingEnabled() const {
return !m_detailedAllTranslationReportingFilePath.empty();
}
const std::string &GetDetailedTranslationReportingFilePath() const {
return m_detailedTranslationReportingFilePath;
}
bool IsDetailedTreeFragmentsTranslationReportingEnabled() const {
return !m_detailedTreeFragmentsTranslationReportingFilePath.empty();
}
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
// bool IsLabeledNBestList() const {
// return m_options.nbest.include_feature_labels;
// }
bool UseMinphrInMemory() const { bool UseMinphrInMemory() const {
return m_minphrMemory; return m_minphrMemory;
@ -434,19 +348,6 @@ public:
return m_minlexrMemory; return m_minlexrMemory;
} }
// for mert
// size_t GetNBestSize() const {
// return m_options.nbest.nbest_size;
// }
// const std::string &GetNBestFilePath() const {
// return m_options.nbest.output_file_path;
// }
// bool IsNBestEnabled() const {
// return m_options.nbest.enabled;
// }
size_t GetLatticeSamplesSize() const { size_t GetLatticeSamplesSize() const {
return m_latticeSamplesSize; return m_latticeSamplesSize;
} }
@ -455,22 +356,6 @@ public:
return m_latticeSamplesFilePath; return m_latticeSamplesFilePath;
} }
// size_t GetNBestFactor() const {
// return m_options.nbest.factor;
// }
bool GetOutputWordGraph() const {
return m_outputWordGraph;
}
//! Sets the global score vector weights for a given FeatureFunction.
InputTypeEnum GetInputType() const {
return m_inputType;
}
// SearchAlgorithm GetSearchAlgorithm() const {
// return m_searchAlgorithm;
// }
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const { bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
if (algo == DefaultSearchAlgorithm) if (algo == DefaultSearchAlgorithm)
algo = m_options.search.algo; algo = m_options.search.algo;
@ -577,33 +462,36 @@ public:
return m_lmEnableOOVFeature; return m_lmEnableOOVFeature;
} }
bool GetOutputSearchGraph() const { // bool GetOutputSearchGraph() const {
return m_outputSearchGraph; // return m_outputSearchGraph;
} // }
void SetOutputSearchGraph(bool outputSearchGraph) {
m_outputSearchGraph = outputSearchGraph; // void SetOutputSearchGraph(bool outputSearchGraph) {
} // m_outputSearchGraph = outputSearchGraph;
bool GetOutputSearchGraphExtended() const { // }
return m_outputSearchGraphExtended;
} // bool GetOutputSearchGraphExtended() const {
bool GetOutputSearchGraphSLF() const { // return m_outputSearchGraphExtended;
return m_outputSearchGraphSLF; // }
} // GetOutputSearchGraphSLF() const {
bool GetOutputSearchGraphHypergraph() const { // return m_outputSearchGraphSLF;
return m_outputSearchGraphHypergraph; // }
} // bool GetOutputSearchGraphHypergraph() const {
#ifdef HAVE_PROTOBUF // return m_outputSearchGraphHypergraph;
bool GetOutputSearchGraphPB() const { // }
return m_outputSearchGraphPB;
} // #ifdef HAVE_PROTOBUF
#endif // bool GetOutputSearchGraphPB() const {
// return m_outputSearchGraphPB;
// }
// #endif
const std::string& GetOutputUnknownsFile() const { const std::string& GetOutputUnknownsFile() const {
return m_outputUnknownsFile; return m_outputUnknownsFile;
} }
bool GetUnprunedSearchGraph() const { // bool GetUnprunedSearchGraph() const {
return m_unprunedSearchGraph; // return m_unprunedSearchGraph;
} // }
bool GetIncludeLHSInSearchGraph() const { bool GetIncludeLHSInSearchGraph() const {
return m_includeLHSInSearchGraph; return m_includeLHSInSearchGraph;
@ -640,9 +528,9 @@ public:
return m_sourceLabelOverlap; return m_sourceLabelOverlap;
} }
bool GetOutputHypoScore() const { // bool GetOutputHypoScore() const {
return m_outputHypoScore; // return m_outputHypoScore;
} // }
size_t GetRuleLimit() const { size_t GetRuleLimit() const {
return m_ruleLimit; return m_ruleLimit;
} }
@ -675,16 +563,16 @@ public:
return m_bookkeeping_options.need_alignment_info; return m_bookkeeping_options.need_alignment_info;
// return m_needAlignmentInfo; // return m_needAlignmentInfo;
} }
const std::string &GetAlignmentOutputFile() const { // const std::string &GetAlignmentOutputFile() const {
return m_alignmentOutputFile; // return m_alignmentOutputFile;
} // }
bool PrintAlignmentInfo() const { // bool PrintAlignmentInfo() const {
return m_PrintAlignmentInfo; // return m_PrintAlignmentInfo;
} // }
WordAlignmentSort GetWordAlignmentSort() const { // WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort; // return m_wordAlignmentSort;
} // }
bool GetHasAlternateWeightSettings() const { bool GetHasAlternateWeightSettings() const {
return m_weightSetting.size() > 0; return m_weightSetting.size() > 0;
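The hunk above retires the old per-flag StaticData accessors (search-graph, hypothesis-score, alignment and n-best getters/setters) in favour of the consolidated AllOptions object reached through options(). A minimal self-contained sketch of the pattern callers are migrated to, using toy stand-ins rather than the actual Moses classes (illustrative only, not code from this commit):

#include <cstddef>
#include <string>

// Toy stand-ins for Moses::NBestOptions / Moses::ReportingOptions / Moses::AllOptions.
struct NBestOptions     { std::size_t nbest_size = 0; bool only_distinct = false; };
struct ReportingOptions { std::string SearchGraph; bool ReportHypoScore = false; };
struct AllOptions       { NBestOptions nbest; ReportingOptions output; };

// Before: StaticData::Instance().GetOutputSearchGraph() / GetNBestSize()
// After:  read the task-local options object.
bool keep_search_graph(AllOptions const& opts)
{
  return !opts.output.SearchGraph.empty() || opts.nbest.nbest_size > 0;
}

The practical difference is that output decisions are read from an immutable, task-local options object instead of being toggled on the global StaticData via const_cast, as in the SetOutputSearchGraph() calls removed elsewhere in this commit.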
View File
@ -26,12 +26,12 @@ void Manager::OutputBest(OutputCollector *collector) const
const SHyperedge *best = GetBestSHyperedge(); const SHyperedge *best = GetBestSHyperedge();
if (best == NULL) { if (best == NULL) {
VERBOSE(1, "NO BEST TRANSLATION" << std::endl); VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) { if (options().output.ReportHypoScore) {
out << "0 "; out << "0 ";
} }
out << '\n'; out << '\n';
} else { } else {
if (StaticData::Instance().GetOutputHypoScore()) { if (options().output.ReportHypoScore) {
out << best->label.score << " "; out << best->label.score << " ";
} }
Phrase yield = GetOneBestTargetYield(*best); Phrase yield = GetOneBestTargetYield(*best);
@ -49,12 +49,10 @@ void Manager::OutputBest(OutputCollector *collector) const
void Manager::OutputNBest(OutputCollector *collector) const void Manager::OutputNBest(OutputCollector *collector) const
{ {
if (collector) { if (collector) {
const StaticData &staticData = StaticData::Instance();
long translationId = m_source.GetTranslationId(); long translationId = m_source.GetTranslationId();
KBestExtractor::KBestVec nBestList; KBestExtractor::KBestVec nBestList;
ExtractKBest(staticData.options().nbest.nbest_size, nBestList, ExtractKBest(options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct); options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId); OutputNBestList(collector, nBestList, translationId);
} }
} }
@ -111,7 +109,8 @@ void Manager::OutputNBestList(OutputCollector *collector,
out << translationId << " ||| "; out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false); OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| "; out << " ||| ";
derivation.scoreBreakdown.OutputAllFeatureScores(out); bool with_labels = options().nbest.include_feature_labels;
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
out << " ||| " << derivation.score; out << " ||| " << derivation.score;
// optionally, print word alignments // optionally, print word alignments
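OutputAllFeatureScores() now takes an explicit with_labels flag, driven by options().nbest.include_feature_labels, instead of consulting StaticData. A rough stand-alone sketch of what that flag toggles in the n-best feature column (illustrative only; the real logic lives in ScoreComponentCollection):

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Print "LM0= -42.1 ... TM0= ..." when with_labels is true,
// or just the flat score sequence when it is false.
void output_feature_scores(std::ostream& out,
                           std::map<std::string, std::vector<float> > const& scores,
                           bool with_labels)
{
  typedef std::map<std::string, std::vector<float> >::const_iterator ff_iter;
  for (ff_iter ff = scores.begin(); ff != scores.end(); ++ff) {
    if (with_labels) out << ff->first << "= ";
    for (std::size_t i = 0; i < ff->second.size(); ++i)
      out << ff->second[i] << " ";
  }
}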
View File
@ -66,7 +66,7 @@ template<typename RuleTrie>
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase( TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob) const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &SD = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
UnknownWordPenaltyProducer::Instance(); UnknownWordPenaltyProducer::Instance();
@ -82,8 +82,8 @@ TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
targetPhrase->EvaluateInIsolation(srcPhrase); targetPhrase->EvaluateInIsolation(srcPhrase);
targetPhrase->SetTargetLHS(&targetLhs); targetPhrase->SetTargetLHS(&targetLhs);
targetPhrase->SetAlignmentInfo("0-0"); targetPhrase->SetAlignmentInfo("0-0");
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || if (!SD.options().output.detailed_tree_transrep_filepath.empty() ||
staticData.GetTreeStructure() != NULL) { SD.GetTreeStructure() != NULL) {
std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " + std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " +
oov[0]->GetString().as_string() + " ]"; oov[0]->GetString().as_string() + " ]";
targetPhrase->SetProperty("Tree", value); targetPhrase->SetProperty("Tree", value);
View File
@ -45,7 +45,11 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
} }
} }
int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder) int
TabbedSentence::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{ {
TabbedColumns allColumns; TabbedColumns allColumns;
@ -58,14 +62,14 @@ int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factor
if(allColumns.size() < 2) { if(allColumns.size() < 2) {
std::stringstream dummyStream; std::stringstream dummyStream;
dummyStream << line << std::endl; dummyStream << line << std::endl;
return Sentence::Read(dummyStream, factorOrder); return Sentence::Read(dummyStream, factorOrder, opts);
} else { } else {
m_columns.resize(allColumns.size() - 1); m_columns.resize(allColumns.size() - 1);
std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin()); std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
std::stringstream dummyStream; std::stringstream dummyStream;
dummyStream << allColumns[0] << std::endl; dummyStream << allColumns[0] << std::endl;
return Sentence::Read(dummyStream, factorOrder); return Sentence::Read(dummyStream, factorOrder, opts);
} }
} }
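Like TreeInput and WordLattice below, TabbedSentence::Read() now receives the per-task AllOptions explicitly and forwards it to Sentence::Read(). A self-contained sketch of that delegation pattern with toy types (not the Moses classes):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct AllOptionsToy { };  // stand-in for Moses::AllOptions

struct SentenceToy {
  std::vector<std::string> words;
  virtual ~SentenceToy() {}
  virtual int Read(std::istream& in, AllOptionsToy const& opts) {
    std::string line, w;
    if (!std::getline(in, line)) return 0;
    std::istringstream buf(line);
    while (buf >> w) words.push_back(w);
    return 1;
  }
};

// First tab-separated column is the sentence proper; remaining columns are kept aside.
struct TabbedSentenceToy : SentenceToy {
  std::vector<std::string> columns;
  int Read(std::istream& in, AllOptionsToy const& opts) {
    std::string line, col;
    if (!std::getline(in, line)) return 0;
    std::vector<std::string> cols;
    std::istringstream buf(line);
    while (std::getline(buf, col, '\t')) cols.push_back(col);
    if (cols.size() < 2) {               // no tabs: behave like a plain sentence
      std::istringstream plain(line + "\n");
      return SentenceToy::Read(plain, opts);
    }
    columns.assign(cols.begin() + 1, cols.end());
    std::istringstream first(cols[0] + "\n");
    return SentenceToy::Read(first, opts);   // delegate, forwarding opts
  }
};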
View File
@ -67,7 +67,9 @@ public:
virtual void CreateFromString(const std::vector<FactorType> &factorOrder virtual void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &tabbedString); , const std::string &tabbedString);
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder); virtual int
Read(std::istream& in,const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
const TabbedColumns& GetColumns() const { const TabbedColumns& GetColumns() const {
return m_columns; return m_columns;
View File
@ -44,7 +44,7 @@ using namespace boost::algorithm;
namespace Moses namespace Moses
{ {
typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache; PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line) PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary(line, true) :PhraseDictionary(line, true)
View File
@ -25,7 +25,7 @@ namespace Moses
{ {
boost::thread_specific_ptr<typename TargetPhraseCollectionCache::CacheMap> boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
TargetPhraseCollectionCache::m_phraseCache; TargetPhraseCollectionCache::m_phraseCache;
} }
View File
@ -59,6 +59,18 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil $(TOP)/util//kenutil
; ;
exe check-coverage :
check-coverage.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe sim-pe : exe sim-pe :
sim-pe.cc sim-pe.cc
$(TOP)/moses//moses $(TOP)/moses//moses
View File
@ -17,7 +17,7 @@ echo $$d
endef endef
MOSES_ROOT := $(shell $(find_moses_root)) MOSES_ROOT := $(shell $(find_moses_root))
$(info MOSES_ROOT=${MOSES_ROOT})
# =============================================================================== # ===============================================================================
# COMPILATION PREFERENCES # COMPILATION PREFERENCES
# =============================================================================== # ===============================================================================
@ -35,7 +35,9 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5 CXXFLAGS += -DKENLM_MAX_ORDER=5
CXXFLAGS += -DWITH_THREADS CXXFLAGS += -DWITH_THREADS
CXXFLAGS += -DNO_MOSES CXXFLAGS += -DNO_MOSES
CXXFLAGS += -I${MOSES_ROOT} -I. CXXFLAGS += -DMMT
CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only
CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include
ifeq ($(variant),debug) ifeq ($(variant),debug)
CXXFLAGS += -ggdb -O0 CXXFLAGS += -ggdb -O0
@ -45,7 +47,7 @@ else ifeq ($(variant),syntax)
CXXFLAGS += -fsyntax-only CXXFLAGS += -fsyntax-only
endif endif
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/ LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL} # WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
WDIR = build/$(variant) WDIR = build/$(variant)
@ -60,14 +62,22 @@ nil:
# libraries required # libraries required
LIBS = m z bz2 pthread dl ${BOOSTLIBS} BOOSTLIBS := program_options iostreams thread system filesystem
#LIBS += tcmalloc BOOSTLIBS := $(addprefix -lboost_,${BOOSTLIBS})
BOOSTLIBS := thread system filesystem program_options iostreams ifeq ($(BOOSTLIBTAG),"")
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
ifdef ($(BOOSTLIBTAG),"")
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS}) BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif endif
STATIC_LIBS = m bz2 z dl rt
DYNAMIC_LIBS = pthread
#DYNAMIC_LIBS += tcmalloc
LIBS = -Wl,-B$(link)
LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS}
LIBS += $(addprefix -l,${STATIC_LIBS})
LIBS += -Wl,-Bdynamic
LIBS += $(addprefix -l,${DYNAMIC_LIBS})
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\ cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
$(patsubst .%,$(WDIR)%,$(basename $1)))) $(patsubst .%,$(WDIR)%,$(basename $1))))
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1))) cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
@ -79,7 +89,7 @@ DEP += $(basename $(call cc2obj,$1)).d
$(call cc2obj,$1): $1 $(call cc2obj,$1): $1
@echo -e "COMPILING $1" @echo -e "COMPILING $1"
@mkdir -p $$(@D) @mkdir -p $$(@D)
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@ ${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
endef endef
@ -90,7 +100,7 @@ $(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
ifneq ($(variant),syntax) ifneq ($(variant),syntax)
@echo -e "LINKING $$@" @echo -e "LINKING $$@"
@mkdir -p $${@D} @mkdir -p $${@D}
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$< ${CXX} ${CXXFLAGS} -o $$@ $$< $(LIBOBJ) ${LIBS}
endif endif
endef endef
@ -106,7 +116,8 @@ skip += ug_splice_arglist.cc
# skip += ug_lexical_reordering.cc # skip += ug_lexical_reordering.cc
# objects from elsewhere in the moses tree that are needed # objects from elsewhere in the moses tree that are needed
extra = ${MOSES_ROOT}/util/exception.cc extra = ${MOSES_ROOT}/util/exception.cc
extra += ${MOSES_ROOT}/util/integer_to_string.cc
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f))) $(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
broken += $(wildcard ./mm/stashed/*) broken += $(wildcard ./mm/stashed/*)
View File
@ -0,0 +1,81 @@
// #include "mmsapt.h"
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
// #include "moses/TranslationTask.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
struct mycmp
{
bool operator() (pair<string,uint32_t> const& a,
pair<string,uint32_t> const& b) const
{
return a.second > b.second;
}
};
string
basename(string const path, string const suffix)
{
size_t p = path.find_last_of("/");
size_t k = path.size() - suffix.size();
cout << path << " " << suffix << endl;
cout << path.substr(0,p) << " " << path.substr(k) << endl;
return path.substr(p, suffix == &path[k] ? k-p : path.size() - p);
}
int main(int argc, char* argv[])
{
bitext_t B;
B.open(argv[1],argv[2],argv[3]);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B.V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B.I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
{
if (m.ca() > 500) continue;
sapt::tsa::ArrayEntry I(m.lower_bound(-1));
char const* stop = m.upper_bound(-1);
map<string,uint32_t> cnt;
while (I.next != stop)
{
m.root->readEntry(I.next,I);
++cnt[B.docname(I.sid)];
}
cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl;
typedef pair<string,uint32_t> entry;
vector<entry> ranked; ranked.reserve(cnt.size());
BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
sort(ranked.begin(),ranked.end(),mycmp());
BOOST_FOREACH(entry const& e, ranked)
cout << setw(12) << " " << e.second << " " << e.first << endl;
cout << endl;
}
}
}
}
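A note on the basename() helper above: it substrings from the position of the last '/', so the returned name appears to keep the leading separator, it will fail when the path contains no '/' at all, and the two cout lines look like leftover debugging output. Assuming the intent is simply "strip the directory, then strip the suffix if present", a cleaned-up sketch would be (hypothetical, not the committed code):

#include <string>

std::string basename(std::string const& path, std::string const& suffix)
{
  std::string::size_type p = path.find_last_of('/');
  std::string base = (p == std::string::npos) ? path : path.substr(p + 1);
  if (base.size() > suffix.size()
      && base.compare(base.size() - suffix.size(), suffix.size(), suffix) == 0)
    base.erase(base.size() - suffix.size());
  return base;
}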
View File
@ -0,0 +1,67 @@
// for each word in the input, keep track of the longest matching ngram covering it
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
struct mycmp
{
bool operator() (pair<string,uint32_t> const& a,
pair<string,uint32_t> const& b) const
{
return a.second > b.second;
}
};
string
basename(string const path, string const suffix)
{
size_t p = path.find_last_of("/");
size_t k = path.size() - suffix.size();
cout << path << " " << suffix << endl;
cout << path.substr(0,p) << " " << path.substr(k) << endl;
return path.substr(p, suffix == &path[k] ? k-p : path.size() - p);
}
int main(int argc, char* argv[])
{
bitext_t B;
B.open(argv[1],argv[2],argv[3]);
B.V1->setDynamic(true);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B.V1->fillIdSeq(line,snt);
vector<size_t> match(snt.size(),0);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B.I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
for (size_t j = 0; j < m.size(); ++j)
match[i+j] = max(match[i+j], m.size());
}
for (size_t i = 0; i < snt.size(); ++i)
cout << setw(3) << match[i] << " " << (*B.V1)[snt[i]] << endl;
}
}
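The header comment states the algorithm: for each input position, extend a match as far as the bitext allows and record, for every covered token, the length of the longest matching n-gram. A self-contained restatement of that bookkeeping, with a plain prefix-closed set of known n-grams standing in for the suffix-array iterator (illustrative only):

#include <algorithm>
#include <cstddef>
#include <set>
#include <string>
#include <vector>

// match[i] = length of the longest known n-gram covering position i.
// 'known' is assumed to be prefix-closed, as matches in a suffix array are.
std::vector<std::size_t>
longest_match_coverage(std::vector<std::string> const& snt,
                       std::set<std::vector<std::string> > const& known)
{
  std::vector<std::size_t> match(snt.size(), 0);
  for (std::size_t i = 0; i < snt.size(); ++i) {
    std::vector<std::string> ngram;
    std::size_t len = 0;
    for (std::size_t k = i; k < snt.size(); ++k) {
      ngram.push_back(snt[k]);
      if (known.find(ngram) == known.end()) break;   // cannot extend further
      len = ngram.size();
    }
    for (std::size_t j = 0; j < len; ++j)
      match[i + j] = std::max(match[i + j], len);
  }
  return match;
}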
View File
@ -0,0 +1,70 @@
// #include "mmsapt.h"
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
// #include "moses/TranslationTask.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
#include "mm/ug_bitext_sampler.h"
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
struct mycmp
{
bool operator() (pair<string,uint32_t> const& a,
pair<string,uint32_t> const& b) const
{
return a.second > b.second;
}
};
string
basename(string const path, string const suffix)
{
size_t p = path.find_last_of("/");
size_t k = path.size() - suffix.size();
cout << path << " " << suffix << endl;
cout << path.substr(0,p) << " " << path.substr(k) << endl;
return path.substr(p, suffix == &path[k] ? k-p : path.size() - p);
}
int main(int argc, char* argv[])
{
boost::intrusive_ptr<bitext_t> B(new bitext_t);
B->open(argv[1],argv[2],argv[3]);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B->V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B->I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
{
SPTR<SamplingBias const> zilch;
BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
sapt::random_sampling);
s();
cout << m.size() << " " << s.stats()->trg.size() << endl;
}
}
}
}
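Each successful extension of the match is handed to a BitextSampler, which inspects at most 1000 occurrences (random sampling here) and reports how many distinct target phrases it extracted. As a generic illustration of the capped-sampling idea only (this is not how BitextSampler itself is implemented):

#include <cstddef>
#include <cstdlib>
#include <vector>

// Visit roughly max_samples of the n occurrences, chosen uniformly at random.
// A real implementation would use a seeded RNG engine; rand() keeps the sketch short.
std::vector<std::size_t> sample_occurrences(std::size_t n, std::size_t max_samples)
{
  std::vector<std::size_t> picked;
  if (max_samples >= n) {                          // few enough: take everything
    for (std::size_t i = 0; i < n; ++i) picked.push_back(i);
    return picked;
  }
  double keep = double(max_samples) / double(n);   // Bernoulli sampling at rate k/n
  for (std::size_t i = 0; i < n; ++i)
    if (double(std::rand()) / RAND_MAX < keep) picked.push_back(i);
  return picked;
}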
View File
@ -29,7 +29,7 @@ HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m) HOSTTYPE ?= $(shell uname -m)
KERNEL = $(shell uname -r) KERNEL = $(shell uname -r)
MOSES_ROOT = ${HOME}/code/mosesdecoder MOSES_ROOT ?= ${HOME}/code/mosesdecoder
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI} WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
VPATH = ${HOME}/code/mosesdecoder/ VPATH = ${HOME}/code/mosesdecoder/
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES} CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
View File
@ -28,8 +28,8 @@ Bitext<Token>::agenda
while (j->nextSample(sid,offset)) while (j->nextSample(sid,offset))
{ {
aln.clear(); aln.clear();
int po_fwd = Moses::LRModel::NONE; int po_fwd = LRModel::NONE;
int po_bwd = Moses::LRModel::NONE; int po_bwd = LRModel::NONE;
int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1; int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1;
bitvector* full_aln = j->fwd ? &full_alignment : NULL; bitvector* full_aln = j->fwd ? &full_alignment : NULL;
View File
@ -17,7 +17,7 @@ namespace sapt
jstats() jstats()
: my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0) : my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0)
{ {
for (int i = 0; i <= Moses::LRModel::NONE; ++i) for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0; ofwd[i] = obwd[i] = 0;
my_aln.reserve(1); my_aln.reserve(1);
} }
@ -30,7 +30,7 @@ namespace sapt
my_bcnt = other.bcnt(); my_bcnt = other.bcnt();
my_aln = other.aln(); my_aln = other.aln();
indoc = other.indoc; indoc = other.indoc;
for (int i = 0; i <= Moses::LRModel::NONE; i++) for (int i = 0; i <= LRModel::NONE; i++)
{ {
ofwd[i] = other.ofwd[i]; ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i]; obwd[i] = other.obwd[i];
@ -41,7 +41,7 @@ namespace sapt
jstats:: jstats::
dcnt_fwd(PhraseOrientation const idx) const dcnt_fwd(PhraseOrientation const idx) const
{ {
assert(idx <= Moses::LRModel::NONE); assert(idx <= LRModel::NONE);
return ofwd[idx]; return ofwd[idx];
} }
@ -49,7 +49,7 @@ namespace sapt
jstats:: jstats::
dcnt_bwd(PhraseOrientation const idx) const dcnt_bwd(PhraseOrientation const idx) const
{ {
assert(idx <= Moses::LRModel::NONE); assert(idx <= LRModel::NONE);
return obwd[idx]; return obwd[idx];
} }
View File
@ -24,8 +24,8 @@ namespace sapt
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln; std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
// internal word alignment // internal word alignment
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts uint32_t ofwd[LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts
public: public:
std::map<uint32_t,uint32_t> indoc; std::map<uint32_t,uint32_t> indoc;
@ -48,8 +48,8 @@ namespace sapt
bool valid(); bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const; uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const; uint32_t dcnt_bwd(PhraseOrientation const idx) const;
void fill_lr_vec(Moses::LRModel::Direction const& dir, void fill_lr_vec(LRModel::Direction const& dir,
Moses::LRModel::ModelType const& mdl, LRModel::ModelType const& mdl,
std::vector<float>& v); std::vector<float>& v);
}; };
View File

@ -12,7 +12,7 @@ namespace sapt
pstats:: pstats::
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0) pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{ {
for (int i = 0; i <= Moses::LRModel::NONE; ++i) for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0; ofwd[i] = obwd[i] = 0;
} }
View File
@ -30,8 +30,8 @@ namespace sapt
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt) size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
size_t in_progress; // how many threads are currently working on this? size_t in_progress; // how many threads are currently working on this?
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations
indoc_map_t indoc; indoc_map_t indoc;
trg_map_t trg; trg_map_t trg;
@ -43,14 +43,14 @@ namespace sapt
bool bool
add(uint64_t const pid, // target phrase id add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable)) float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score float const b, // sample bias score
alnvec const& a, // local alignment alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation uint32_t bwd_o, // bwd. phrase orientation
int const docid); // document where sample was found int const docid); // document where sample was found
void void
count_sample(int const docid, // document where sample was found count_sample(int const docid, // document where sample was found
size_t const num_pairs, // # of phrases extractable here size_t const num_pairs, // # of phrases extractable here
View File
@ -74,8 +74,11 @@ BitextSampler : public Moses::reference_counter
public: public:
BitextSampler(BitextSampler const& other); BitextSampler(BitextSampler const& other);
BitextSampler const& operator=(BitextSampler const& other); BitextSampler const& operator=(BitextSampler const& other);
BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase, BitextSampler(bitext const* const bitext,
SPTR<SamplingBias const> const& bias, size_t const min_samples, size_t const max_samples, typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias,
size_t const min_samples,
size_t const max_samples,
sampling_method const method); sampling_method const method);
~BitextSampler(); ~BitextSampler();
SPTR<pstats> stats(); SPTR<pstats> stats();
View File
@ -227,7 +227,9 @@ namespace sapt
// Now sort the array // Now sort the array
if (log) *log << "sorting .... with " << threads << " threads." << std::endl; if (log) *log << "sorting .... with " << threads << " threads." << std::endl;
#ifndef NO_MOSES
double start_time = util::WallTime(); double start_time = util::WallTime();
#endif
boost::scoped_ptr<ug::ThreadPool> tpool; boost::scoped_ptr<ug::ThreadPool> tpool;
tpool.reset(new ug::ThreadPool(threads)); tpool.reset(new ug::ThreadPool(threads));
@ -252,8 +254,10 @@ namespace sapt
} }
} }
tpool.reset(); tpool.reset();
#ifndef NO_MOSES
if (log) *log << "Done sorting after " << util::WallTime() - start_time if (log) *log << "Done sorting after " << util::WallTime() - start_time
<< " seconds." << std::endl; << " seconds." << std::endl;
#endif
this->startArray = reinterpret_cast<char const*>(&(*sufa.begin())); this->startArray = reinterpret_cast<char const*>(&(*sufa.begin()));
this->endArray = reinterpret_cast<char const*>(&(*sufa.end())); this->endArray = reinterpret_cast<char const*>(&(*sufa.end()));
this->numTokens = sufa.size(); this->numTokens = sufa.size();
View File
@ -4,7 +4,7 @@ namespace sapt
{ {
using namespace std; using namespace std;
Moses::LRModel::ReorderingType po_other = Moses::LRModel::NONE; LRModel::ReorderingType po_other = LRModel::NONE;
// check if min and max in the alignment vector v are within the // check if min and max in the alignment vector v are within the
// bounds LFT and RGT and update the actual bounds L and R; update // bounds LFT and RGT and update the actual bounds L and R; update
// the total count of alignment links in the underlying phrase // the total count of alignment links in the underlying phrase
@ -83,54 +83,56 @@ namespace sapt
return ret; return ret;
} }
Moses::LRModel::ReorderingType // LRModel::ReorderingType
sapt::PhraseOrientation
find_po_fwd(vector<vector<ushort> >& a1, find_po_fwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2, vector<vector<ushort> >& a2,
size_t s1, size_t e1, size_t s1, size_t e1,
size_t s2, size_t e2) size_t s2, size_t e2)
{ {
if (e2 == a2.size()) // end of target sentence if (e2 == a2.size()) // end of target sentence
return Moses::LRModel::M; return LRModel::M;
size_t y = e2, L = e2, R = a2.size()-1; // won't change size_t y = e2, L = e2, R = a2.size()-1; // won't change
size_t x = e1, T = e1, B = a1.size()-1; size_t x = e1, T = e1, B = a1.size()-1;
if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0) if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M; return LRModel::M;
B = x = s1-1; T = 0; B = x = s1-1; T = 0;
if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0) if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S; return LRModel::S;
while (e2 < a2.size() && a2[e2].size() == 0) ++e2; while (e2 < a2.size() && a2[e2].size() == 0) ++e2;
if (e2 == a2.size()) // should never happen, actually if (e2 == a2.size()) // should never happen, actually
return Moses::LRModel::NONE; return LRModel::NONE;
if (a2[e2].back() < s1) if (a2[e2].back() < s1)
return Moses::LRModel::DL; return LRModel::DL;
if (a2[e2].front() >= e1) if (a2[e2].front() >= e1)
return Moses::LRModel::DR; return LRModel::DR;
return Moses::LRModel::NONE; return LRModel::NONE;
} }
Moses::LRModel::ReorderingType // LRModel::ReorderingType
PhraseOrientation
find_po_bwd(vector<vector<ushort> >& a1, find_po_bwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2, vector<vector<ushort> >& a2,
size_t s1, size_t e1, size_t s1, size_t e1,
size_t s2, size_t e2) size_t s2, size_t e2)
{ {
if (s1 == 0 && s2 == 0) return Moses::LRModel::M; if (s1 == 0 && s2 == 0) return LRModel::M;
if (s2 == 0) return Moses::LRModel::DR; if (s2 == 0) return LRModel::DR;
if (s1 == 0) return Moses::LRModel::DL; if (s1 == 0) return LRModel::DL;
size_t y = s2-1, L = 0, R = s2-1; // won't change size_t y = s2-1, L = 0, R = s2-1; // won't change
size_t x = s1-1, T = 0, B = s1-1; size_t x = s1-1, T = 0, B = s1-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0) if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M; return LRModel::M;
T = x = e1; B = a1.size()-1; T = x = e1; B = a1.size()-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0) if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S; return LRModel::S;
while (s2-- && a2[s2].size() == 0); while (s2-- && a2[s2].size() == 0);
Moses::LRModel::ReorderingType ret; LRModel::ReorderingType ret;
ret = (a2[s2].size() == 0 ? po_other : ret = (a2[s2].size() == 0 ? po_other :
a2[s2].back() < s1 ? Moses::LRModel::DR : a2[s2].back() < s1 ? LRModel::DR :
a2[s2].front() >= e1 ? Moses::LRModel::DL : a2[s2].front() >= e1 ? LRModel::DL :
po_other); po_other);
#if 0 #if 0
cout << "s1=" << s1 << endl; cout << "s1=" << s1 << endl;
View File
@ -12,7 +12,7 @@ namespace sapt {
#ifdef NO_MOSES #ifdef NO_MOSES
class LRModel{ class LRModel{
public:
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None }; enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
enum Direction { Forward, Backward, Bidirectional }; enum Direction { Forward, Backward, Bidirectional };
View File
@ -26,8 +26,8 @@ namespace sapt
uint32_t raw1, raw2, sample1, sample2, good1, good2, joint; uint32_t raw1, raw2, sample1, sample2, good1, good2, joint;
float cum_bias; float cum_bias;
std::vector<float> fvals; std::vector<float> fvals;
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs? float dfwd[LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[Moses::LRModel::NONE+1]; // distortion counts float dbwd[LRModel::NONE+1]; // distortion counts
std::vector<unsigned char> aln; std::vector<unsigned char> aln;
float score; float score;
bool inverse; bool inverse;
@ -125,7 +125,7 @@ namespace sapt
// } // }
// should we do that here or leave the raw counts? // should we do that here or leave the raw counts?
for (int i = 0; i <= Moses::LRModel::NONE; i++) for (int i = 0; i <= LRModel::NONE; i++)
{ {
PhraseOrientation po = static_cast<PhraseOrientation>(i); PhraseOrientation po = static_cast<PhraseOrientation>(i);
dfwd[i] = js.dcnt_fwd(po); dfwd[i] = js.dcnt_fwd(po);
@ -201,7 +201,7 @@ namespace sapt
, inverse(o.inverse) , inverse(o.inverse)
, indoc(o.indoc) , indoc(o.indoc)
{ {
for (int i = 0; i <= Moses::LRModel::NONE; ++i) for (int i = 0; i <= LRModel::NONE; ++i)
{ {
dfwd[i] = o.dfwd[i]; dfwd[i] = o.dfwd[i];
dbwd[i] = o.dbwd[i]; dbwd[i] = o.dbwd[i];
View File
@ -63,7 +63,9 @@ namespace Moses
, btfix(new mmbitext) , btfix(new mmbitext)
, m_bias_log(NULL) , m_bias_log(NULL)
, m_bias_loglevel(0) , m_bias_loglevel(0)
#ifndef NO_MOSES
, m_lr_func(NULL) , m_lr_func(NULL)
#endif
, m_sampling_method(random_sampling) , m_sampling_method(random_sampling)
, bias_key(((char*)this)+3) , bias_key(((char*)this)+3)
, cache_key(((char*)this)+2) , cache_key(((char*)this)+2)
@ -597,6 +599,7 @@ namespace Moses
// Evaluate with all features that can be computed using available factors // Evaluate with all features that can be computed using available factors
tp->EvaluateInIsolation(src, m_featuresToApply); tp->EvaluateInIsolation(src, m_featuresToApply);
#ifndef NO_MOSES
if (m_lr_func) if (m_lr_func)
{ {
LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType(); LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType();
@ -605,6 +608,7 @@ namespace Moses
pool.fill_lr_vec(dir, mdl, *scores); pool.fill_lr_vec(dir, mdl, *scores);
tp->SetExtraScores(m_lr_func, scores); tp->SetExtraScores(m_lr_func, scores);
} }
#endif
return tp; return tp;
} }
@ -863,10 +867,10 @@ namespace Moses
boost::unique_lock<boost::shared_mutex> ctxlock(context->lock); boost::unique_lock<boost::shared_mutex> ctxlock(context->lock);
if (localcache) std::cerr << "have local cache " << std::endl; if (localcache) std::cerr << "have local cache " << std::endl;
std::cerr << "BOO at " << HERE << std::endl; // std::cerr << "BOO at " << HERE << std::endl;
if (!localcache) if (!localcache)
{ {
std::cerr << "no local cache at " << HERE << std::endl; // std::cerr << "no local cache at " << HERE << std::endl;
setup_bias(ttask); setup_bias(ttask);
if (context->bias) if (context->bias)
{ {
@ -879,6 +883,7 @@ namespace Moses
if (!context->cache1) context->cache1.reset(new pstats::cache_t); if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t); if (!context->cache2) context->cache2.reset(new pstats::cache_t);
#ifndef NO_MOSES
if (m_lr_func_name.size() && m_lr_func == NULL) if (m_lr_func_name.size() && m_lr_func == NULL)
{ {
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name); FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
@ -887,6 +892,7 @@ namespace Moses
<< " does not seem to be a lexical reordering function!"); << " does not seem to be a lexical reordering function!");
// todo: verify that lr_func implements a hierarchical reordering model // todo: verify that lr_func implements a hierarchical reordering model
} }
#endif
} }
bool bool
View File
@ -26,7 +26,9 @@
#include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h" #include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h"
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReordering.h" #include "moses/FF/LexicalReordering/LexicalReordering.h"
#endif
#include "moses/InputFileStream.h" #include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h" #include "moses/FactorTypeSet.h"
@ -82,7 +84,9 @@ namespace Moses
boost::scoped_ptr<std::ofstream> m_bias_logger; // for logging to a file boost::scoped_ptr<std::ofstream> m_bias_logger; // for logging to a file
std::ostream* m_bias_log; std::ostream* m_bias_log;
int m_bias_loglevel; int m_bias_loglevel;
#ifndef NO_MOSES
LexicalReordering* m_lr_func; // associated lexical reordering function LexicalReordering* m_lr_func; // associated lexical reordering function
#endif
std::string m_lr_func_name; // name of associated lexical reordering function std::string m_lr_func_name; // name of associated lexical reordering function
sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler
boost::scoped_ptr<ug::ThreadPool> m_thread_pool; boost::scoped_ptr<ug::ThreadPool> m_thread_pool;
View File
@ -69,7 +69,7 @@ int main(int argc, char* argv[])
while (true) while (true)
{ {
boost::shared_ptr<Sentence> phrase(new Sentence); boost::shared_ptr<Sentence> phrase(new Sentence);
if (!phrase->Read(cin,ifo)) break; if (!phrase->Read(cin,ifo, StaticData::Instance().options())) break;
boost::shared_ptr<TranslationTask> ttask; boost::shared_ptr<TranslationTask> ttask;
ttask = TranslationTask::create(phrase); ttask = TranslationTask::create(phrase);
if (pdta) if (pdta)
View File
@ -1,7 +1,7 @@
# -*- makefile -*- # # -*- makefile -*-
MOSES_CODE=/fs/gna0/germann/code/mosesdecoder # MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
MOSES_ROOT=/fs/gna0/germann/moses # MOSES_ROOT=/fs/gna0/germann/moses
LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams) # LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
ibm1-align: ibm1-align.cc # ibm1-align: ibm1-align.cc
g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb # g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb
View File
@ -1,4 +1,4 @@
// $Id$ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#include <list> #include <list>
#include <vector> #include <vector>
@ -12,7 +12,7 @@
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h" #include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "util/exception.hh" #include "util/exception.hh"
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include "TranslationTask.h"
using namespace std; using namespace std;
namespace Moses namespace Moses
@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
size_t inputSize = input.GetSize(); size_t inputSize = input.GetSize();
m_inputPathMatrix.resize(inputSize); m_inputPathMatrix.resize(inputSize);
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); size_t maxSizePhrase = ttask->options().search.max_phrase_length;
maxSizePhrase = std::min(inputSize, maxSizePhrase); maxSizePhrase = std::min(inputSize, maxSizePhrase);
// 1-word phrases // 1-word phrases
@ -234,8 +234,10 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin(); list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
const DecodeStep &decodeStep = **iterStep; const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY DecodeStepTranslation const& dstep
(m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList); = static_cast<const DecodeStepTranslation&>(decodeStep);
dstep.ProcessInitialTransLEGACY(m_source, *oldPtoc, startPos, endPos,
adhereTableLimit, inputPathList);
// do rest of decode steps // do rest of decode steps
int indexStep = 0; int indexStep = 0;
View File
@ -186,7 +186,8 @@ void TranslationTask::Run()
// report thread number // report thread number
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl); VERBOSE(2, "Translating line " << translationId << " in thread id "
<< pthread_self() << endl);
#endif #endif
@ -214,8 +215,8 @@ void TranslationTask::Run()
OutputCollector* ocoll; OutputCollector* ocoll;
Timer additionalReportingTime; Timer additionalReportingTime;
additionalReportingTime.start(); additionalReportingTime.start();
boost::shared_ptr<IOWrapper> const& io = m_ioWrapper; boost::shared_ptr<IOWrapper> const& io = m_ioWrapper;
manager->OutputBest(io->GetSingleBestOutputCollector()); manager->OutputBest(io->GetSingleBestOutputCollector());
// output word graph // output word graph
@ -229,7 +230,7 @@ void TranslationTask::Run()
// Output search graph in hypergraph format for Kenneth Heafield's // Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes to stderr // lazy hypergraph decoder; writes to stderr
if (StaticData::Instance().GetOutputSearchGraphHypergraph()) { if (options().output.SearchGraphHG.size()) {
size_t transId = manager->GetSource().GetTranslationId(); size_t transId = manager->GetSource().GetTranslationId();
string fname = io->GetHypergraphOutputFileName(transId); string fname = io->GetHypergraphOutputFileName(transId);
manager->OutputSearchGraphAsHypergraph(fname, PRECISION); manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
View File
@ -237,7 +237,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
} }
//! populate this InputType with data from in stream //! populate this InputType with data from in stream
int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder) int
TreeInput::
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
{ {
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
@ -254,8 +257,8 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
stringstream strme; stringstream strme;
strme << line << endl; strme << line << endl;
Sentence::Read(strme, factorOrder); Sentence::Read(strme, factorOrder, opts);
// size input chart // size input chart
size_t sourceSize = GetSize(); size_t sourceSize = GetSize();
m_sourceChart.resize(sourceSize); m_sourceChart.resize(sourceSize);
View File
@ -53,7 +53,10 @@ public:
} }
//! populate this InputType with data from in stream //! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder); virtual int
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
//! Output debugging info to stream out //! Output debugging info to stream out
virtual void Print(std::ostream&) const; virtual void Print(std::ostream&) const;
View File
@ -147,7 +147,11 @@ InitializeFromPCNDataType
return !cn.empty(); return !cn.empty();
} }
int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrder) int
WordLattice::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{ {
Clear(); Clear();
std::string line; std::string line;
View File
@ -43,8 +43,10 @@ public:
int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line = ""); int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line = "");
/** Read from PLF format (1 lattice per line) /** Read from PLF format (1 lattice per line)
*/ */
int Read(std::istream& in,const std::vector<FactorType>& factorOrder); int Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
/** Convert internal representation into an edge matrix /** Convert internal representation into an edge matrix
* @note edges[1][2] means there is an edge from 1 to 2 * @note edges[1][2] means there is an edge from 1 to 2
*/ */
View File
@ -21,6 +21,7 @@ namespace Moses
if (!input.init(param)) return false; if (!input.init(param)) return false;
if (!mbr.init(param)) return false; if (!mbr.init(param)) return false;
if (!lmbr.init(param)) return false; if (!lmbr.init(param)) return false;
if (!output.init(param)) return false;
param.SetParameter(mira, "mira", false); param.SetParameter(mira, "mira", false);
@ -45,12 +46,31 @@ namespace Moses
{ {
if (mbr.enabled) if (mbr.enabled)
{ {
cerr << "Error: Cannot use consensus decoding together with mbr" << endl; cerr << "Error: Cannot use consensus decoding together with mbr"
<< endl;
return false; return false;
} }
mbr.enabled = true; mbr.enabled = true;
} }
// RecoverPath should only be used with confusion net or word lattice input
if (output.RecoverPath && input.input_type == SentenceInput)
{
TRACE_ERR("--recover-input-path should only be used with "
<<"confusion net or word lattice input!\n");
output.RecoverPath = false;
}
// set m_nbest_options.enabled = true if necessary:
nbest.enabled = (nbest.enabled || mira || search.consensus
|| nbest.nbest_size > 0
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()
|| !output.SearchGraphHG.empty()
|| !output.SearchGraphPB.empty()
|| output.lattice_sample_size != 0);
return true; return true;
} }
@ -67,9 +87,24 @@ namespace Moses
if (!input.update(param)) return false; if (!input.update(param)) return false;
if (!mbr.update(param)) return false; if (!mbr.update(param)) return false;
if (!lmbr.update(param)) return false; if (!lmbr.update(param)) return false;
return true; if (!output.update(param)) return false;
return sanity_check();
} }
#endif #endif
bool
AllOptions::
NBestDistinct() const
{
return (nbest.only_distinct
|| mbr.enabled || lmbr.enabled
|| output.lattice_sample_size
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()
|| !output.SearchGraphHG.empty());
}
} }
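Two behavioural points in this hunk are easy to miss: sanity_check() force-enables n-best bookkeeping whenever any search-graph output, lattice sampling, MIRA or consensus decoding is requested, and the new NBestDistinct() derives the "distinct hypotheses" requirement from largely the same settings. A compact stand-alone restatement with toy fields (the real ones live in NBestOptions and ReportingOptions):

#include <cstddef>
#include <string>

struct OptionsToy {
  bool nbest_enabled = false; std::size_t nbest_size = 0; bool only_distinct = false;
  bool mira = false, consensus = false, mbr = false, lmbr = false;
  std::string SearchGraph, SearchGraphExtended, SearchGraphSLF, SearchGraphHG, SearchGraphPB;
  std::size_t lattice_sample_size = 0;

  void sanity_check() {
    nbest_enabled = nbest_enabled || mira || consensus || nbest_size > 0
                    || !SearchGraph.empty() || !SearchGraphExtended.empty()
                    || !SearchGraphSLF.empty() || !SearchGraphHG.empty()
                    || !SearchGraphPB.empty() || lattice_sample_size != 0;
  }
  bool NBestDistinct() const {
    return only_distinct || mbr || lmbr || lattice_sample_size != 0
           || !SearchGraph.empty() || !SearchGraphExtended.empty()
           || !SearchGraphSLF.empty() || !SearchGraphHG.empty();
  }
};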
View File
@ -11,6 +11,7 @@
#include "InputOptions.h" #include "InputOptions.h"
#include "MBR_Options.h" #include "MBR_Options.h"
#include "LMBR_Options.h" #include "LMBR_Options.h"
#include "ReportingOptions.h"
namespace Moses namespace Moses
{ {
struct struct
@ -24,7 +25,7 @@ namespace Moses
InputOptions input; InputOptions input;
MBR_Options mbr; MBR_Options mbr;
LMBR_Options lmbr; LMBR_Options lmbr;
ReportingOptions output;
bool mira; bool mira;
// StackOptions stack; // StackOptions stack;
@ -38,6 +39,8 @@ namespace Moses
bool update(std::map<std::string,xmlrpc_c::value>const& param); bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif #endif
bool NBestDistinct() const;
}; };
} }
View File
@ -1,4 +1,4 @@
// -*- mode: c++; cc-style: gnu -*- // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "moses/Parameter.h" #include "moses/Parameter.h"
#include "NBestOptions.h" #include "NBestOptions.h"
@ -33,4 +33,21 @@ init(Parameter const& P)
enabled = output_file_path.size(); enabled = output_file_path.size();
return true; return true;
} }
#ifdef HAVE_XMLRPC_C
bool
NBestOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = param.find("nbest");
if (si != param.end())
nbest_size = xmlrpc_c::value_int(si->second);
only_distinct = check(param, "nbest-distinct");
enabled = (nbest_size > 0);
return true;
}
#endif
} // namespace Moses } // namespace Moses
View File
@ -24,6 +24,10 @@ struct NBestOptions : public OptionsBaseClass
bool init(Parameter const& param); bool init(Parameter const& param);
#ifdef HAVE_XMLRPC_C
bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif
}; };
} }
View File
@ -10,6 +10,16 @@ namespace Moses
{ {
return true; return true;
} }
bool
OptionsBaseClass::
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (param.find(key) != param.end());
}
#endif #endif
} }
View File
@ -12,6 +12,10 @@ namespace Moses
#ifdef HAVE_XMLRPC_C #ifdef HAVE_XMLRPC_C
virtual bool virtual bool
update(std::map<std::string,xmlrpc_c::value>const& params); update(std::map<std::string,xmlrpc_c::value>const& params);
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key);
#endif #endif
}; };
} }
View File
@ -1,5 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#if 0
#include "ReportingOptions.h" #include "ReportingOptions.h"
#include "moses/Parameter.h" #include "moses/Parameter.h"
@ -9,82 +8,70 @@ namespace Moses {
ReportingOptions:: ReportingOptions::
init(Parameter const& param) init(Parameter const& param)
{ {
// including factors in the output
param.SetParameter(ReportAllFactors, "report-all-factors", false);
// segmentation reporting
ReportSegmentation = (param.GetParam("report-segmentation-enriched")
? 2 : param.GetParam("report-segmentation")
? 1 : 0);
// word alignment reporting
param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
std::string e; // hack to save us param.SetParameter<string>(...)
param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
// output a word graph
PARAM_VEC const* params; PARAM_VEC const* params;
param.SetParameter(segmentation, "report-segmentation", false );
param.SetParameter(segmentation_enriched, "report-segmentation-enriched", false);
param.SetParameter(all_factors, "report-all-factors", false );
// print ...
param.SetParameter(id, "print-id", false );
param.SetParameter(aln_info, "print-alignment-info", false);
param.SetParameter(passthrough, "print-passthrough", false );
param.SetParameter<string>(detailed_transrep_filepath, "translation-details", "");
param.SetParameter<string>(detailed_tree_transrep_filepath,
"tree-translation-details", "");
param.SetParameter<string>(detailed_all_transrep_filepath,
"translation-all-details", "");
// output search graph
param.SetParameter<string>(output,
"translation-all-details", "");
param.SetParameter(sort_word_alignment, "sort-word-alignment", NoSort);
// Is there a reason why we can't use SetParameter here? [UG]
params = param.GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
}
params = param.GetParam("output-word-graph"); params = param.GetParam("output-word-graph");
output_word_graph = (params && params->size() == 2); WordGraph = (params && params->size() == 2); // what are the two options?
// bizarre code ahead! Why do we need to do the checks here?
// as adapted from StaticData.cpp
params = param.GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
output_search_graph = true;
}
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
output_search_graph = true;
m_outputSearchGraphExtended = true;
} else {
m_outputSearchGraph = false;
}
params = m_parameter->GetParam("output-search-graph-slf");
output_search_graph_slf = params && params->size();
params = m_parameter->GetParam("output-search-graph-hypergraph");
output_search_graph_hypergraph = params && params->size();
// dump the search graph
param.SetParameter(SearchGraph, "output-search-graph", e);
param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
params = m_parameter->GetParam("output-search-graph-pb"); param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
if (params && params->size()) {
if (params->size() != 1) {
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
return false;
}
m_outputSearchGraphPB = true;
} else
m_outputSearchGraphPB = false;
#endif #endif
param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
// miscellaneous
param.SetParameter(RecoverPath, "recover-input-path",false);
param.SetParameter(ReportHypoScore, "output-hypo-score",false);
param.SetParameter(PrintID, "print-id",false);
param.SetParameter(PrintPassThrough, "print-passthrough",false);
param.SetParameter(detailed_all_transrep_filepath,
"translation-all-details", e);
param.SetParameter(detailed_transrep_filepath, "translation-details", e);
param.SetParameter(detailed_tree_transrep_filepath,
"tree-translation-details", e);
params = param.GetParam("lattice-samples");
if (params) {
if (params->size() ==2 ) {
lattice_sample_filepath = params->at(0);
lattice_sample_size = Scan<size_t>(params->at(1));
} else {
std::cerr <<"wrong format for switch -lattice-samples file size";
return false;
}
} else {
lattice_sample_size = 0;
}
return true;
}
#ifdef HAVE_XMLRPC_C
bool
ReportingOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
ReportAllFactors = check(param, "report-all-factors");
return true; return true;
} }
}
#endif #endif
}
View File
@ -2,40 +2,59 @@
#pragma once #pragma once
#include <string> #include <string>
#include "moses/Parameter.h" #include "moses/Parameter.h"
#include "OptionsBaseClass.h"
namespace Moses namespace Moses
{ {
struct struct
ReportingOptions ReportingOptions : public OptionsBaseClass
{ {
bool ReportAllFactors; // m_reportAllFactors;
WordAlignmentSort sort_word_alignment; // 0: no, 1: target order int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
bool segmentation; // m_reportSegmentation; WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
bool segmentation_enriched; // m_reportSegmentationEnriched; std::string AlignmentOutputFile;
bool all_factors; // m_reportAllFactors;
bool output_word_graph; bool WordGraph;
bool output_search_graph;
bool output_search_graph_extended; std::string SearchGraph;
bool output_search_graph_slf; std::string SearchGraphExtended;
bool output_search_graph_hypergraph; std::string SearchGraphSLF;
bool output_search_graph_protobuf; std::string SearchGraphHG;
std::string SearchGraphPB;
bool DontPruneSearchGraph;
bool RecoverPath; // recover input path?
bool ReportHypoScore;
bool PrintID;
bool PrintPassThrough;
// print .. // print ..
bool aln_info; // m_PrintAlignmentInfo; bool aln_info; // m_PrintAlignmentInfo;
bool id; // m_PrintID;
bool passthrough; // m_PrintPassthroughInformation;
// transrep = translation reporting // transrep = translation reporting
std::string detailed_transrep_filepath; std::string detailed_transrep_filepath;
std::string detailed_tree_transrep_filepath; std::string detailed_tree_transrep_filepath;
std::string detailed_all_transrep_filepath; std::string detailed_all_transrep_filepath;
std::string aln_output_file; // m_alignmentOutputFile; std::string lattice_sample_filepath;
size_t lattice_sample_size;
bool init(Parameter const& param); bool init(Parameter const& param);
/// do we need to keep the search graph from decoding?
bool NeedSearchGraph() const {
return !(SearchGraph.empty() && SearchGraphExtended.empty());
}
#ifdef HAVE_XMLRPC_C
bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif
}; };
} }
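NeedSearchGraph() gives callers a single predicate for "the decoder must retain its search graph". A small usage sketch (the decoder-side hook is hypothetical; only the predicate mirrors the header above):

#include <string>

struct ReportingOptionsToy {
  std::string SearchGraph, SearchGraphExtended;
  bool NeedSearchGraph() const {
    return !(SearchGraph.empty() && SearchGraphExtended.empty());
  }
};

// e.g. inside the decoder:
//   if (opts.output.NeedSearchGraph())
//     keep_recombination_arcs();   // hypothetical hook, not a Moses API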
View File
@ -36,6 +36,7 @@ namespace Moses
beam_width = TransformScore(beam_width); beam_width = TransformScore(beam_width);
trans_opt_threshold = TransformScore(trans_opt_threshold); trans_opt_threshold = TransformScore(trans_opt_threshold);
early_discarding_threshold = TransformScore(early_discarding_threshold); early_discarding_threshold = TransformScore(early_discarding_threshold);
return true; return true;
} }
View File
@ -25,8 +25,7 @@ using Moses::Sentence;
boost::shared_ptr<TranslationRequest> boost::shared_ptr<TranslationRequest>
TranslationRequest:: TranslationRequest::
create(Translator* translator, xmlrpc_c::paramList const& paramList, create(Translator* translator, xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::condition_variable& cond, boost::mutex& mut)
boost::mutex& mut)
{ {
boost::shared_ptr<TranslationRequest> ret; boost::shared_ptr<TranslationRequest> ret;
ret.reset(new TranslationRequest(paramList, cond, mut)); ret.reset(new TranslationRequest(paramList, cond, mut));
@ -60,10 +59,9 @@ Run()
Moses::StaticData const& SD = Moses::StaticData::Instance(); Moses::StaticData const& SD = Moses::StaticData::Instance();
//Make sure alternative paths are retained, if necessary //Make sure alternative paths are retained, if necessary
if (m_withGraphInfo || m_nbestSize>0) // if (m_withGraphInfo || m_nbestSize>0)
// why on earth is this a global variable? Is this even thread-safe???? UG // why on earth is this a global variable? Is this even thread-safe???? UG
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true); // (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
// std::stringstream out, graphInfo, transCollOpts; // std::stringstream out, graphInfo, transCollOpts;
if (SD.IsSyntax()) if (SD.IsSyntax())
@ -170,7 +168,14 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
{ {
TrellisPathList nBestList; TrellisPathList nBestList;
vector<xmlrpc_c::value> nBestXml; vector<xmlrpc_c::value> nBestXml;
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); manager.CalcNBest(m_options.nbest.nbest_size, nBestList,
m_options.nbest.only_distinct);
StaticData const& SD = StaticData::Instance();
manager.OutputNBest(cout, nBestList,
SD.GetOutputFactorOrder(),
m_source->GetTranslationId(),
options().output.ReportSegmentation);
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) { BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
vector<const Hypothesis *> const& E = path->GetEdges(); vector<const Hypothesis *> const& E = path->GetEdges();
@ -180,7 +185,8 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
if (m_withScoreBreakdown) { if (m_withScoreBreakdown) {
// should the score breakdown be reported in a more structured manner? // should the score breakdown be reported in a more structured manner?
ostringstream buf; ostringstream buf;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf); bool with_labels = m_options.nbest.include_feature_labels;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
} }
@ -228,23 +234,23 @@ insertTranslationOptions(Moses::Manager& manager,
retData["topt"] = xmlrpc_c::value_array(toptsXml); retData["topt"] = xmlrpc_c::value_array(toptsXml);
} }
bool
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (params.find(key) != params.end());
}
TranslationRequest:: TranslationRequest::
TranslationRequest(xmlrpc_c::paramList const& paramList, TranslationRequest(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::mutex& mut) boost::condition_variable& cond, boost::mutex& mut)
: m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
, m_nbestSize(0) // , m_nbestSize(0)
, m_session_id(0) , m_session_id(0)
{ {
m_options = StaticData::Instance().options(); m_options = StaticData::Instance().options();
} }
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (param.find(key) != param.end());
}
void void
TranslationRequest:: TranslationRequest::
@@ -274,10 +280,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
   m_withWordAlignInfo = check(params, "word-align");
   m_withGraphInfo = check(params, "sg");
   m_withTopts = check(params, "topt");
-  m_reportAllFactors = check(params, "report-all-factors");
-  m_nbestDistinct = check(params, "nbest-distinct");
+  // m_reportAllFactors = check(params, "report-all-factors");
+  // m_nbestDistinct = check(params, "nbest-distinct");
   m_withScoreBreakdown = check(params, "add-score-breakdown");
-  m_source.reset(new Sentence(0,m_source_string));
   si = params.find("lambda");
   if (si != params.end())
     {
@@ -298,9 +303,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
     }
   }
-  si = params.find("nbest");
-  if (si != params.end())
-    m_nbestSize = xmlrpc_c::value_int(si->second);
+  // si = params.find("nbest");
+  // if (si != params.end())
+  //   m_nbestSize = xmlrpc_c::value_int(si->second);
   si = params.find("context");
   if (si != params.end())
@@ -309,6 +314,8 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
       VERBOSE(1,"CONTEXT " << context);
       m_context.reset(new std::vector<std::string>(1,context));
     }
   // // biased sampling for suffix-array-based sampling phrase table?
   // if ((si = params.find("bias")) != params.end())
   //   {
@@ -317,6 +324,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
   //     for (size_t i = 1; i < tmp.size(); i += 2)
   //       m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
   //   }
+  m_source.reset(new Sentence(0,m_source_string,m_options));
 } // end of Translationtask::parse_request()
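Note: moving m_source.reset(...) from the top of parse_request() to its end is not cosmetic; the Sentence constructor now takes the per-request options, so the input presumably has to be built only after every request parameter has been folded into m_options. A rough standalone sketch of that ordering constraint, with hypothetical types rather than the Moses classes:

// Sketch: an input object that snapshots options at construction time must be
// created after the options have been finalized, otherwise it sees defaults.
#include <iostream>

struct Options {
  int nbest_size = 0;           // stand-in for nbest.nbest_size
};

struct Input {
  Options opts;                 // copied, not referenced
  explicit Input(Options const& o) : opts(o) {}
};

int main()
{
  Options opts;                 // server defaults
  // Wrong order: an Input built here would be stuck with nbest_size == 0.
  opts.nbest_size = 10;         // apply the client's request parameters first
  Input input(opts);            // then construct the input
  std::cout << input.opts.nbest_size << "\n";   // prints 10
  return 0;
}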
@@ -326,8 +334,8 @@ run_chart_decoder()
 {
   Moses::TreeInput tinput;
   istringstream buf(m_source_string + "\n");
-  tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
+  tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options);
   Moses::ChartManager manager(this->self());
   manager.Decode();
@@ -393,8 +401,13 @@ void
 TranslationRequest::
 run_phrase_decoder()
 {
+  if (m_withGraphInfo || m_options.nbest.nbest_size>0)
+    m_options.output.SearchGraph = "true";
   Manager manager(this->self());
   // if (m_bias.size()) manager.SetBias(&m_bias);
   manager.Decode();
   pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
@@ -403,10 +416,10 @@ run_phrase_decoder()
   if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
   if (m_withTopts) insertTranslationOptions(manager,m_retData);
-  if (m_nbestSize) outputNBest(manager, m_retData);
+  if (m_options.nbest.nbest_size) outputNBest(manager, m_retData);
-  (const_cast<StaticData&>(Moses::StaticData::Instance()))
-  .SetOutputSearchGraph(false);
+  // (const_cast<StaticData&>(Moses::StaticData::Instance()))
+  // .SetOutputSearchGraph(false);
   // WTF? one more reason not to have this as global variable! --- UG
 }
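Note: the commented-out SetOutputSearchGraph() calls and the "is this even thread-safe" remarks above are the point of this hunk. Each request handler used to flip a flag on the process-wide StaticData singleton and reset it afterwards, which races with other requests decoding concurrently; the replacement keeps the flag in the handler's own copy of the options. A small standalone illustration of the two patterns, with hypothetical types rather than the Moses classes:

// Sketch of why per-request options are safer than toggling a global flag.
#include <iostream>
#include <string>
#include <thread>
#include <vector>

struct Options {
  std::string SearchGraph;      // empty = off, mirrors output.SearchGraph
  int nbest_size = 0;
};

Options g_options;              // shared by every request (old pattern)

// Old pattern: handler mutates the global, then resets it; two concurrent
// handlers can clobber each other's setting (a data race).
void handle_old(bool wants_graph) {
  if (wants_graph) g_options.SearchGraph = "true";
  // ... decode reading g_options ...
  g_options.SearchGraph.clear();
}

// New pattern: handler works on its own copy, no shared mutable state.
void handle_new(Options opts, bool wants_graph) {
  if (wants_graph || opts.nbest_size > 0)
    opts.SearchGraph = "true";  // visible only to this request
  // ... decode reading opts ...
}

int main() {
  std::vector<std::thread> pool;
  for (int i = 0; i < 4; ++i)
    pool.emplace_back(handle_new, g_options, i % 2 == 0);  // copy per request
  for (auto& t : pool) t.join();
  std::cout << "served 4 requests\n";
  return 0;
}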

View File

@@ -43,9 +43,9 @@ TranslationRequest : public virtual Moses::TranslationTask
   bool m_withGraphInfo;
   bool m_withTopts;
   bool m_reportAllFactors;
-  bool m_nbestDistinct;
+  // bool m_nbestDistinct;
   bool m_withScoreBreakdown;
-  size_t m_nbestSize;
+  // size_t m_nbestSize;
   uint64_t m_session_id; // 0 means none, 1 means new