Merge branch 'mmt-dev'

This commit is contained in:
Ulrich Germann 2015-10-31 13:36:40 +00:00
commit ff1977c29e
81 changed files with 1122 additions and 969 deletions

View File

@ -1,5 +1,5 @@
#BUILDING MOSES
#
#PACKAGES
#Language models (optional):
#--with-irstlm=/path/to/irstlm
@ -245,7 +245,7 @@ if [ option.get "with-mm" : : "yes" ]
moses/TranslationModel/UG//ptable-describe-features
moses/TranslationModel/UG//count-ptable-features
moses/TranslationModel/UG//ptable-lookup
# moses/TranslationModel/UG//spe-check-coverage
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG/mm//mtt-demo1
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
@ -256,6 +256,7 @@ if [ option.get "with-mm" : : "yes" ]
moses/TranslationModel/UG/mm//mmlex-lookup
moses/TranslationModel/UG/mm//mtt-count-words
moses/TranslationModel/UG/mm//calc-coverage
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG//try-align
;
}

View File

@ -257,9 +257,9 @@ public:
const StaticData &staticData = StaticData::Instance();
//Make sure alternative paths are retained, if necessary
if (addGraphInfo || nbest_size>0) {
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
}
// if (addGraphInfo || nbest_size>0) {
// (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
// }
stringstream out, graphInfo, transCollOpts;
@ -269,7 +269,7 @@ public:
boost::shared_ptr<TreeInput> tinput(new TreeInput);
const vector<FactorType>& IFO = staticData.GetInputFactorOrder();
istringstream in(source + "\n");
tinput->Read(in,IFO);
tinput->Read(in,IFO,staticData.options());
ttasksptr task = Moses::TranslationTask::create(tinput);
ChartManager manager(task);
manager.Decode();
@ -285,7 +285,8 @@ public:
else
{
// size_t lineNumber = 0; // TODO: Include sentence request number here?
boost::shared_ptr<Sentence> sentence(new Sentence(0,source));
boost::shared_ptr<Sentence> sentence;
sentence.reset(new Sentence(0,source,staticData.options()));
ttasksptr task = Moses::TranslationTask::create(sentence);
Manager manager(task);
manager.Decode();
@ -320,7 +321,7 @@ public:
outputNBest(manager, m_retData, nbest_size, nbest_distinct,
reportAllFactors, addAlignInfo, addScoreBreakdown);
}
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
// (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
}
m_retData["text"] = value_string(out.str());
XVERBOSE(1,"Output: " << out.str() << endl);
@ -479,7 +480,9 @@ public:
{
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path.GetScoreBreakdown()->OutputAllFeatureScores(buf);
bool with_labels
= StaticData::Instance().options().nbest.include_feature_labels;
path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str());
}

View File

@ -202,8 +202,9 @@ int main(int argc, char* argv[])
<< " ||| ";
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
manager.OutputBestHypo(mbrBestHypo, lineCount,
SD.GetReportSegmentation(),
SD.GetReportAllFactors(),cout);
manager.options().output.ReportSegmentation,
manager.options().output.ReportAllFactors,
cout);
}
}
}

View File

@ -106,7 +106,9 @@ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
}
bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b)
bool
compare_target(std::pair<size_t,size_t> const* a,
std::pair<size_t,size_t> const* b)
{
if(a->second < b->second) return true;
if(a->second == b->second) return (a->first < b->first);
@ -114,7 +116,9 @@ bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,si
}
std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignments() const
std::vector< const std::pair<size_t,size_t>* >
AlignmentInfo::
GetSortedAlignments(WordAlignmentSort SortOrder) const
{
std::vector< const std::pair<size_t,size_t>* > ret;
@ -124,10 +128,7 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
ret.push_back(&alignPair);
}
const StaticData &staticData = StaticData::Instance();
WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort();
switch (wordAlignmentSort) {
switch (SortOrder) {
case NoSort:
break;
@ -136,7 +137,8 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
break;
default:
UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort);
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
<< SortOrder);
}
return ret;

View File

@ -26,7 +26,7 @@
#include <cstdlib>
#include <boost/functional/hash.hpp>
#include "TypeDef.h"
namespace Moses
{
@ -83,7 +83,8 @@ public:
return m_collection.size();
}
std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
std::vector< const std::pair<size_t,size_t>* >
GetSortedAlignments(WordAlignmentSort SortOrder) const;
std::vector<size_t> GetSourceIndex2PosMap() const;

View File

@ -27,7 +27,6 @@
#include "RuleCube.h"
#include "Range.h"
#include "Util.h"
#include "StaticData.h"
#include "ChartTranslationOptions.h"
#include "ChartTranslationOptionList.h"
#include "ChartManager.h"
@ -52,8 +51,7 @@ ChartCellBase::~ChartCellBase() {}
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_nBestIsEnabled = manager.options().nbest.enabled;
}
ChartCell::~ChartCell() {}
@ -66,7 +64,14 @@ ChartCell::~ChartCell() {}
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
{
const Word &targetLHS = hypo->GetTargetLHS();
return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager);
MapType::iterator m = m_hypoColl.find(targetLHS);
if (m == m_hypoColl.end())
{
std::pair<Word, ChartHypothesisCollection>
e(targetLHS, ChartHypothesisCollection(m_manager.options()));
m = m_hypoColl.insert(e).first;
}
return m->second.AddHypothesis(hypo, m_manager);
}
/** Prune each collection in this cell to a particular size */
@ -87,8 +92,6 @@ void ChartCell::PruneToSize()
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
, const ChartCellCollection &allChartCells)
{
const StaticData &staticData = StaticData::Instance();
// priority queue for applicable rules with selected hypotheses
RuleCubeQueue queue(m_manager);
@ -100,7 +103,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.options().cube.pop_limit;
const size_t popLimit = m_manager.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

View File

@ -256,12 +256,13 @@ void ChartHypothesis::CleanupArcList()
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of the arc list if we need a distinct n-best list
*/
AllOptions const& opts = StaticData::Instance().options();
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.options().mbr.enabled
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());
size_t nBestSize = opts.nbest.nbest_size;
bool distinctNBest = (opts.nbest.only_distinct
|| opts.mbr.enabled
|| opts.output.NeedSearchGraph()
|| !opts.output.SearchGraphHG.empty());
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there are too many arcs

View File

@ -26,6 +26,7 @@
#include "ChartManager.h"
#include "HypergraphOutput.h"
#include "util/exception.hh"
#include "parameters/AllOptions.h"
using namespace std;
using namespace Moses;
@ -33,13 +34,13 @@ using namespace Moses;
namespace Moses
{
ChartHypothesisCollection::ChartHypothesisCollection()
ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts)
{
const StaticData &staticData = StaticData::Instance();
// const StaticData &staticData = StaticData::Instance();
m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth();
m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size;
m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

View File

@ -29,6 +29,7 @@ namespace Moses
{
class ChartSearchGraphWriter;
class AllOptions;
//! functor to compare (chart) hypotheses by (descending) score
class ChartHypothesisScoreOrderer
@ -70,7 +71,7 @@ public:
return m_hypos.end();
}
ChartHypothesisCollection();
ChartHypothesisCollection(AllOptions const& opts);
~ChartHypothesisCollection();
bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);

View File

@ -371,7 +371,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
scoreBreakdown->OutputAllFeatureScores(out);
bool with_labels = options().nbest.include_feature_labels;
scoreBreakdown->OutputAllFeatureScores(out, with_labels);
out << " ||| " << derivation.score;
// optionally, print word alignments
@ -618,7 +619,7 @@ void ChartManager::OutputDetailedTranslationReport(
//DIMw
const StaticData &staticData = StaticData::Instance();
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
if (options().output.detailed_all_transrep_filepath.size()) {
const Sentence &sentence = static_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
@ -835,11 +836,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
Backtrack(hypo);
VERBOSE(3,"0" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << hypo->GetTotalScore() << " ";
}
if (StaticData::Instance().IsPathRecoveryEnabled()) {
if (options().output.RecoverPath) {
out << "||| ";
}
Phrase outPhrase(ARRAY_SIZE_INCR);
@ -858,7 +859,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
} else {
VERBOSE(1, "NO BEST TRANSLATION" << endl);
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << "0 ";
}

View File

@ -107,8 +107,13 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
AllOptions const& opts = staticData.options();
if (!opts.output.detailed_tree_transrep_filepath.empty() ||
opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) {
std::string prop = "[ ";
prop += (*targetLHS)[0]->GetString().as_string() + " ";
prop += sourceWord[0]->GetString().as_string() + " ]";
targetPhrase->SetProperty("Tree", prop);
}
// chart rule

View File

@ -110,29 +110,14 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
int
ConfusionNet::
Read(std::istream& in,
const std::vector<FactorType>& factorOrder)
const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
{
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
}
#if 0
// Deprecated due to code duplication;
// use Word::CreateFromString() instead
void
ConfusionNet::
String2Word(const std::string& s,Word& w,
const std::vector<FactorType>& factorOrder)
{
std::vector<std::string> factorStrVector = Tokenize(s, "|");
for(size_t i=0; i<factorOrder.size(); ++i)
w.SetFactor(factorOrder[i],
FactorCollection::Instance().AddFactor
(Input,factorOrder[i], factorStrVector[i]));
}
#endif
bool
ConfusionNet::
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
@ -161,7 +146,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
for(size_t i=0; i < numInputScores; i++) {
double prob;
if (!(is>>prob)) {
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
<< "or wrong number of scores\n");
return false;
}
if(prob<0.0) {
@ -174,7 +160,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
}
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
// store 'real' word count in last feature if we have one more
// weight than we do arc scores and not epsilon
if (addRealWordCount && word!=EPSILON && word!="")
probs.back() = -1.0;

View File

@ -67,7 +67,8 @@ public:
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
virtual void Print(std::ostream&) const;
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
int Read(std::istream& in,const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
Phrase GetSubString(const Range&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined

View File

@ -100,12 +100,14 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
}
}
void DecodeStepTranslation::ProcessInitialTranslation(
const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath
, TargetPhraseCollection::shared_ptr phraseColl) const
void
DecodeStepTranslation::
ProcessInitialTranslation(InputType const& source,
PartialTranslOptColl &outputPartialTranslOptColl,
size_t startPos, size_t endPos,
bool adhereTableLimit,
InputPath const& inputPath,
TargetPhraseCollection::shared_ptr phraseColl) const
{
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit();
@ -114,8 +116,9 @@ void DecodeStepTranslation::ProcessInitialTranslation(
if (phraseColl != NULL) {
IFVERBOSE(3) {
if(StaticData::Instance().GetInputType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
if(source.GetType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(range) << "; "
<< startPos << "-" << endPos << "]\n");
else
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
}
@ -137,11 +140,13 @@ void DecodeStepTranslation::ProcessInitialTranslation(
}
}
void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPathList &inputPathList) const
void
DecodeStepTranslation::
ProcessInitialTransLEGACY(InputType const& source,
PartialTranslOptColl &outputPartialTranslOptColl,
size_t startPos, size_t endPos,
bool adhereTableLimit,
InputPathList const& inputPathList) const
{
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit();
@ -152,8 +157,9 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
if (phraseColl != NULL) {
IFVERBOSE(3) {
if(StaticData::Instance().GetInputType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
if(source.GetType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(range) << "; "
<< startPos << "-" << endPos << "]\n");
else
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
}

View File

@ -61,10 +61,13 @@ public:
, TargetPhraseCollection::shared_ptr phraseColl) const;
// legacy
void ProcessInitialTranslationLEGACY(const InputType &source
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPathList &inputPathList) const;
void
ProcessInitialTransLEGACY(InputType const& source,
PartialTranslOptColl &outputPartialTranslOptColl,
size_t startPos, size_t endPos,
bool adhereTableLimit,
InputPathList const& inputPathList) const;
void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl

View File

@ -17,8 +17,10 @@ namespace Moses
{
//! populate this InputType with data from in stream
int ForestInput::Read(std::istream &in,
const std::vector<FactorType>& factorOrder)
int ForestInput::
Read(std::istream &in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
using Syntax::F2S::Forest;
@ -56,7 +58,7 @@ int ForestInput::Read(std::istream &in,
// not sure ForestInput needs to.
std::stringstream strme;
strme << "<s> " << sentence << " </s>" << std::endl;
Sentence::Read(strme, factorOrder);
Sentence::Read(strme, factorOrder, opts);
// Find the maximum end position of any vertex (0 if forest is empty).
std::size_t maxEnd = FindMaxEnd(*m_forest);

View File

@ -28,7 +28,10 @@ public:
}
//! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
virtual int
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
//! Output debugging info to stream out
virtual void Print(std::ostream&) const;

View File

@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
ChartHypothesis &mainHypo = **iter;
if (StaticData::Instance().GetUnprunedSearchGraph() ||
if (StaticData::Instance().options().output.DontPruneSearchGraph ||
reachable.find(mainHypo.GetId()) != reachable.end()) {
(*m_out) << m_lineNumber << " " << mainHypo << endl;
}
@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
ChartHypothesisCollection::const_iterator iter;
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
const ChartHypothesis* mainHypo = *iter;
if (!StaticData::Instance().GetUnprunedSearchGraph() &&
if (!StaticData::Instance().options().output.DontPruneSearchGraph &&
reachable.find(mainHypo->GetId()) == reachable.end()) {
//Ignore non reachable nodes
continue;

View File

@ -195,9 +195,8 @@ EvaluateWhenApplied(float futureScore)
const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored(ff)) {
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
&m_currScoreBreakdown);
FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
}
}
@ -276,15 +275,11 @@ CleanupArcList()
* However, may not be enough if only unique candidates are needed,
* so we'll keep all of the arc list if we need a distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (m_manager.options().nbest.only_distinct ||
staticData.GetLatticeSamplesSize() ||
m_manager.options().mbr.enabled ||
staticData.GetOutputSearchGraph() ||
staticData.GetOutputSearchGraphSLF() ||
staticData.GetOutputSearchGraphHypergraph() ||
m_manager.options().lmbr.enabled);
AllOptions const& opts = m_manager.options();
size_t nBestSize = opts.nbest.nbest_size;
bool distinctNBest = opts.NBestDistinct();
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there are too many arcs
@ -292,9 +287,8 @@ CleanupArcList()
m_arcList->end(), CompareHypothesisTotalScore());
// delete bad ones
ArcList::iterator iter;
for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
delete *iter;
ArcList::iterator i = m_arcList->begin() + nBestSize;
while (i != m_arcList->end()) delete *i++;
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
@ -387,13 +381,15 @@ OutputAlignment(std::ostream &out) const
currentHypo = currentHypo->GetPrevHypo();
}
OutputAlignment(out, edges);
OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder);
}
void
Hypothesis::
OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
OutputAlignment(ostream &out,
vector<const Hypothesis *> const& edges,
WordAlignmentSort waso)
{
size_t targetOffset = 0;
@ -402,7 +398,7 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso);
targetOffset += tp.GetSize();
}
@ -412,15 +408,17 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
void
Hypothesis::
OutputAlignment(ostream &out, const AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset)
size_t sourceOffset, size_t targetOffset,
WordAlignmentSort waso)
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments();
AlignVec alignments = ai.GetSortedAlignments(waso);
AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
out << alignment.first + sourceOffset << "-"
<< alignment.second + targetOffset << " ";
}
}
@ -526,15 +524,17 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
const int sourceEnd = sourceRange.GetEndPos();
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
if (reportSegmentation == 2) {
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
out << ",wa=";
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
Hypothesis::OutputAlignment(out, ai, 0, 0);
Hypothesis::OutputAlignment(out, ai, 0, 0, waso);
out << ",total=";
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
scoreBreakdown.OutputAllFeatureScores(out);
bool with_labels = m_manager.options().nbest.include_feature_labels;
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
}
out << "| ";
}
@ -609,8 +609,9 @@ OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
Range const& src = this->GetCurrSourceWordsRange();
Range const& trg = this->GetCurrTargetWordsRange();
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
vector<pair<size_t,size_t> const* > a
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
typedef pair<size_t,size_t> item;
map<string, xmlrpc_c::value> M;
BOOST_FOREACH(item const* p, a) {

View File

@ -251,9 +251,18 @@ public:
return m_transOpt;
}
void OutputAlignment(std::ostream &out) const;
static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
void
OutputAlignment(std::ostream &out) const;
static void
OutputAlignment(std::ostream &out,
const std::vector<const Hypothesis *> &edges,
WordAlignmentSort waso);
static void
OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset,
WordAlignmentSort waso);
void OutputInput(std::ostream& os) const;
static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_nBestIsEnabled = manager.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -79,12 +79,6 @@ namespace Moses
IOWrapper::IOWrapper()
: m_nBestStream(NULL)
// , m_outputWordGraphStream(NULL)
// , m_outputSearchGraphStream(NULL)
// , m_detailedTranslationReportingStream(NULL)
// , m_unknownsStream(NULL)
// , m_alignmentInfoStream(NULL)
// , m_latticeSamplesStream(NULL)
, m_surpressSingleBestOutput(false)
, m_look_ahead(0)
, m_look_back(0)
@ -100,7 +94,7 @@ IOWrapper::IOWrapper()
m_look_ahead = staticData.options().context.look_ahead;
m_look_back = staticData.options().context.look_back;
m_inputType = staticData.GetInputType();
m_inputType = staticData.options().input.input_type;
UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
"Context-sensitive decoding currently works only with sentence input.");

View File

@ -216,6 +216,7 @@ boost::shared_ptr<InputType>
IOWrapper::
BufferInput()
{
AllOptions const& opts = StaticData::Instance().options();
boost::shared_ptr<itype> source;
boost::shared_ptr<InputType> ret;
if (m_future_input.size()) {
@ -224,13 +225,13 @@ BufferInput()
m_buffered_ahead -= ret->GetSize();
} else {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
return ret;
ret = source;
}
while (m_buffered_ahead < m_look_ahead) {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();

View File

@ -320,10 +320,15 @@ void Manager::OutputNBest(OutputCollector *collector) const
OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
}
void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
void
Manager::
OutputNBestList(OutputCollector *collector,
std::vector<search::Applied> const& nbest,
long translationId) const
{
const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
const std::vector<Moses::FactorType> &outputFactorOrder
= staticData.GetOutputFactorOrder();
std::ostringstream out;
// wtf? copied from the original OutputNBestList
@ -332,18 +337,21 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
}
Phrase outputPhrase;
ScoreComponentCollection features;
for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
for (std::vector<search::Applied>::const_iterator i = nbest.begin();
i != nbest.end(); ++i) {
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
// <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
"Output phrase should have contained at least 2 words "
<< "(beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
features.OutputAllFeatureScores(out);
bool with_labels = options().nbest.include_feature_labels;
features.OutputAllFeatureScores(out, with_labels);
out << " ||| " << i->GetScore() << '\n';
}
out << std::flush;
@ -351,7 +359,9 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
collector->Write(translationId, out.str());
}
void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
void
Manager::
OutputDetailedTranslationReport(OutputCollector *collector) const
{
if (collector && !completed_nbest_->empty()) {
const search::Applied &applied = completed_nbest_->at(0);
@ -498,7 +508,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
if (collector == NULL) return;
std::ostringstream out;
FixPrecision(out);
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << applied.GetScore() << ' ';
}
Phrase outPhrase;
@ -515,10 +525,12 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
}
void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
void
Manager::
OutputBestNone(OutputCollector *collector, long translationId) const
{
if (collector == NULL) return;
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
collector->Write(translationId, "0 \n");
} else {
collector->Write(translationId, "\n");

View File

@ -1,5 +1,4 @@
// -*- c++ -*-
// $Id$
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// vim:tabstop=2
/***********************************************************************
@ -31,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ReorderingConstraint.h"
#include "NonTerminal.h"
#include "Range.h"
#include "parameters/AllOptions.h"
namespace Moses
{
@ -184,7 +184,10 @@ public:
}
//! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;
virtual int
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts) =0;
//! Output debugging info to stream out
virtual void Print(std::ostream&) const =0;

View File

@ -1,4 +1,5 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -25,14 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "n_gram.h"
#include "lmContainer.h"
// should be defined in lmContainer.h, if the version of IRSTLM used provides
// context-dependent functionality
#ifndef _IRSTLM_LMCONTEXTDEPENDENT
#define _IRSTLM_LMCONTEXTDEPENDENT 5
#else
#define IRSTLM_CONTEXT_DEPENDENT
#endif
using namespace irstlm;
#include "IRST.h"
@ -67,10 +60,9 @@ public:
}
};
LanguageModelIRST::
LanguageModelIRST(const std::string &line)
: LanguageModelSingleFactor(line)
, m_lmtb_dub(0), m_lmtb_size(0)
LanguageModelIRST::LanguageModelIRST(const std::string &line)
:LanguageModelSingleFactor(line)
,m_lmtb_dub(0), m_lmtb_size(0)
{
const StaticData &staticData = StaticData::Instance();
int threadCount = staticData.ThreadCount();
@ -86,9 +78,9 @@ LanguageModelIRST(const std::string &line)
VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
}
LanguageModelIRST::
~LanguageModelIRST()
LanguageModelIRST::~LanguageModelIRST()
{
#ifndef WIN32
TRACE_ERR( "reset mmap\n");
if (m_lmtb) m_lmtb->reset_mmap();
@ -98,17 +90,13 @@ LanguageModelIRST::
}
bool
LanguageModelIRST::
IsUseable(const FactorMask &mask) const
bool LanguageModelIRST::IsUseable(const FactorMask &mask) const
{
bool ret = mask[m_factorType];
return ret;
}
void
LanguageModelIRST::
Load()
void LanguageModelIRST::Load()
{
FactorCollection &factorCollection = FactorCollection::Instance();
@ -135,9 +123,7 @@ Load()
if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
}
void
LanguageModelIRST::
CreateFactors(FactorCollection &factorCollection)
void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
{
// add factors which have srilm id
// code copied & paste from SRI LM class. should do template function
@ -179,23 +165,17 @@ CreateFactors(FactorCollection &factorCollection)
}
}
int
LanguageModelIRST::
GetLmID( const std::string &str ) const
/// Returns the integer code for the token `str`, obtained by passing its
/// C-string form to `d->encode`.
int LanguageModelIRST::GetLmID( const std::string &str ) const
{
  // encoding happens at the level of micro tags
  const char *token = str.c_str();
  return d->encode(token);
}
int
LanguageModelIRST::
GetLmID( const Word &word ) const
// Returns the LM id of a word: takes the word's factor for this model's
// factor type (m_factorType) and forwards it to another GetLmID overload.
// NOTE(review): presumably this resolves to the `const Factor*` overload --
// confirm against the return type of Word::GetFactor.
int LanguageModelIRST::GetLmID( const Word &word ) const
{
return GetLmID( word.GetFactor(m_factorType) );
}
int
LanguageModelIRST::
GetLmID( const Factor *factor ) const
int LanguageModelIRST::GetLmID( const Factor *factor ) const
{
size_t factorId = factor->GetId();
@ -216,21 +196,21 @@ GetLmID( const Factor *factor ) const
///////////
///OLD PROBLEM - SOLVED
////////////
/// THE PROBLEM WAS HERE
/// m_lmIdLookup.push_back(code);
/// BECAUSE BY USING PUSH_BACK WE WERE ACTUALLY INSERTING THE NEW ELEMENT
/// AT POSITION (factorID-1) instead of at position factorID, where we later read it (see case C)
/// This way it works ....
/// I had a doubt about what is stored in the first positions of m_lmIdLookup,
/// so I checked,
/// and found that one entry out of every two stays empty:
/// because factorID grows in steps of two (it encodes both source and target), position (factorID-1) is left "empty"
/// this causes no correctness problems, only "wasted" memory
/// we could make m_lmIdLookup a std::map instead of a std::vector,
/// but access would become less efficient because the random access of vectors would no longer be possible
/// the choice is yours!!!!
////////////////
////////////
/// THE PROBLEM WAS HERE
/// m_lmIdLookup.push_back(code);
/// BECAUSE BY USING PUSH_BACK WE WERE ACTUALLY INSERTING THE NEW ELEMENT
/// AT POSITION (factorID-1) instead of at position factorID, where we later read it (see case C)
/// This way it works ....
/// I had a doubt about what is stored in the first positions of m_lmIdLookup,
/// so I checked,
/// and found that one entry out of every two stays empty:
/// because factorID grows in steps of two (it encodes both source and target), position (factorID-1) is left "empty"
/// this causes no correctness problems, only "wasted" memory
/// we could make m_lmIdLookup a std::map instead of a std::vector,
/// but access would become less efficient because the random access of vectors would no longer be possible
/// the choice is yours!!!!
////////////////
if (factorId >= m_lmIdLookup.size()) {
@ -251,34 +231,21 @@ GetLmID( const Factor *factor ) const
}
}
FFState const*
LanguageModelIRST::
EmptyHypothesisState(const InputType &/*input*/) const
/// Builds the language-model state for an empty hypothesis.
///
/// The input argument is unused. Returns a newly heap-allocated,
/// default-constructed IRSTLMState; nothing else holds the pointer, so the
/// caller receives sole ownership of it.
const FFState* LanguageModelIRST::EmptyHypothesisState(const InputType &/*input*/) const
{
  // The object is handed to the caller immediately, so no temporary owner is
  // needed. This also drops the dependency on std::auto_ptr, which is
  // deprecated since C++11 and removed in C++17.
  return new IRSTLMState();
}
void
LanguageModelIRST::
CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
void LanguageModelIRST::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
bool isContextAdaptive
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if ( !phrase.GetSize() ) return;
//get the context_weight map here
SPTR<std::map<std::string, float> const> CW;
if (isContextAdaptive && phrase.HasScope()) {
CW = phrase.GetScope()->GetContextWeights();
}
int _min = min(m_lmtb_size - 1, (int) phrase.GetSize());
int codes[m_lmtb_size];
@ -289,78 +256,36 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
char* msp = NULL;
float before_boundary = 0.0;
#ifdef IRSTLM_CONTEXT_DEPENDENT
if (CW) {
for (; position < _min; ++position) {
codes[idx] = GetLmID(phrase.GetWord(position));
if (codes[idx] == m_unknownId) ++oovCount;
before_boundary += m_lmtb->clprob(codes,idx+1,*CW,NULL,NULL,&msp);
++idx;
}
} else {
#endif
for (; position < _min; ++position) {
codes[idx] = GetLmID(phrase.GetWord(position));
if (codes[idx] == m_unknownId) ++oovCount;
before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
++idx;
}
#ifdef IRSTLM_CONTEXT_DEPENDENT
for (; position < _min; ++position) {
codes[idx] = GetLmID(phrase.GetWord(position));
if (codes[idx] == m_unknownId) ++oovCount;
before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
++idx;
}
#endif
ngramScore = 0.0;
int end_loop = (int) phrase.GetSize();
#ifdef IRSTLM_CONTEXT_DEPENDENT
if (CW) {
for (; position < end_loop; ++position) {
for (idx = 1; idx < m_lmtb_size; ++idx) {
codes[idx-1] = codes[idx];
}
codes[idx-1] = GetLmID(phrase.GetWord(position));
if (codes[idx-1] == m_unknownId) ++oovCount;
ngramScore += m_lmtb->clprob(codes,idx,*CW,NULL,NULL,&msp);
for (; position < end_loop; ++position) {
for (idx = 1; idx < m_lmtb_size; ++idx) {
codes[idx-1] = codes[idx];
}
} else {
#endif
for (; position < end_loop; ++position) {
for (idx = 1; idx < m_lmtb_size; ++idx) {
codes[idx-1] = codes[idx];
}
codes[idx-1] = GetLmID(phrase.GetWord(position));
if (codes[idx-1] == m_unknownId) ++oovCount;
ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
}
#ifdef IRSTLM_CONTEXT_DEPENDENT
codes[idx-1] = GetLmID(phrase.GetWord(position));
if (codes[idx-1] == m_unknownId) ++oovCount;
ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
}
#endif
before_boundary = TransformLMScore(before_boundary);
ngramScore = TransformLMScore(ngramScore);
fullScore = ngramScore + before_boundary;
}
FFState*
LanguageModelIRST::
EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
ScoreComponentCollection *out) const
FFState* LanguageModelIRST::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
bool isContextAdaptive
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
if (!hypo.GetCurrTargetLength()) {
std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
return ret.release();
}
//get the context_weight map here
SPTR<std::map<std::string, float> const> CW;
if (isContextAdaptive) {
ttasksptr ttask = hypo.GetManager().GetTtask();
if (ttask) CW = ttask->GetScope()->GetContextWeights();
}
//[begin, end) in STL-like fashion.
const int begin = (const int) hypo.GetCurrTargetWordsRange().GetStartPos();
const int end = (const int) hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
@ -383,34 +308,18 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
}
char* msp = NULL;
float score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
position = (const int) begin+1;
float score;
#ifdef IRSTLM_CONTEXT_DEPENDENT
if (CW) {
score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
while (position < adjust_end) {
for (idx=1; idx<m_lmtb_size; idx++) {
codes[idx-1] = codes[idx];
}
codes[idx-1] = GetLmID(hypo.GetWord(position));
score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
++position;
while (position < adjust_end) {
for (idx=1; idx<m_lmtb_size; idx++) {
codes[idx-1] = codes[idx];
}
} else {
#endif
score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
position = (const int) begin+1;
while (position < adjust_end) {
for (idx=1; idx<m_lmtb_size; idx++) {
codes[idx-1] = codes[idx];
}
codes[idx-1] = GetLmID(hypo.GetWord(position));
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
++position;
}
#ifdef IRSTLM_CONTEXT_DEPENDENT
codes[idx-1] = GetLmID(hypo.GetWord(position));
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
++position;
}
#endif
//adding probability of having sentenceEnd symbol, after this phrase;
//this could happen only when all source words are covered
if (hypo.IsSourceCompleted()) {
@ -427,13 +336,8 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
codes[idx] = m_lmtb_sentenceStart;
--idx;
}
#ifdef IRSTLM_CONTEXT_DEPENDENT
if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
else
#else
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
#endif
} else {
} else {
// need to set the LM state
if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
@ -454,9 +358,7 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
return ret.release();
}
LMResult
LanguageModelIRST::
GetValue(const vector<const Word*> &contextFactor, State* finalState) const
LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
// set up context
size_t count = contextFactor.size();
@ -492,8 +394,7 @@ GetValue(const vector<const Word*> &contextFactor, State* finalState) const
return result;
}
bool
LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
{
if (sentences_done==-1) return true;
if (m_lmcache_cleanup_threshold)
@ -510,9 +411,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask)
#endif
}
void
LanguageModelIRST::
CleanUpAfterSentenceProcessing(const InputType& source)
void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source)
{
const StaticData &staticData = StaticData::Instance();
static int sentenceCount = 0;
@ -526,9 +425,7 @@ CleanUpAfterSentenceProcessing(const InputType& source)
}
}
void
LanguageModelIRST::
SetParameter(const std::string& key, const std::string& value)
void LanguageModelIRST::SetParameter(const std::string& key, const std::string& value)
{
if (key == "dub") {
m_lmtb_dub = Scan<unsigned int>(value);

View File

@ -1,4 +1,3 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// $Id$
/***********************************************************************
@ -92,20 +91,17 @@ public:
void Load();
const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
protected:
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
public:
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
/*
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
*/
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
/*
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
*/
void InitializeForInput(ttasksptr const& ttask);
void CleanUpAfterSentenceProcessing(const InputType& source);

View File

@ -1,6 +1,5 @@
// $Id$
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -49,6 +48,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/mbr.h"
#include "moses/LatticeMBR.h"
#include <boost/foreach.hpp>
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
#include "rule.pb.h"
@ -98,6 +99,10 @@ Manager::GetSource() const
*/
void Manager::Decode()
{
std::cerr << options().nbest.nbest_size << " "
<< options().nbest.enabled << " " << std::endl;
// initialize statistics
ResetSentenceStats(m_source);
IFVERBOSE(2) {
@ -123,7 +128,8 @@ void Manager::Decode()
// some reporting on how long this took
IFVERBOSE(1) {
GetSentenceStats().StopTimeCollectOpts();
TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took "
TRACE_ERR("Line "<< m_source.GetTranslationId()
<< ": Collecting options took "
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
<< __FILE__ << ":" << __LINE__ << endl);
}
@ -1112,11 +1118,13 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea
}
void OutputSearchNode(long translationId, std::ostream &outputSearchGraphStream,
const SearchGraphNode& searchNode)
void
OutputSearchNode(AllOptions const& opts, long translationId,
std::ostream &outputSearchGraphStream,
SearchGraphNode const& searchNode)
{
const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended();
bool extendedFormat = opts.output.SearchGraphExtended.size();
outputSearchGraphStream << translationId;
// special case: initial hypothesis
@ -1369,24 +1377,32 @@ void Manager::SerializeSearchGraphPB(
}
#endif
void Manager::OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
void
Manager::
OutputSearchGraph(long translationId, std::ostream &out) const
{
vector<SearchGraphNode> searchGraph;
GetSearchGraph(searchGraph);
for (size_t i = 0; i < searchGraph.size(); ++i) {
OutputSearchNode(translationId,outputSearchGraphStream,searchGraph[i]);
OutputSearchNode(options(),translationId,out,searchGraph[i]);
}
}
void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const
void
Manager::
GetForwardBackwardSearchGraph
( std::map< int, bool >* pConnected,
std::vector<Hypothesis const* >* pConnectedList,
std::map<Hypothesis const*, set<Hypothesis const*> >* pOutgoingHyps,
vector< float>* pFwdBwdScores) const
{
std::map < int, bool > &connected = *pConnected;
std::vector< const Hypothesis *>& connectedList = *pConnectedList;
std::map < int, int > forward;
std::map < int, double > forwardScore;
std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps = *pOutgoingHyps;
std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps
= *pOutgoingHyps;
vector< float> & estimatedScores = *pFwdBwdScores;
// *** find connected hypotheses ***
@ -1395,7 +1411,8 @@ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
// ** compute best forward path for each hypothesis *** //
// forward cost of hypotheses on final stack is 0
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
const std::vector < HypothesisStack* > &hypoStackColl
= m_search->GetHypothesisStacks();
const HypothesisStack &finalStack = *hypoStackColl.back();
HypothesisStack::const_iterator iterHypo;
for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
@ -1504,34 +1521,34 @@ void Manager::OutputBest(OutputCollector *collector) const
if (!options().mbr.enabled) {
bestHypo = GetBestHypothesis();
if (bestHypo) {
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << bestHypo->GetTotalScore() << ' ';
}
if (staticData.IsPathRecoveryEnabled()) {
if (options().output.RecoverPath) {
bestHypo->OutputInput(out);
out << "||| ";
}
const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
if (params && params->size() && Scan<bool>(params->at(0)) ) {
out << translationId << " ";
}
// const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
if (options().output.PrintID) {
out << translationId << " ";
}
// VN : I put back the code for OutputPassthroughInformation
if (staticData.IsPassthroughEnabled()) {
OutputPassthroughInformation(out, bestHypo);
// VN : I put back the code for OutputPassthroughInformation
if (options().output.PrintPassThrough) {
OutputPassthroughInformation(out, bestHypo);
}
// end of add back
if (staticData.GetReportSegmentation() == 2) {
if (options().output.ReportSegmentation == 2) {
GetOutputLanguageModelOrder(out, bestHypo);
}
bestHypo->OutputBestSurface(
out,
staticData.GetOutputFactorOrder(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors());
if (staticData.PrintAlignmentInfo()) {
options().output.ReportSegmentation,
options().output.ReportAllFactors);
if (options().output.PrintAlignmentInfo) {
out << "||| ";
bestHypo->OutputAlignment(out);
}
@ -1572,8 +1589,9 @@ void Manager::OutputBest(OutputCollector *collector) const
} else {
//Lattice MBR decoding
vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
OutputBestHypo(mbrBestHypo, translationId,
options().output.ReportSegmentation,
options().output.ReportAllFactors, out);
IFVERBOSE(2) {
PrintUserTime("finished Lattice MBR decoding");
}
@ -1584,8 +1602,8 @@ void Manager::OutputBest(OutputCollector *collector) const
else if (options().search.consensus) {
const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
OutputBestHypo(conBestHypo, translationId,
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
options().output.ReportSegmentation,
options().output.ReportAllFactors, out);
OutputAlignment(m_alignmentOut, conBestHypo);
IFVERBOSE(2) {
PrintUserTime("finished Consensus decoding");
@ -1596,8 +1614,8 @@ void Manager::OutputBest(OutputCollector *collector) const
else {
const TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, translationId,
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
options().output.ReportSegmentation,
options().output.ReportAllFactors, out);
OutputAlignment(m_alignmentOut, mbrBestHypo);
IFVERBOSE(2) {
PrintUserTime("finished MBR decoding");
@ -1624,7 +1642,7 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();
if (options().lmbr.enabled) {
if (staticData.options().nbest.enabled) {
if (options().nbest.enabled) {
collector->Write(translationId, m_latticeNBestOut.str());
}
} else {
@ -1632,22 +1650,24 @@ void Manager::OutputNBest(OutputCollector *collector) const
ostringstream out;
CalcNBest(options().nbest.nbest_size, nBestList,
options().nbest.only_distinct);
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(),
OutputNBest(out, nBestList,
staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(),
staticData.GetReportSegmentation());
options().output.ReportSegmentation);
collector->Write(m_source.GetTranslationId(), out.str());
}
}
void Manager::OutputNBest(std::ostream& out
, const Moses::TrellisPathList &nBestList
, const std::vector<Moses::FactorType>& outputFactorOrder
, long translationId
, char reportSegmentation) const
void
Manager::
OutputNBest(std::ostream& out,
const Moses::TrellisPathList &nBestList,
const std::vector<Moses::FactorType>& outputFactorOrder,
long translationId, char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
NBestOptions const& nbo = staticData.options().nbest;
NBestOptions const& nbo = options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
bool includeWordAlignment = nbo.include_alignment_info;
@ -1661,12 +1681,14 @@ void Manager::OutputNBest(std::ostream& out
out << translationId << " ||| ";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, edge, outputFactorOrder, reportSegmentation,
reportAllFactors);
}
out << " |||";
// print scores with feature names
path.GetScoreBreakdown()->OutputAllFeatureScores(out);
bool with_labels = options().nbest.include_feature_labels;
path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels);
// total
out << " ||| " << path.GetTotalScore();
@ -1704,7 +1726,7 @@ void Manager::OutputNBest(std::ostream& out
}
}
if (StaticData::Instance().IsPathRecoveryEnabled()) {
if (options().output.RecoverPath) {
out << " ||| ";
OutputInput(out, edges[0]);
}
@ -1719,8 +1741,11 @@ void Manager::OutputNBest(std::ostream& out
/***
* print surface factor only for the given phrase
*/
void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
void
Manager::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
@ -1788,26 +1813,33 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
scoreBreakdown.OutputAllFeatureScores(out);
bool with_labels = options().nbest.include_feature_labels;
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
}
out << "| ";
}
}
void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const
void
Manager::
OutputAlignment(ostream &out, const AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset) const
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments();
AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder);
AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
out << alignment.first + sourceOffset << "-"
<< alignment.second + targetOffset << " ";
}
}
void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const
void
Manager::
OutputInput(std::ostream& os, const Hypothesis* hypo) const
{
size_t len = hypo->GetInput().GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
@ -1851,8 +1883,10 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
TrellisPathList latticeSamples;
ostringstream out;
CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
staticData.GetReportSegmentation());
OutputNBest(out,latticeSamples,
staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(),
options().output.ReportSegmentation);
collector->Write(m_source.GetTranslationId(), out.str());
}
@ -1970,14 +2004,10 @@ void Manager::OutputSearchGraphSLF() const
long translationId = m_source.GetTranslationId();
// Output search graph in HTK standard lattice format (SLF)
bool slf = staticData.GetOutputSearchGraphSLF();
if (slf) {
std::string const& slf = options().output.SearchGraphSLF;
if (slf.size()) {
util::StringStream fileName;
string dir;
staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
fileName << dir << "/" << translationId << ".slf";
fileName << slf << "/" << translationId << ".slf";
ofstream *file = new ofstream;
file->open(fileName.str().c_str());
if (file->is_open() && file->good()) {
@ -2045,7 +2075,11 @@ void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*trans
out << endl;
}
void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const
void
Manager::
OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
char reportSegmentation, bool reportAllFactors,
std::ostream &out) const
{
const std::vector<const Hypothesis *> &edges = path.GetEdges();
@ -2056,9 +2090,12 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI
out << endl;
}
void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
void
Manager::
OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
{
Hypothesis::OutputAlignment(out, path.GetEdges());
WordAlignmentSort waso = options().output.WA_SortOrder;
Hypothesis::OutputAlignment(out, path.GetEdges(), waso);
// Used by --alignment-output-file so requires endl
out << std::endl;
}

View File

@ -131,7 +131,7 @@ protected:
// nbest
mutable std::ostringstream m_latticeNBestOut;
mutable std::ostringstream m_alignmentOut;
public:
void OutputNBest(std::ostream& out
, const Moses::TrellisPathList &nBestList
, const std::vector<Moses::FactorType>& outputFactorOrder

View File

@ -39,16 +39,19 @@ MockHypothesisGuard
{
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
std::vector<Moses::FactorType> factors(1,0);
m_sentence.reset(new Sentence(0, sourceSentence, &factors));
AllOptions const& opts = StaticData::Instance().options();
m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors));
m_ttask = TranslationTask::create(m_sentence);
m_manager.reset(new Manager(m_ttask));
//Initial empty hypothesis
Bitmaps bitmaps(m_sentence.get()->GetSize(), m_sentence.get()->m_sourceCompleted);
Bitmaps bitmaps(m_sentence.get()->GetSize(),
m_sentence.get()->m_sourceCompleted);
m_manager->ResetSentenceStats(*m_sentence);
const Bitmap &initBitmap = bitmaps.GetInitialBitmap();
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, initBitmap);
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt,
initBitmap);
//create the chain
vector<Alignment>::const_iterator ai = alignments.begin();
@ -56,7 +59,8 @@ MockHypothesisGuard
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
Hypothesis* prevHypo = m_hypothesis;
Range range(ai->first,ai->second);
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range);
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(),
range);
m_targetPhrases.push_back(TargetPhrase(NULL));
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);

View File

@ -1620,6 +1620,13 @@ SetParameter<bool>(bool &parameter, std::string const& parameterName,
}
}
void
Parameter::
SetParameter(bool& var, std::string const& name)
{
SetParameter(var,name,false);
}
} // namespace

View File

@ -149,6 +149,20 @@ public:
}
}
void SetParameter(bool& var, std::string const& name);
bool SetBooleanSwitch(bool& val, std::string const name) {
// issues a warning if format is wrong
const PARAM_VEC *params = GetParam(name);
val = (params && params->size());
if (val && params->size() != 1)
{
TRACE_ERR("ERROR: wrong format for switch -" << name);
return false;
}
return true;
}
};
template<>

View File

@ -305,35 +305,38 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score
}
}
void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
void
ScoreComponentCollection::
OutputAllFeatureScores(std::ostream &out, bool with_labels) const
{
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
const StatefulFeatureFunction *ff = sff[i];
if (ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName );
OutputFeatureScores(out, ff, lastName, with_labels);
}
}
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ ) {
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName );
OutputFeatureScores(out, ff, lastName, with_labels);
}
}
}
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
, const FeatureFunction *ff
, std::string &lastName ) const
void
ScoreComponentCollection::
OutputFeatureScores(std::ostream& out, FeatureFunction const* ff,
std::string &lastName, bool with_labels) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.options().nbest.include_feature_labels;
// const StaticData &staticData = StaticData::Instance();
// bool labeledOutput = staticData.options().nbest.include_feature_labels;
// regular features (not sparse)
if (ff->HasTuneableComponents()) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
if( with_labels && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}

View File

@ -433,10 +433,9 @@ public:
m_scores.merge(other.m_scores);
}
void OutputAllFeatureScores(std::ostream &out) const;
void OutputFeatureScores( std::ostream& out
, const Moses::FeatureFunction *ff
, std::string &lastName ) const;
void OutputAllFeatureScores(std::ostream &out, bool with_labels) const;
void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff,
std::string &lastName, bool with_labels) const;
#ifdef MPI_ENABLE
public:

View File

@ -166,7 +166,8 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
void
Sentence::
init(string line, std::vector<FactorType> const& factorOrder)
init(string line, std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
using namespace std;
const StaticData &SD = StaticData::Instance();
@ -182,7 +183,8 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>"
if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) {
if (SD.options().output.PrintPassThrough ||
SD.options().nbest.include_passthrough) {
string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru);
}
@ -230,12 +232,14 @@ init(string line, std::vector<FactorType> const& factorOrder)
int
Sentence::
Read(std::istream& in,const std::vector<FactorType>& factorOrder)
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
{
std::string line;
if (getline(in, line, '\n').eof())
return 0;
init(line, factorOrder);
init(line, factorOrder, opts);
return 1;
}
@ -366,12 +370,14 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
}
Sentence::
Sentence(size_t const transId, string const& stext,
Sentence(size_t const transId,
string const& stext,
AllOptions const& opts,
vector<FactorType> const* IFO)
: InputType(transId)
{
if (IFO) init(stext, *IFO);
else init(stext, StaticData::Instance().GetInputFactorOrder());
if (IFO) init(stext, *IFO, opts);
else init(stext, StaticData::Instance().GetInputFactorOrder(), opts);
}
}

View File

@ -1,6 +1,4 @@
// -*- c++ -*-
// $Id$
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -28,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Word.h"
#include "Phrase.h"
#include "InputType.h"
#include "parameters/AllOptions.h"
namespace Moses
{
@ -66,7 +65,8 @@ protected:
public:
Sentence();
Sentence(size_t const transId, std::string const& stext,
std::vector<FactorType> const* IFO = NULL);
AllOptions const& opts,
std::vector<FactorType> const* IFO = NULL);
// Sentence(size_t const transId, std::string const& stext);
~Sentence();
@ -97,7 +97,10 @@ public:
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
virtual int
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
void Print(std::ostream& out) const;
TranslationOptionCollection*
@ -114,7 +117,8 @@ public:
void
init(std::string line, std::vector<FactorType> const& factorOrder);
init(std::string line, std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
std::vector<std::map<std::string,std::string> > const&
GetDltMeta() const {

View File

@ -63,7 +63,7 @@ StaticData StaticData::s_instance;
StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
// , m_inputType(SentenceInput)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
@ -132,23 +132,11 @@ StaticData
const PARAM_VEC *params;
// input type has to be specified BEFORE loading the phrase tables!
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
// m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
m_parameter->SetParameter(m_continuePartialTranslation,
"continue-partial-translation", false );
std::string s_it = "text input";
if (m_inputType == 1) {
s_it = "confusion net";
}
if (m_inputType == 2) {
s_it = "word lattice";
}
if (m_inputType == 3) {
s_it = "tree";
}
VERBOSE(2,"input type is: "<<s_it<<"\n");
// use of xml in input
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
@ -181,119 +169,30 @@ StaticData
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
m_parameter->SetParameter(m_recoverPath, "recover-input-path", false);
if (m_recoverPath && m_inputType == SentenceInput) {
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
m_recoverPath = false;
}
m_parameter->SetParameter(m_includeLHSInSearchGraph,
"include-lhs-in-search-graph", false );
m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false );
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
}
m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
params = m_parameter->GetParam("output-word-graph");
m_outputWordGraph = (params && params->size() == 2);
params = m_parameter->GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
m_outputSearchGraph = true;
}
// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
m_outputSearchGraph = true;
m_outputSearchGraphExtended = true;
} else {
m_outputSearchGraph = false;
}
params = m_parameter->GetParam("output-search-graph-slf");
if (params && params->size()) {
m_outputSearchGraphSLF = true;
} else {
m_outputSearchGraphSLF = false;
}
params = m_parameter->GetParam("output-search-graph-hypergraph");
if (params && params->size()) {
m_outputSearchGraphHypergraph = true;
} else {
m_outputSearchGraphHypergraph = false;
}
#ifdef HAVE_PROTOBUF
params = m_parameter->GetParam("output-search-graph-pb");
if (params && params->size()) {
if (params->size() != 1) {
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
return false;
}
m_outputSearchGraphPB = true;
} else
m_outputSearchGraphPB = false;
#endif
m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false );
m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", "");
// printing source phrase spans
m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false );
m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false );
// print all factors of output translations
m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false );
m_parameter->SetParameter<string>(m_outputUnknownsFile,
"output-unknowns", "");
//Print Translation Options
m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false );
m_parameter->SetParameter(m_printTranslationOptions,
"print-translation-option", false );
//Print All Derivations
m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false );
m_parameter->SetParameter(m_printAllDerivations ,
"print-all-derivations", false );
// additional output
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath,
"translation-details", "");
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath,
"tree-translation-details", "");
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath,
"translation-all-details", "");
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
m_parameter->SetParameter<long>(m_startTranslationId,
"start-translation-id", 0);
//lattice samples
params = m_parameter->GetParam("lattice-samples");
if (params) {
if (params->size() ==2 ) {
m_latticeSamplesFilePath = params->at(0);
m_latticeSamplesSize = Scan<size_t>(params->at(1));
} else {
std::cerr <<"wrong format for switch -lattice-samples file size";
return false;
}
} else {
m_latticeSamplesSize = 0;
}
return true;
}
void
StaticData
::ini_compact_table_options()
StaticData::
ini_compact_table_options()
{
// Compact phrase table and reordering model
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
@ -301,8 +200,8 @@ StaticData
}
void
StaticData
::ini_lm_options()
StaticData::
ini_lm_options()
{
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
}
@ -349,8 +248,8 @@ StaticData
}
void
StaticData
::ini_factor_maps()
StaticData::
ini_factor_maps()
{
const PARAM_VEC *params;
// factor delimiter
@ -380,8 +279,8 @@ StaticData
}
void
StaticData
::ini_oov_options()
StaticData::
ini_oov_options()
{
// unknown word processing
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
@ -398,8 +297,8 @@ StaticData
}
void
StaticData
::ini_zombie_options()
StaticData::
ini_zombie_options()
{
//Disable discarding
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
@ -434,20 +333,6 @@ bool StaticData::LoadData(Parameter *parameter)
// search
ini_oov_options();
// set m_nbest_options.enabled = true if necessary:
if (m_options.mbr.enabled
|| m_options.mira
|| m_options.search.consensus
|| m_outputSearchGraph
|| m_outputSearchGraphSLF
|| m_outputSearchGraphHypergraph
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size()) {
m_options.nbest.enabled = true;
}
// S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus);
@ -455,7 +340,8 @@ bool StaticData::LoadData(Parameter *parameter)
ini_zombie_options(); // probably dead, or maybe not
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND);
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor",
NOT_FOUND);
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
initialize_features();
@ -507,7 +393,8 @@ void StaticData::SetWeight(const FeatureFunction* sp, float weight)
m_allWeights.Assign(sp,weight);
}
void StaticData::SetWeights(const FeatureFunction* sp, const std::vector<float>& weights)
void StaticData::SetWeights(const FeatureFunction* sp,
const std::vector<float>& weights)
{
m_allWeights.Resize();
m_allWeights.Assign(sp,weights);
@ -557,8 +444,10 @@ void StaticData::LoadChartDecodingParameters()
LoadNonTerminals();
// source label overlap
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd);
m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE);
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
SourceLabelOverlapAdd);
m_parameter->SetParameter(m_ruleLimit, "rule-limit",
DEFAULT_MAX_TRANS_OPT_SIZE);
}
@ -596,12 +485,16 @@ void StaticData::LoadDecodeGraphs()
}
}
void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const vector<size_t> &maxChartSpans)
void
StaticData::
LoadDecodeGraphsOld(const vector<string> &mappingVector,
const vector<size_t> &maxChartSpans)
{
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
const std::vector<FeatureFunction*> *featuresRemaining
= &FeatureFunction::GetFeatureFunctions();
DecodeStep *prev = 0;
size_t prevDecodeGraphInd = 0;
@ -620,7 +513,8 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
// For specifying multiple translation model
decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
&& decodeGraphInd != prevDecodeGraphInd + 1,
"Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL;
@ -707,7 +601,8 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
decodeGraphInd = Scan<size_t>(token[0]);
//the vectorList index can only increment by one
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
&& decodeGraphInd != prevDecodeGraphInd + 1,
"Malformed mapping");
if (decodeGraphInd > prevDecodeGraphInd) {
prev = NULL;
@ -783,17 +678,6 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
void StaticData::SetExecPath(const std::string &path)
{
/*
namespace fs = boost::filesystem;
fs::path full_path( fs::initial_path<fs::path>() );
full_path = fs::system_complete( fs::path( path ) );
//Without file name
m_binPath = full_path.parent_path().string();
*/
// NOT TESTED
size_t pos = path.rfind("/");
if (pos != string::npos) {
@ -810,34 +694,33 @@ const string &StaticData::GetBinDirectory() const
float StaticData::GetWeightWordPenalty() const
{
float weightWP = GetWeight(&WordPenaltyProducer::Instance());
//VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl);
return weightWP;
}
void
StaticData
::InitializeForInput(ttasksptr const& ttask) const
StaticData::
InitializeForInput(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {
Timer iTime;
iTime.start();
ff.InitializeForInput(ttask);
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )"
<< "= " << iTime << endl);
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription()
<< " )" << "= " << iTime << endl);
}
}
}
void
StaticData
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
StaticData::
CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {

View File

@ -82,9 +82,6 @@ protected:
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
float
// m_beamWidth,
// m_earlyDiscardingThreshold,
// m_translationOptionThreshold,
m_wordDeletionWeight;
@ -94,15 +91,9 @@ protected:
// -ve = no limit on distortion
// 0 = no distortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
// bool m_useEarlyDistortionCost;
// size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
// size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
BookkeepingOptions m_bookkeeping_options;
size_t m_latticeSamplesSize;
// size_t m_maxNoTransOptPerCoverage;
// size_t m_maxNoPartTransOpt;
// size_t m_maxPhraseLength;
std::string m_latticeSamplesFilePath;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
@ -116,48 +107,31 @@ protected:
bool m_printTranslationOptions;
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
bool m_outputHypoScore;
// bool m_recoverPath;
// bool m_outputHypoScore;
bool m_requireSortingAfterSourceContext;
// SearchAlgorithm m_searchAlgorithm;
InputTypeEnum m_inputType;
// InputTypeEnum m_inputType;
mutable size_t m_verboseLevel;
bool m_reportSegmentation;
bool m_reportSegmentationEnriched;
bool m_reportAllFactors;
std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
std::string m_detailedAllTranslationReportingFilePath;
bool m_PrintAlignmentInfo;
bool m_PrintID;
bool m_PrintPassthroughInformation;
std::string m_alignmentOutputFile;
// bool m_reportSegmentation;
// bool m_reportSegmentationEnriched;
// bool m_reportAllFactors;
// std::string m_detailedTranslationReportingFilePath;
// std::string m_detailedTreeFragmentsTranslationReportingFilePath;
// std::string m_detailedAllTranslationReportingFilePath;
// bool m_PrintAlignmentInfo;
// bool m_PrintID;
// bool m_PrintPassthroughInformation;
// std::string m_alignmentOutputFile;
std::string m_factorDelimiter; //! by default, |, but it can be changed
XmlInputType m_xmlInputType; //! method for handling sentence XML input
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
// bool m_mbr; //! use MBR decoder
// bool m_useLatticeMBR; //! use MBR decoder
// bool m_mira; // do mira training
// bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009)
// size_t m_mbrSize; //! number of translation candidates considered
// float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
// size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice
// std::vector<float> m_lmbrThetas; //! theta(s) for lattice mbr calculation
// bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR
// float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details
// float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
// float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;
@ -167,15 +141,15 @@ protected:
bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created
bool m_outputWordGraph; //! whether to output word graph
bool m_outputSearchGraph; //! whether to output search graph
bool m_outputSearchGraphExtended; //! ... in extended format
bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
// bool m_outputWordGraph; //! whether to output word graph
// bool m_outputSearchGraph; //! whether to output search graph
// bool m_outputSearchGraphExtended; //! ... in extended format
// bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
// bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
#ifdef HAVE_PROTOBUF
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
// bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
#endif
bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
// bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
std::string m_outputUnknownsFile; //! output unknowns in this file
@ -190,7 +164,7 @@ protected:
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
SourceLabelOverlap m_sourceLabelOverlap;
UnknownLHSList m_unknownLHS;
WordAlignmentSort m_wordAlignmentSort;
// WordAlignmentSort m_wordAlignmentSort;
int m_threadCount;
long m_startTranslationId;
@ -229,10 +203,6 @@ protected:
const StatefulFeatureFunction* m_treeStructure;
// number of nonterminal labels
// size_t m_nonTerminalSize;
void ini_compact_table_options();
void ini_consensus_decoding_options();
void ini_cube_pruning_options();
@ -278,7 +248,8 @@ public:
}
#endif
//! Load data into static instance. This function is required as LoadData() is not const
//! Load data into static instance. This function is required as
// LoadData() is not const
static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);
//! Main function to load everything. Also initialize the Parameter object
@ -336,22 +307,6 @@ public:
bool IsWordDeletionEnabled() const {
return m_wordDeletionEnabled;
}
// size_t GetMaxHypoStackSize() const {
// return m_options.search.stack_size;
// }
// size_t GetMinHypoStackDiversity() const {
// return m_options.search.stack_diversity;
// }
size_t IsPathRecoveryEnabled() const {
return m_recoverPath;
}
bool IsIDEnabled() const {
return m_PrintID;
}
bool IsPassthroughEnabled() const {
return m_PrintPassthroughInformation;
}
int GetMaxDistortion() const {
return m_options.reordering.max_distortion;
@ -384,47 +339,6 @@ public:
void SetVerboseLevel(int x) const {
m_verboseLevel = x;
}
char GetReportSegmentation() const {
if (m_reportSegmentation) return 1;
if (m_reportSegmentationEnriched) return 2;
return 0;
}
void SetReportSegmentation(const int &val) {
if (val == 0)
m_reportSegmentation = m_reportSegmentationEnriched = false;
else if (val == 1)
m_reportSegmentation = true;
else if (val == 2)
m_reportSegmentationEnriched = true;
else
std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring";
}
bool GetReportAllFactors() const {
return m_reportAllFactors;
}
bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
}
bool IsDetailedAllTranslationReportingEnabled() const {
return !m_detailedAllTranslationReportingFilePath.empty();
}
const std::string &GetDetailedTranslationReportingFilePath() const {
return m_detailedTranslationReportingFilePath;
}
bool IsDetailedTreeFragmentsTranslationReportingEnabled() const {
return !m_detailedTreeFragmentsTranslationReportingFilePath.empty();
}
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
// bool IsLabeledNBestList() const {
// return m_options.nbest.include_feature_labels;
// }
bool UseMinphrInMemory() const {
return m_minphrMemory;
@ -434,19 +348,6 @@ public:
return m_minlexrMemory;
}
// for mert
// size_t GetNBestSize() const {
// return m_options.nbest.nbest_size;
// }
// const std::string &GetNBestFilePath() const {
// return m_options.nbest.output_file_path;
// }
// bool IsNBestEnabled() const {
// return m_options.nbest.enabled;
// }
size_t GetLatticeSamplesSize() const {
return m_latticeSamplesSize;
}
@ -455,22 +356,6 @@ public:
return m_latticeSamplesFilePath;
}
// size_t GetNBestFactor() const {
// return m_options.nbest.factor;
// }
bool GetOutputWordGraph() const {
return m_outputWordGraph;
}
//! Sets the global score vector weights for a given FeatureFunction.
InputTypeEnum GetInputType() const {
return m_inputType;
}
// SearchAlgorithm GetSearchAlgorithm() const {
// return m_searchAlgorithm;
// }
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
if (algo == DefaultSearchAlgorithm)
algo = m_options.search.algo;
@ -577,33 +462,36 @@ public:
return m_lmEnableOOVFeature;
}
bool GetOutputSearchGraph() const {
return m_outputSearchGraph;
}
void SetOutputSearchGraph(bool outputSearchGraph) {
m_outputSearchGraph = outputSearchGraph;
}
bool GetOutputSearchGraphExtended() const {
return m_outputSearchGraphExtended;
}
bool GetOutputSearchGraphSLF() const {
return m_outputSearchGraphSLF;
}
bool GetOutputSearchGraphHypergraph() const {
return m_outputSearchGraphHypergraph;
}
#ifdef HAVE_PROTOBUF
bool GetOutputSearchGraphPB() const {
return m_outputSearchGraphPB;
}
#endif
// bool GetOutputSearchGraph() const {
// return m_outputSearchGraph;
// }
// void SetOutputSearchGraph(bool outputSearchGraph) {
// m_outputSearchGraph = outputSearchGraph;
// }
// bool GetOutputSearchGraphExtended() const {
// return m_outputSearchGraphExtended;
// }
// GetOutputSearchGraphSLF() const {
// return m_outputSearchGraphSLF;
// }
// bool GetOutputSearchGraphHypergraph() const {
// return m_outputSearchGraphHypergraph;
// }
// #ifdef HAVE_PROTOBUF
// bool GetOutputSearchGraphPB() const {
// return m_outputSearchGraphPB;
// }
// #endif
const std::string& GetOutputUnknownsFile() const {
return m_outputUnknownsFile;
}
bool GetUnprunedSearchGraph() const {
return m_unprunedSearchGraph;
}
// bool GetUnprunedSearchGraph() const {
// return m_unprunedSearchGraph;
// }
bool GetIncludeLHSInSearchGraph() const {
return m_includeLHSInSearchGraph;
@ -640,9 +528,9 @@ public:
return m_sourceLabelOverlap;
}
bool GetOutputHypoScore() const {
return m_outputHypoScore;
}
// bool GetOutputHypoScore() const {
// return m_outputHypoScore;
// }
size_t GetRuleLimit() const {
return m_ruleLimit;
}
@ -675,16 +563,16 @@ public:
return m_bookkeeping_options.need_alignment_info;
// return m_needAlignmentInfo;
}
const std::string &GetAlignmentOutputFile() const {
return m_alignmentOutputFile;
}
bool PrintAlignmentInfo() const {
return m_PrintAlignmentInfo;
}
// const std::string &GetAlignmentOutputFile() const {
// return m_alignmentOutputFile;
// }
// bool PrintAlignmentInfo() const {
// return m_PrintAlignmentInfo;
// }
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;
}
// WordAlignmentSort GetWordAlignmentSort() const {
// return m_wordAlignmentSort;
// }
bool GetHasAlternateWeightSettings() const {
return m_weightSetting.size() > 0;

View File

@ -26,12 +26,12 @@ void Manager::OutputBest(OutputCollector *collector) const
const SHyperedge *best = GetBestSHyperedge();
if (best == NULL) {
VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << "0 ";
}
out << '\n';
} else {
if (StaticData::Instance().GetOutputHypoScore()) {
if (options().output.ReportHypoScore) {
out << best->label.score << " ";
}
Phrase yield = GetOneBestTargetYield(*best);
@ -49,12 +49,10 @@ void Manager::OutputBest(OutputCollector *collector) const
void Manager::OutputNBest(OutputCollector *collector) const
{
if (collector) {
const StaticData &staticData = StaticData::Instance();
long translationId = m_source.GetTranslationId();
KBestExtractor::KBestVec nBestList;
ExtractKBest(staticData.options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct);
ExtractKBest(options().nbest.nbest_size, nBestList,
options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
}
}
@ -111,7 +109,8 @@ void Manager::OutputNBestList(OutputCollector *collector,
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
derivation.scoreBreakdown.OutputAllFeatureScores(out);
bool with_labels = options().nbest.include_feature_labels;
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
out << " ||| " << derivation.score;
// optionally, print word alignments

View File

@ -66,7 +66,7 @@ template<typename RuleTrie>
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
{
const StaticData &staticData = StaticData::Instance();
const StaticData &SD = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
UnknownWordPenaltyProducer::Instance();
@ -82,8 +82,8 @@ TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
targetPhrase->EvaluateInIsolation(srcPhrase);
targetPhrase->SetTargetLHS(&targetLhs);
targetPhrase->SetAlignmentInfo("0-0");
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() ||
staticData.GetTreeStructure() != NULL) {
if (!SD.options().output.detailed_tree_transrep_filepath.empty() ||
SD.GetTreeStructure() != NULL) {
std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " +
oov[0]->GetString().as_string() + " ]";
targetPhrase->SetProperty("Tree", value);

View File

@ -45,7 +45,11 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
}
}
int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder)
int
TabbedSentence::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
TabbedColumns allColumns;
@ -58,14 +62,14 @@ int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factor
if(allColumns.size() < 2) {
std::stringstream dummyStream;
dummyStream << line << std::endl;
return Sentence::Read(dummyStream, factorOrder);
return Sentence::Read(dummyStream, factorOrder, opts);
} else {
m_columns.resize(allColumns.size() - 1);
std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
std::stringstream dummyStream;
dummyStream << allColumns[0] << std::endl;
return Sentence::Read(dummyStream, factorOrder);
return Sentence::Read(dummyStream, factorOrder, opts);
}
}

View File

@ -67,7 +67,9 @@ public:
virtual void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &tabbedString);
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
virtual int
Read(std::istream& in,const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
const TabbedColumns& GetColumns() const {
return m_columns;

View File

@ -44,7 +44,7 @@ using namespace boost::algorithm;
namespace Moses
{
typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary(line, true)

View File

@ -25,7 +25,7 @@ namespace Moses
{
boost::thread_specific_ptr<typename TargetPhraseCollectionCache::CacheMap>
boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
TargetPhraseCollectionCache::m_phraseCache;
}

View File

@ -59,6 +59,18 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
# Build target for the check-coverage executable: compiles check-coverage.cc
# and links it against the moses library, the UG generic / mm / mmsapt
# libraries, kenutil, and the boost iostreams, filesystem and program_options
# libraries listed below.
exe check-coverage :
check-coverage.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe sim-pe :
sim-pe.cc
$(TOP)/moses//moses

View File

@ -17,7 +17,7 @@ echo $$d
endef
MOSES_ROOT := $(shell $(find_moses_root))
$(info MOSES_ROOT=${MOSES_ROOT})
# ===============================================================================
# COMPILATION PREFERENCES
# ===============================================================================
@ -35,7 +35,9 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5
CXXFLAGS += -DWITH_THREADS
CXXFLAGS += -DNO_MOSES
CXXFLAGS += -I${MOSES_ROOT} -I.
CXXFLAGS += -DMMT
CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only
CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include
ifeq ($(variant),debug)
CXXFLAGS += -ggdb -O0
@ -45,7 +47,7 @@ else ifeq ($(variant),syntax)
CXXFLAGS += -fsyntax-only
endif
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/
LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
WDIR = build/$(variant)
@ -60,14 +62,22 @@ nil:
# libraries required
LIBS = m z bz2 pthread dl ${BOOSTLIBS}
#LIBS += tcmalloc
BOOSTLIBS := thread system filesystem program_options iostreams
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
ifdef ($(BOOSTLIBTAG),"")
BOOSTLIBS := program_options iostreams thread system filesystem
BOOSTLIBS := $(addprefix -lboost_,${BOOSTLIBS})
ifeq ($(BOOSTLIBTAG),"")
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif
STATIC_LIBS = m bz2 z dl rt
DYNAMIC_LIBS = pthread
#DYNAMIC_LIBS += tcmalloc
LIBS = -Wl,-B$(link)
LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS}
LIBS += $(addprefix -l,${STATIC_LIBS})
LIBS += -Wl,-Bdynamic
LIBS += $(addprefix -l,${DYNAMIC_LIBS})
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
$(patsubst .%,$(WDIR)%,$(basename $1))))
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
@ -79,7 +89,7 @@ DEP += $(basename $(call cc2obj,$1)).d
$(call cc2obj,$1): $1
@echo -e "COMPILING $1"
@mkdir -p $$(@D)
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
endef
@ -90,7 +100,7 @@ $(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
ifneq ($(variant),syntax)
@echo -e "LINKING $$@"
@mkdir -p $${@D}
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$<
${CXX} ${CXXFLAGS} -o $$@ $$< $(LIBOBJ) ${LIBS}
endif
endef
@ -106,7 +116,8 @@ skip += ug_splice_arglist.cc
# skip += ug_lexical_reordering.cc
# objects from elsewhere in the moses tree that are needed
extra = ${MOSES_ROOT}/util/exception.cc
extra = ${MOSES_ROOT}/util/exception.cc
extra += ${MOSES_ROOT}/util/integer_to_string.cc
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
broken += $(wildcard ./mm/stashed/*)

View File

@ -0,0 +1,81 @@
// #include "mmsapt.h"
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
// #include "moses/TranslationTask.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
// Strict weak ordering on (name, count) pairs that puts larger counts first.
// Only the count (.second) is inspected; pairs with equal counts compare as
// equivalent regardless of their names.
struct mycmp
{
  bool
  operator()(std::pair<std::string, uint32_t> const& lhs,
             std::pair<std::string, uint32_t> const& rhs) const
  {
    // "lhs goes before rhs" exactly when rhs has the smaller count
    return rhs.second < lhs.second;
  }
};
// Return the last component of `path` (everything after the final '/'),
// with `suffix` removed if that component ends with it.
//
//   basename("/a/b/doc.en.gz", ".en.gz") -> "doc"
//   basename("doc.en.gz",      ".en.gz") -> "doc"
//   basename("/a/b/doc.txt",   ".en.gz") -> "doc.txt"
//
// The function has no side effects (it does not write to stdout) and is
// safe for paths without any '/' and for suffixes longer than the path.
std::string
basename(std::string const path, std::string const suffix)
{
  // find_last_of() yields npos when there is no '/'; the name then starts
  // at the beginning of the path. Otherwise it starts right after the slash.
  size_t const slash = path.find_last_of("/");
  size_t const start = (slash == std::string::npos) ? 0 : slash + 1;

  // Strip the suffix only if it fits entirely inside the last component;
  // this also avoids the unsigned underflow of size() - suffix.size().
  size_t end = path.size();
  if (suffix.size() <= end - start &&
      path.compare(end - suffix.size(), suffix.size(), suffix) == 0)
    end -= suffix.size();

  return path.substr(start, end - start);
}
int main(int argc, char* argv[])
{
bitext_t B;
B.open(argv[1],argv[2],argv[3]);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B.V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B.I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
{
if (m.ca() > 500) continue;
sapt::tsa::ArrayEntry I(m.lower_bound(-1));
char const* stop = m.upper_bound(-1);
map<string,uint32_t> cnt;
while (I.next != stop)
{
m.root->readEntry(I.next,I);
++cnt[B.docname(I.sid)];
}
cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl;
typedef pair<string,uint32_t> entry;
vector<entry> ranked; ranked.reserve(cnt.size());
BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
sort(ranked.begin(),ranked.end(),mycmp());
BOOST_FOREACH(entry const& e, ranked)
cout << setw(12) << " " << e.second << " " << e.first << endl;
cout << endl;
}
}
}
}

View File

@ -0,0 +1,67 @@
// for each word in the input, keep track of the longest matching ngram covering it
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
// Comparator for sorting (label, count) pairs in descending order of count;
// the label part plays no role in the ordering.
struct mycmp
{
  bool operator() (std::pair<std::string,uint32_t> const& x,
                   std::pair<std::string,uint32_t> const& y) const
  {
    uint32_t const cx = x.second;
    uint32_t const cy = y.second;
    return cx > cy;
  }
};
// Cut the directory part and, if present, the trailing /suffix/ off /path/.
//
// The result starts AT the last '/' of /path/, i.e. the separator is kept
// ("/a/b/doc.en.gz" with suffix ".en.gz" yields "/doc"); this matches the
// previous output for well-formed paths. A path without any '/' is used
// from its first character, and a suffix longer than the path simply does
// not match. Both of those inputs used to run substr() / operator[] out of
// range.
//
// Side effect: two diagnostic lines are written to stdout.
std::string
basename(std::string const path, std::string const suffix)
{
  size_t const slash = path.find_last_of("/");
  size_t const start = (slash == std::string::npos) ? 0 : slash;
  // offset at which the suffix would begin, clamped so it cannot wrap around
  size_t const k = (path.size() >= suffix.size()
                    ? path.size() - suffix.size() : 0);
  bool const has_suffix = (path.size() >= suffix.size()
                           && path.compare(k, std::string::npos, suffix) == 0);
  std::cout << path << " " << suffix << std::endl;
  std::cout << path.substr(0,slash) << " " << path.substr(k) << std::endl;
  // strip the suffix only if it lies entirely behind the start position;
  // otherwise return everything from the start position onwards
  size_t const len = (has_suffix && k >= start) ? k - start : std::string::npos;
  return path.substr(start, len);
}
// Driver: for each word of each input line, reports the length of the
// longest n-gram covering that word that could be matched in side 1 of the
// bitext (0 if no match covers the word).
//
// argv[1..3] are handed to bitext_t::open() (presumably base name and the
// two language tags -- confirm against mmBitext::open); argv[4] is the
// input file. argv[2] is also used to build the ".<argv[2]>.gz" suffix
// stripped from the document name.
//
// Returns 1 with a usage message if fewer than four arguments are given.
int main(int argc, char* argv[])
{
  // argv[1]..argv[4] are dereferenced unconditionally below
  if (argc < 5)
    {
      std::cerr << "usage: " << (argc > 0 ? argv[0] : "<program>")
                << " <bitext-base> <L1> <L2> <input-file>" << std::endl;
      return 1;
    }
  bitext_t B;
  B.open(argv[1],argv[2],argv[3]);
  // NOTE(review): presumably allows the vocabulary to hand out ids for
  // words it has not seen before -- confirm against the TokenIndex class
  B.V1->setDynamic(true);
  string line;
  string ifile = argv[4];
  string docname = basename(ifile, string(".") + argv[2] + ".gz");
  boost::iostreams::filtering_istream in;
  ugdiss::open_input_stream(ifile,in);
  while(getline(in,line))
    {
      cout << line << " [" << docname << "]" << endl;
      vector<id_type> snt;
      B.V1->fillIdSeq(line,snt);
      // match[p]: length of the longest matched n-gram covering position p
      vector<size_t> match(snt.size(),0);
      for (size_t i = 0; i < snt.size(); ++i)
        {
          bitext_t::iter m(B.I1.get());
          // extend as far as possible from position i; the loop body is
          // intentionally empty, all the work happens in m.extend()
          for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
          // every position covered by this match gets credit for its length
          for (size_t j = 0; j < m.size(); ++j)
            match[i+j] = max(match[i+j], m.size());
        }
      for (size_t i = 0; i < snt.size(); ++i)
        cout << setw(3) << match[i] << " " << (*B.V1)[snt[i]] << endl;
    }
  return 0;
}

View File

@ -0,0 +1,70 @@
// #include "mmsapt.h"
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
// #include "moses/TranslationTask.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
#include "mm/ug_bitext_sampler.h"
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
// Descending-by-count comparator for (label, count) pairs; two entries
// with equal counts are considered equivalent regardless of label.
struct mycmp
{
  typedef std::pair<std::string,uint32_t> item_t;
  bool operator() (item_t const& first, item_t const& second) const
  {
    return second.second < first.second;
  }
};
// Cut the directory part and, if present, the trailing /suffix/ off /path/.
//
// The result starts AT the last '/' of /path/, i.e. the separator is kept
// ("/a/b/doc.en.gz" with suffix ".en.gz" yields "/doc"); this matches the
// previous output for well-formed paths. A path without any '/' is used
// from its first character, and a suffix longer than the path simply does
// not match. Both of those inputs used to run substr() / operator[] out of
// range.
//
// Side effect: two diagnostic lines are written to stdout.
std::string
basename(std::string const path, std::string const suffix)
{
  size_t const slash = path.find_last_of("/");
  size_t const start = (slash == std::string::npos) ? 0 : slash;
  // offset at which the suffix would begin, clamped so it cannot wrap around
  size_t const k = (path.size() >= suffix.size()
                    ? path.size() - suffix.size() : 0);
  bool const has_suffix = (path.size() >= suffix.size()
                           && path.compare(k, std::string::npos, suffix) == 0);
  std::cout << path << " " << suffix << std::endl;
  std::cout << path.substr(0,slash) << " " << path.substr(k) << std::endl;
  // strip the suffix only if it lies entirely behind the start position;
  // otherwise return everything from the start position onwards
  size_t const len = (has_suffix && k >= start) ? k - start : std::string::npos;
  return path.substr(start, len);
}
// Driver: for every n-gram of every input line that can be matched in side
// 1 of the bitext, runs a BitextSampler (random sampling, min_samples and
// max_samples both 1000, no sampling bias) and prints the n-gram length
// together with the size of the sampler's `trg` statistics map.
//
// argv[1..3] are handed to bitext_t::open() (presumably base name and the
// two language tags -- confirm against mmBitext::open); argv[4] is the
// input file. argv[2] is also used to build the ".<argv[2]>.gz" suffix
// stripped from the document name.
//
// Returns 1 with a usage message if fewer than four arguments are given.
int main(int argc, char* argv[])
{
  // argv[1]..argv[4] are dereferenced unconditionally below
  if (argc < 5)
    {
      std::cerr << "usage: " << (argc > 0 ? argv[0] : "<program>")
                << " <bitext-base> <L1> <L2> <input-file>" << std::endl;
      return 1;
    }
  // the bitext is held by intrusive_ptr; the sampler receives the raw pointer
  boost::intrusive_ptr<bitext_t> B(new bitext_t);
  B->open(argv[1],argv[2],argv[3]);
  string line;
  string ifile = argv[4];
  string docname = basename(ifile, string(".") + argv[2] + ".gz");
  boost::iostreams::filtering_istream in;
  ugdiss::open_input_stream(ifile,in);
  while(getline(in,line))
    {
      cout << line << " [" << docname << "]" << endl;
      vector<id_type> snt;
      B->V1->fillIdSeq(line,snt);
      for (size_t i = 0; i < snt.size(); ++i)
        {
          // extend the match one token at a time, starting at position i
          bitext_t::iter m(B->I1.get());
          for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
            {
              // empty bias pointer: sample without any bias
              SPTR<SamplingBias const> zilch;
              BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
                                     sapt::random_sampling);
              s(); // run the sampler synchronously
              cout << m.size() << " " << s.stats()->trg.size() << endl;
            }
        }
    }
  return 0;
}

View File

@ -29,7 +29,7 @@ HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m)
KERNEL = $(shell uname -r)
MOSES_ROOT = ${HOME}/code/mosesdecoder
MOSES_ROOT ?= ${HOME}/code/mosesdecoder
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
VPATH = ${HOME}/code/mosesdecoder/
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}

View File

@ -28,8 +28,8 @@ Bitext<Token>::agenda
while (j->nextSample(sid,offset))
{
aln.clear();
int po_fwd = Moses::LRModel::NONE;
int po_bwd = Moses::LRModel::NONE;
int po_fwd = LRModel::NONE;
int po_bwd = LRModel::NONE;
int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1;
bitvector* full_aln = j->fwd ? &full_alignment : NULL;

View File

@ -17,7 +17,7 @@ namespace sapt
jstats()
: my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
my_aln.reserve(1);
}
@ -30,7 +30,7 @@ namespace sapt
my_bcnt = other.bcnt();
my_aln = other.aln();
indoc = other.indoc;
for (int i = 0; i <= Moses::LRModel::NONE; i++)
for (int i = 0; i <= LRModel::NONE; i++)
{
ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i];
@ -41,7 +41,7 @@ namespace sapt
jstats::
dcnt_fwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
assert(idx <= LRModel::NONE);
return ofwd[idx];
}
@ -49,7 +49,7 @@ namespace sapt
jstats::
dcnt_bwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
assert(idx <= LRModel::NONE);
return obwd[idx];
}

View File

@ -24,8 +24,8 @@ namespace sapt
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
// internal word alignment
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
uint32_t ofwd[LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts
public:
std::map<uint32_t,uint32_t> indoc;
@ -48,8 +48,8 @@ namespace sapt
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
void fill_lr_vec(Moses::LRModel::Direction const& dir,
Moses::LRModel::ModelType const& mdl,
void fill_lr_vec(LRModel::Direction const& dir,
LRModel::ModelType const& mdl,
std::vector<float>& v);
};
}

View File

@ -12,7 +12,7 @@ namespace sapt
pstats::
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
}

View File

@ -30,8 +30,8 @@ namespace sapt
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
size_t in_progress; // how many threads are currently working on this?
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations
indoc_map_t indoc;
trg_map_t trg;
@ -43,13 +43,13 @@ namespace sapt
bool
add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
int const docid); // document where sample was found
float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
int const docid); // document where sample was found
void
count_sample(int const docid, // document where sample was found

View File

@ -74,8 +74,11 @@ BitextSampler : public Moses::reference_counter
public:
BitextSampler(BitextSampler const& other);
BitextSampler const& operator=(BitextSampler const& other);
BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias, size_t const min_samples, size_t const max_samples,
BitextSampler(bitext const* const bitext,
typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias,
size_t const min_samples,
size_t const max_samples,
sampling_method const method);
~BitextSampler();
SPTR<pstats> stats();

View File

@ -227,7 +227,9 @@ namespace sapt
// Now sort the array
if (log) *log << "sorting .... with " << threads << " threads." << std::endl;
#ifndef NO_MOSES
double start_time = util::WallTime();
#endif
boost::scoped_ptr<ug::ThreadPool> tpool;
tpool.reset(new ug::ThreadPool(threads));
@ -252,8 +254,10 @@ namespace sapt
}
}
tpool.reset();
#ifndef NO_MOSES
if (log) *log << "Done sorting after " << util::WallTime() - start_time
<< " seconds." << std::endl;
#endif
this->startArray = reinterpret_cast<char const*>(&(*sufa.begin()));
this->endArray = reinterpret_cast<char const*>(&(*sufa.end()));
this->numTokens = sufa.size();

View File

@ -4,7 +4,7 @@ namespace sapt
{
using namespace std;
Moses::LRModel::ReorderingType po_other = Moses::LRModel::NONE;
LRModel::ReorderingType po_other = LRModel::NONE;
// check if min and max in the alignment vector v are within the
// bounds LFT and RGT and update the actual bounds L and R; update
// the total count of alignment links in the underlying phrase
@ -83,54 +83,56 @@ namespace sapt
return ret;
}
Moses::LRModel::ReorderingType
// LRModel::ReorderingType
sapt::PhraseOrientation
find_po_fwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
size_t s1, size_t e1,
size_t s2, size_t e2)
{
if (e2 == a2.size()) // end of target sentence
return Moses::LRModel::M;
return LRModel::M;
size_t y = e2, L = e2, R = a2.size()-1; // won't change
size_t x = e1, T = e1, B = a1.size()-1;
if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M;
return LRModel::M;
B = x = s1-1; T = 0;
if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
return LRModel::S;
while (e2 < a2.size() && a2[e2].size() == 0) ++e2;
if (e2 == a2.size()) // should never happen, actually
return Moses::LRModel::NONE;
return LRModel::NONE;
if (a2[e2].back() < s1)
return Moses::LRModel::DL;
return LRModel::DL;
if (a2[e2].front() >= e1)
return Moses::LRModel::DR;
return Moses::LRModel::NONE;
return LRModel::DR;
return LRModel::NONE;
}
Moses::LRModel::ReorderingType
// LRModel::ReorderingType
PhraseOrientation
find_po_bwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
size_t s1, size_t e1,
size_t s2, size_t e2)
{
if (s1 == 0 && s2 == 0) return Moses::LRModel::M;
if (s2 == 0) return Moses::LRModel::DR;
if (s1 == 0) return Moses::LRModel::DL;
if (s1 == 0 && s2 == 0) return LRModel::M;
if (s2 == 0) return LRModel::DR;
if (s1 == 0) return LRModel::DL;
size_t y = s2-1, L = 0, R = s2-1; // won't change
size_t x = s1-1, T = 0, B = s1-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M;
return LRModel::M;
T = x = e1; B = a1.size()-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
return LRModel::S;
while (s2-- && a2[s2].size() == 0);
Moses::LRModel::ReorderingType ret;
LRModel::ReorderingType ret;
ret = (a2[s2].size() == 0 ? po_other :
a2[s2].back() < s1 ? Moses::LRModel::DR :
a2[s2].front() >= e1 ? Moses::LRModel::DL :
a2[s2].back() < s1 ? LRModel::DR :
a2[s2].front() >= e1 ? LRModel::DL :
po_other);
#if 0
cout << "s1=" << s1 << endl;

View File

@ -12,7 +12,7 @@ namespace sapt {
#ifdef NO_MOSES
class LRModel{
public:
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
enum Direction { Forward, Backward, Bidirectional };

View File

@ -26,8 +26,8 @@ namespace sapt
uint32_t raw1, raw2, sample1, sample2, good1, good2, joint;
float cum_bias;
std::vector<float> fvals;
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[Moses::LRModel::NONE+1]; // distortion counts
float dfwd[LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[LRModel::NONE+1]; // distortion counts
std::vector<unsigned char> aln;
float score;
bool inverse;
@ -125,7 +125,7 @@ namespace sapt
// }
// should we do that here or leave the raw counts?
for (int i = 0; i <= Moses::LRModel::NONE; i++)
for (int i = 0; i <= LRModel::NONE; i++)
{
PhraseOrientation po = static_cast<PhraseOrientation>(i);
dfwd[i] = js.dcnt_fwd(po);
@ -201,7 +201,7 @@ namespace sapt
, inverse(o.inverse)
, indoc(o.indoc)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
{
dfwd[i] = o.dfwd[i];
dbwd[i] = o.dbwd[i];

View File

@ -63,7 +63,9 @@ namespace Moses
, btfix(new mmbitext)
, m_bias_log(NULL)
, m_bias_loglevel(0)
#ifndef NO_MOSES
, m_lr_func(NULL)
#endif
, m_sampling_method(random_sampling)
, bias_key(((char*)this)+3)
, cache_key(((char*)this)+2)
@ -597,6 +599,7 @@ namespace Moses
// Evaluate with all features that can be computed using available factors
tp->EvaluateInIsolation(src, m_featuresToApply);
#ifndef NO_MOSES
if (m_lr_func)
{
LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType();
@ -605,6 +608,7 @@ namespace Moses
pool.fill_lr_vec(dir, mdl, *scores);
tp->SetExtraScores(m_lr_func, scores);
}
#endif
return tp;
}
@ -863,10 +867,10 @@ namespace Moses
boost::unique_lock<boost::shared_mutex> ctxlock(context->lock);
if (localcache) std::cerr << "have local cache " << std::endl;
std::cerr << "BOO at " << HERE << std::endl;
// std::cerr << "BOO at " << HERE << std::endl;
if (!localcache)
{
std::cerr << "no local cache at " << HERE << std::endl;
// std::cerr << "no local cache at " << HERE << std::endl;
setup_bias(ttask);
if (context->bias)
{
@ -879,6 +883,7 @@ namespace Moses
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
#ifndef NO_MOSES
if (m_lr_func_name.size() && m_lr_func == NULL)
{
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
@ -887,6 +892,7 @@ namespace Moses
<< " does not seem to be a lexical reordering function!");
// todo: verify that lr_func implements a hierarchical reordering model
}
#endif
}
bool

View File

@ -26,7 +26,9 @@
#include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h"
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReordering.h"
#endif
#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
@ -82,7 +84,9 @@ namespace Moses
boost::scoped_ptr<std::ofstream> m_bias_logger; // for logging to a file
std::ostream* m_bias_log;
int m_bias_loglevel;
#ifndef NO_MOSES
LexicalReordering* m_lr_func; // associated lexical reordering function
#endif
std::string m_lr_func_name; // name of associated lexical reordering function
sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler
boost::scoped_ptr<ug::ThreadPool> m_thread_pool;

View File

@ -69,7 +69,7 @@ int main(int argc, char* argv[])
while (true)
{
boost::shared_ptr<Sentence> phrase(new Sentence);
if (!phrase->Read(cin,ifo)) break;
if (!phrase->Read(cin,ifo, StaticData::Instance().options())) break;
boost::shared_ptr<TranslationTask> ttask;
ttask = TranslationTask::create(phrase);
if (pdta)

View File

@ -1,7 +1,7 @@
# -*- makefile -*-
# # -*- makefile -*-
MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
MOSES_ROOT=/fs/gna0/germann/moses
LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
ibm1-align: ibm1-align.cc
g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb
# MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
# MOSES_ROOT=/fs/gna0/germann/moses
# LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
# ibm1-align: ibm1-align.cc
# g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb

View File

@ -1,4 +1,4 @@
// $Id$
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#include <list>
#include <vector>
@ -12,7 +12,7 @@
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "util/exception.hh"
#include <boost/foreach.hpp>
#include "TranslationTask.h"
using namespace std;
namespace Moses
@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
size_t inputSize = input.GetSize();
m_inputPathMatrix.resize(inputSize);
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
size_t maxSizePhrase = ttask->options().search.max_phrase_length;
maxSizePhrase = std::min(inputSize, maxSizePhrase);
// 1-word phrases
@ -234,8 +234,10 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY
(m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList);
DecodeStepTranslation const& dstep
= static_cast<const DecodeStepTranslation&>(decodeStep);
dstep.ProcessInitialTransLEGACY(m_source, *oldPtoc, startPos, endPos,
adhereTableLimit, inputPathList);
// do rest of decode steps
int indexStep = 0;

View File

@ -186,7 +186,8 @@ void TranslationTask::Run()
// report thread number
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl);
VERBOSE(2, "Translating line " << translationId << " in thread id "
<< pthread_self() << endl);
#endif
@ -214,8 +215,8 @@ void TranslationTask::Run()
OutputCollector* ocoll;
Timer additionalReportingTime;
additionalReportingTime.start();
boost::shared_ptr<IOWrapper> const& io = m_ioWrapper;
manager->OutputBest(io->GetSingleBestOutputCollector());
// output word graph
@ -229,7 +230,7 @@ void TranslationTask::Run()
// Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes to stderr
if (StaticData::Instance().GetOutputSearchGraphHypergraph()) {
if (options().output.SearchGraphHG.size()) {
size_t transId = manager->GetSource().GetTranslationId();
string fname = io->GetHypergraphOutputFileName(transId);
manager->OutputSearchGraphAsHypergraph(fname, PRECISION);

View File

@ -237,7 +237,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
}
//! populate this InputType with data from in stream
int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
int
TreeInput::
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
AllOptions const& opts)
{
const StaticData &staticData = StaticData::Instance();
@ -254,7 +257,7 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
stringstream strme;
strme << line << endl;
Sentence::Read(strme, factorOrder);
Sentence::Read(strme, factorOrder, opts);
// size input chart
size_t sourceSize = GetSize();

View File

@ -53,7 +53,10 @@ public:
}
//! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
virtual int
Read(std::istream& in,
const std::vector<FactorType>& factorOrder,
AllOptions const& opts);
//! Output debugging info to stream out
virtual void Print(std::ostream&) const;

View File

@ -147,7 +147,11 @@ InitializeFromPCNDataType
return !cn.empty();
}
int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
int
WordLattice::
Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
Clear();
std::string line;

View File

@ -43,7 +43,9 @@ public:
int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line = "");
/** Read from PLF format (1 lattice per line)
*/
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
int Read(std::istream& in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts);
/** Convert internal representation into an edge matrix
* @note edges[1][2] means there is an edge from 1 to 2

View File

@ -21,6 +21,7 @@ namespace Moses
if (!input.init(param)) return false;
if (!mbr.init(param)) return false;
if (!lmbr.init(param)) return false;
if (!output.init(param)) return false;
param.SetParameter(mira, "mira", false);
@ -45,12 +46,31 @@ namespace Moses
{
if (mbr.enabled)
{
cerr << "Error: Cannot use consensus decoding together with mbr" << endl;
cerr << "Error: Cannot use consensus decoding together with mbr"
<< endl;
return false;
}
mbr.enabled = true;
}
// RecoverPath should only be used with confusion net or word lattice input
if (output.RecoverPath && input.input_type == SentenceInput)
{
TRACE_ERR("--recover-input-path should only be used with "
<<"confusion net or word lattice input!\n");
output.RecoverPath = false;
}
// set m_nbest_options.enabled = true if necessary:
nbest.enabled = (nbest.enabled || mira || search.consensus
|| nbest.nbest_size > 0
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()
|| !output.SearchGraphHG.empty()
|| !output.SearchGraphPB.empty()
|| output.lattice_sample_size != 0);
return true;
}
@ -67,9 +87,24 @@ namespace Moses
if (!input.update(param)) return false;
if (!mbr.update(param)) return false;
if (!lmbr.update(param)) return false;
return true;
if (!output.update(param)) return false;
return sanity_check();
}
#endif
// Tells whether n-best lists are to contain distinct entries only. That is
// the case if it was requested explicitly (nbest.only_distinct), if MBR or
// lattice MBR is enabled, if lattice samples are requested, or if any of
// the SearchGraph, SearchGraphExtended, SearchGraphSLF or SearchGraphHG
// output paths is set.
bool
AllOptions::
NBestDistinct() const
{
  if (nbest.only_distinct) return true;
  if (mbr.enabled || lmbr.enabled) return true;
  if (output.lattice_sample_size) return true;
  bool const no_graph_dump = (output.SearchGraph.empty()
                              && output.SearchGraphExtended.empty()
                              && output.SearchGraphSLF.empty()
                              && output.SearchGraphHG.empty());
  return !no_graph_dump;
}
}

View File

@ -11,6 +11,7 @@
#include "InputOptions.h"
#include "MBR_Options.h"
#include "LMBR_Options.h"
#include "ReportingOptions.h"
namespace Moses
{
struct
@ -24,7 +25,7 @@ namespace Moses
InputOptions input;
MBR_Options mbr;
LMBR_Options lmbr;
ReportingOptions output;
bool mira;
// StackOptions stack;
@ -38,6 +39,8 @@ namespace Moses
bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif
bool NBestDistinct() const;
};
}

View File

@ -1,4 +1,4 @@
// -*- mode: c++; cc-style: gnu -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "moses/Parameter.h"
#include "NBestOptions.h"
@ -33,4 +33,21 @@ init(Parameter const& P)
enabled = output_file_path.size();
return true;
}
#ifdef HAVE_XMLRPC_C
// Refreshes the n-best settings from the parameters of an XML-RPC request:
//  - "nbest", if present, replaces nbest_size;
//  - only_distinct is set iff the key "nbest-distinct" is present;
//  - enabled is set iff the resulting nbest_size is positive.
// Always returns true.
bool
NBestOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
  std::map<std::string, xmlrpc_c::value>::const_iterator const hit
    = param.find("nbest");
  if (hit != param.end())
    {
      nbest_size = xmlrpc_c::value_int(hit->second);
    }
  only_distinct = check(param, "nbest-distinct");
  enabled = (nbest_size > 0);
  return true;
}
#endif
} // namespace Moses

View File

@ -24,6 +24,10 @@ struct NBestOptions : public OptionsBaseClass
bool init(Parameter const& param);
#ifdef HAVE_XMLRPC_C
bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif
};
}

View File

@ -10,6 +10,16 @@ namespace Moses
{
return true;
}
// Returns true iff /key/ occurs among the request parameters /param/.
// Only the presence of the key is tested; its value is not inspected.
bool
OptionsBaseClass::
check(std::map<std::string, xmlrpc_c::value> const& param,
      std::string const key)
{
  // (the iterator that used to be declared here was never used)
  return (param.find(key) != param.end());
}
#endif
}

View File

@ -12,6 +12,10 @@ namespace Moses
#ifdef HAVE_XMLRPC_C
virtual bool
update(std::map<std::string,xmlrpc_c::value>const& params);
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key);
#endif
};
}

View File

@ -1,5 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#if 0
#include "ReportingOptions.h"
#include "moses/Parameter.h"
@ -9,82 +8,70 @@ namespace Moses {
ReportingOptions::
init(Parameter const& param)
{
// including factors in the output
param.SetParameter(ReportAllFactors, "report-all-factors", false);
// segmentation reporting
ReportSegmentation = (param.GetParam("report-segmentation-enriched")
? 2 : param.GetParam("report-segmentation")
? 1 : 0);
// word alignment reporting
param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
std::string e; // hack to save us param.SetParameter<string>(...)
param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
// output a word graph
PARAM_VEC const* params;
param.SetParameter(segmentation, "report-segmentation", false );
param.SetParameter(segmentation_enriched, "report-segmentation-enriched", false);
param.SetParameter(all_factors, "report-all-factors", false );
// print ...
param.SetParameter(id, "print-id", false );
param.SetParameter(aln_info, "print-alignment-info", false);
param.SetParameter(passthrough, "print-passthrough", false );
param.SetParameter<string>(detailed_transrep_filepath, "translation-details", "");
param.SetParameter<string>(detailed_tree_transrep_filepath,
"tree-translation-details", "");
param.SetParameter<string>(detailed_all_transrep_filepath,
"translation-all-details", "");
// output search graph
param.SetParameter<string>(output,
"translation-all-details", "");
param.SetParameter(sort_word_alignment, "sort-word-alignment", NoSort);
// Is there a reason why we can't use SetParameter here? [UG]
= param.GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
}
params = param.GetParam("output-word-graph");
output_word_graph = (params && params->size() == 2);
// bizarre code ahead! Why do we need to do the checks here?
// as adapted from StaticData.cpp
params = param.GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
return false;
}
output_search_graph = true;
}
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
return false;
}
output_search_graph = true;
m_outputSearchGraphExtended = true;
} else {
m_outputSearchGraph = false;
}
params = m_parameter->GetParam("output-search-graph-slf");
output_search_graph_slf = params && params->size();
params = m_parameter->GetParam("output-search-graph-hypergraph");
output_search_graph_hypergraph = params && params->size();
WordGraph = (params && params->size() == 2); // what are the two options?
// dump the search graph
param.SetParameter(SearchGraph, "output-search-graph", e);
param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
#ifdef HAVE_PROTOBUF
params = m_parameter->GetParam("output-search-graph-pb");
if (params && params->size()) {
if (params->size() != 1) {
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
return false;
}
m_outputSearchGraphPB = true;
} else
m_outputSearchGraphPB = false;
param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
#endif
param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
// miscellaneous
param.SetParameter(RecoverPath, "recover-input-path",false);
param.SetParameter(ReportHypoScore, "output-hypo-score",false);
param.SetParameter(PrintID, "print-id",false);
param.SetParameter(PrintPassThrough, "print-passthrough",false);
param.SetParameter(detailed_all_transrep_filepath,
"translation-all-details", e);
param.SetParameter(detailed_transrep_filepath, "translation-details", e);
param.SetParameter(detailed_tree_transrep_filepath,
"tree-translation-details", e);
params = param.GetParam("lattice-samples");
if (params) {
if (params->size() ==2 ) {
lattice_sample_filepath = params->at(0);
lattice_sample_size = Scan<size_t>(params->at(1));
} else {
std::cerr <<"wrong format for switch -lattice-samples file size";
return false;
}
} else {
lattice_sample_size = 0;
}
return true;
}
#ifdef HAVE_XMLRPC_C
// Refreshes the reporting options from the parameters of an XML-RPC
// request. Only ReportAllFactors is touched: it is set iff the key
// "report-all-factors" is present. Always returns true.
bool
ReportingOptions::
update(std::map<std::string,xmlrpc_c::value>const& param)
{
  bool const all_factors_requested = check(param, "report-all-factors");
  ReportAllFactors = all_factors_requested;
  return true;
}
}
#endif
}

View File

@ -2,40 +2,59 @@
#pragma once
#include <string>
#include "moses/Parameter.h"
#include "OptionsBaseClass.h"
namespace Moses
{
struct
ReportingOptions
ReportingOptions : public OptionsBaseClass
{
bool ReportAllFactors; // m_reportAllFactors;
WordAlignmentSort sort_word_alignment; // 0: no, 1: target order
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
std::string AlignmentOutputFile;
bool segmentation; // m_reportSegmentation;
bool segmentation_enriched; // m_reportSegmentationEnriched;
bool all_factors; // m_reportAllFactors;
bool WordGraph;
bool output_word_graph;
bool output_search_graph;
bool output_search_graph_extended;
bool output_search_graph_slf;
bool output_search_graph_hypergraph;
bool output_search_graph_protobuf;
std::string SearchGraph;
std::string SearchGraphExtended;
std::string SearchGraphSLF;
std::string SearchGraphHG;
std::string SearchGraphPB;
bool DontPruneSearchGraph;
bool RecoverPath; // recover input path?
bool ReportHypoScore;
bool PrintID;
bool PrintPassThrough;
// print ..
bool aln_info; // m_PrintAlignmentInfo;
bool id; // m_PrintID;
bool passthrough; // m_PrintPassthroughInformation;
// transrep = translation reporting
std::string detailed_transrep_filepath;
std::string detailed_tree_transrep_filepath;
std::string detailed_all_transrep_filepath;
std::string aln_output_file; // m_alignmentOutputFile;
std::string lattice_sample_filepath;
size_t lattice_sample_size;
bool init(Parameter const& param);
/// do we need to keep the search graph from decoding?
bool NeedSearchGraph() const {
  // true as soon as either of the two search graph output paths is set
  return !SearchGraph.empty() || !SearchGraphExtended.empty();
}
#ifdef HAVE_XMLRPC_C
bool update(std::map<std::string,xmlrpc_c::value>const& param);
#endif
};
}

View File

@ -36,6 +36,7 @@ namespace Moses
beam_width = TransformScore(beam_width);
trans_opt_threshold = TransformScore(trans_opt_threshold);
early_discarding_threshold = TransformScore(early_discarding_threshold);
return true;
}

View File

@ -25,8 +25,7 @@ using Moses::Sentence;
boost::shared_ptr<TranslationRequest>
TranslationRequest::
create(Translator* translator, xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond,
boost::mutex& mut)
boost::condition_variable& cond, boost::mutex& mut)
{
boost::shared_ptr<TranslationRequest> ret;
ret.reset(new TranslationRequest(paramList, cond, mut));
@ -60,10 +59,9 @@ Run()
Moses::StaticData const& SD = Moses::StaticData::Instance();
//Make sure alternative paths are retained, if necessary
if (m_withGraphInfo || m_nbestSize>0)
// why on earth is this a global variable? Is this even thread-safe???? UG
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
// if (m_withGraphInfo || m_nbestSize>0)
// why on earth is this a global variable? Is this even thread-safe???? UG
// (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
// std::stringstream out, graphInfo, transCollOpts;
if (SD.IsSyntax())
@ -170,7 +168,14 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
{
TrellisPathList nBestList;
vector<xmlrpc_c::value> nBestXml;
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
manager.CalcNBest(m_options.nbest.nbest_size, nBestList,
m_options.nbest.only_distinct);
StaticData const& SD = StaticData::Instance();
manager.OutputNBest(cout, nBestList,
SD.GetOutputFactorOrder(),
m_source->GetTranslationId(),
options().output.ReportSegmentation);
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
vector<const Hypothesis *> const& E = path->GetEdges();
@ -180,7 +185,8 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
if (m_withScoreBreakdown) {
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
bool with_labels = m_options.nbest.include_feature_labels;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
}
@ -228,23 +234,23 @@ insertTranslationOptions(Moses::Manager& manager,
retData["topt"] = xmlrpc_c::value_array(toptsXml);
}
// Returns true iff /key/ occurs among the request parameters /params/.
// Only the presence of the key is tested; its value is not inspected.
bool
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
{
  // (the iterator that used to be declared here was never used)
  return (params.find(key) != params.end());
}
// Construct a translation request from an XML-RPC parameter list.
// Initializes m_cond and m_mutex from the caller-supplied condition
// variable and mutex, marks the request as not done, keeps the parameter
// list in m_paramList, and sets the session id to 0 (the header documents
// 0 as "none").
TranslationRequest::
TranslationRequest(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::mutex& mut)
: m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
, m_nbestSize(0)
// , m_nbestSize(0)
, m_session_id(0)
{
// Seed this request's options from the global StaticData instance.
// NOTE(review): presumably m_options is a per-request copy, so later
// changes to it do not touch the global settings -- confirm.
m_options = StaticData::Instance().options();
}
// Report whether the request parameter map contains an entry named `key`.
// Only the presence of the key is tested; the associated value is not read.
//
// param: parameter map of the request (name -> XML-RPC value)
// key:   parameter name to look for
// returns true if `key` is present in `param`, false otherwise
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
      std::string const key)
{
  return param.find(key) != param.end();
}
void
TranslationRequest::
@ -274,10 +280,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
m_withWordAlignInfo = check(params, "word-align");
m_withGraphInfo = check(params, "sg");
m_withTopts = check(params, "topt");
m_reportAllFactors = check(params, "report-all-factors");
m_nbestDistinct = check(params, "nbest-distinct");
// m_reportAllFactors = check(params, "report-all-factors");
// m_nbestDistinct = check(params, "nbest-distinct");
m_withScoreBreakdown = check(params, "add-score-breakdown");
m_source.reset(new Sentence(0,m_source_string));
si = params.find("lambda");
if (si != params.end())
{
@ -298,9 +303,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
}
}
si = params.find("nbest");
if (si != params.end())
m_nbestSize = xmlrpc_c::value_int(si->second);
// si = params.find("nbest");
// if (si != params.end())
// m_nbestSize = xmlrpc_c::value_int(si->second);
si = params.find("context");
if (si != params.end())
@ -309,6 +314,8 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
VERBOSE(1,"CONTEXT " << context);
m_context.reset(new std::vector<std::string>(1,context));
}
// // biased sampling for suffix-array-based sampling phrase table?
// if ((si = params.find("bias")) != params.end())
// {
@ -317,6 +324,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
// for (size_t i = 1; i < tmp.size(); i += 2)
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
// }
m_source.reset(new Sentence(0,m_source_string,m_options));
} // end of Translationtask::parse_request()
@ -326,7 +334,7 @@ run_chart_decoder()
{
Moses::TreeInput tinput;
istringstream buf(m_source_string + "\n");
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options);
Moses::ChartManager manager(this->self());
manager.Decode();
@ -393,8 +401,13 @@ void
TranslationRequest::
run_phrase_decoder()
{
if (m_withGraphInfo || m_options.nbest.nbest_size>0)
m_options.output.SearchGraph = "true";
Manager manager(this->self());
// if (m_bias.size()) manager.SetBias(&m_bias);
manager.Decode();
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
@ -403,10 +416,10 @@ run_phrase_decoder()
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
if (m_withTopts) insertTranslationOptions(manager,m_retData);
if (m_nbestSize) outputNBest(manager, m_retData);
if (m_options.nbest.nbest_size) outputNBest(manager, m_retData);
(const_cast<StaticData&>(Moses::StaticData::Instance()))
.SetOutputSearchGraph(false);
// (const_cast<StaticData&>(Moses::StaticData::Instance()))
// .SetOutputSearchGraph(false);
// WTF? one more reason not to have this as global variable! --- UG
}

View File

@ -43,9 +43,9 @@ TranslationRequest : public virtual Moses::TranslationTask
bool m_withGraphInfo;
bool m_withTopts;
bool m_reportAllFactors;
bool m_nbestDistinct;
// bool m_nbestDistinct;
bool m_withScoreBreakdown;
size_t m_nbestSize;
// size_t m_nbestSize;
uint64_t m_session_id; // 0 means none, 1 means new