mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-01 16:33:16 +03:00
Merge branch 'mmt-dev'
This commit is contained in:
commit
ff1977c29e
5
Jamroot
5
Jamroot
@ -1,5 +1,5 @@
|
||||
#BUILDING MOSES
|
||||
#
|
||||
|
||||
#PACKAGES
|
||||
#Language models (optional):
|
||||
#--with-irstlm=/path/to/irstlm
|
||||
@ -245,7 +245,7 @@ if [ option.get "with-mm" : : "yes" ]
|
||||
moses/TranslationModel/UG//ptable-describe-features
|
||||
moses/TranslationModel/UG//count-ptable-features
|
||||
moses/TranslationModel/UG//ptable-lookup
|
||||
# moses/TranslationModel/UG//spe-check-coverage
|
||||
moses/TranslationModel/UG//check-coverage
|
||||
moses/TranslationModel/UG/mm//mtt-demo1
|
||||
moses/TranslationModel/UG/mm//mtt-build
|
||||
moses/TranslationModel/UG/mm//mtt-dump
|
||||
@ -256,6 +256,7 @@ if [ option.get "with-mm" : : "yes" ]
|
||||
moses/TranslationModel/UG/mm//mmlex-lookup
|
||||
moses/TranslationModel/UG/mm//mtt-count-words
|
||||
moses/TranslationModel/UG/mm//calc-coverage
|
||||
moses/TranslationModel/UG//check-coverage
|
||||
moses/TranslationModel/UG//try-align
|
||||
;
|
||||
}
|
||||
|
@ -257,9 +257,9 @@ public:
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
//Make sure alternative paths are retained, if necessary
|
||||
if (addGraphInfo || nbest_size>0) {
|
||||
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
|
||||
}
|
||||
// if (addGraphInfo || nbest_size>0) {
|
||||
// (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true);
|
||||
// }
|
||||
|
||||
|
||||
stringstream out, graphInfo, transCollOpts;
|
||||
@ -269,7 +269,7 @@ public:
|
||||
boost::shared_ptr<TreeInput> tinput(new TreeInput);
|
||||
const vector<FactorType>& IFO = staticData.GetInputFactorOrder();
|
||||
istringstream in(source + "\n");
|
||||
tinput->Read(in,IFO);
|
||||
tinput->Read(in,IFO,staticData.options());
|
||||
ttasksptr task = Moses::TranslationTask::create(tinput);
|
||||
ChartManager manager(task);
|
||||
manager.Decode();
|
||||
@ -285,7 +285,8 @@ public:
|
||||
else
|
||||
{
|
||||
// size_t lineNumber = 0; // TODO: Include sentence request number here?
|
||||
boost::shared_ptr<Sentence> sentence(new Sentence(0,source));
|
||||
boost::shared_ptr<Sentence> sentence;
|
||||
sentence.reset(new Sentence(0,source,staticData.options()));
|
||||
ttasksptr task = Moses::TranslationTask::create(sentence);
|
||||
Manager manager(task);
|
||||
manager.Decode();
|
||||
@ -320,7 +321,7 @@ public:
|
||||
outputNBest(manager, m_retData, nbest_size, nbest_distinct,
|
||||
reportAllFactors, addAlignInfo, addScoreBreakdown);
|
||||
}
|
||||
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
|
||||
// (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
|
||||
}
|
||||
m_retData["text"] = value_string(out.str());
|
||||
XVERBOSE(1,"Output: " << out.str() << endl);
|
||||
@ -479,7 +480,9 @@ public:
|
||||
{
|
||||
// should the score breakdown be reported in a more structured manner?
|
||||
ostringstream buf;
|
||||
path.GetScoreBreakdown()->OutputAllFeatureScores(buf);
|
||||
bool with_labels
|
||||
= StaticData::Instance().options().nbest.include_feature_labels;
|
||||
path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
|
||||
nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str());
|
||||
}
|
||||
|
||||
|
@ -202,8 +202,9 @@ int main(int argc, char* argv[])
|
||||
<< " ||| ";
|
||||
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
||||
manager.OutputBestHypo(mbrBestHypo, lineCount,
|
||||
SD.GetReportSegmentation(),
|
||||
SD.GetReportAllFactors(),cout);
|
||||
manager.options().output.ReportSegmentation,
|
||||
manager.options().output.ReportAllFactors,
|
||||
cout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -106,7 +106,9 @@ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
|
||||
}
|
||||
|
||||
|
||||
bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b)
|
||||
bool
|
||||
compare_target(std::pair<size_t,size_t> const* a,
|
||||
std::pair<size_t,size_t> const* b)
|
||||
{
|
||||
if(a->second < b->second) return true;
|
||||
if(a->second == b->second) return (a->first < b->first);
|
||||
@ -114,7 +116,9 @@ bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,si
|
||||
}
|
||||
|
||||
|
||||
std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignments() const
|
||||
std::vector< const std::pair<size_t,size_t>* >
|
||||
AlignmentInfo::
|
||||
GetSortedAlignments(WordAlignmentSort SortOrder) const
|
||||
{
|
||||
std::vector< const std::pair<size_t,size_t>* > ret;
|
||||
|
||||
@ -124,10 +128,7 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
|
||||
ret.push_back(&alignPair);
|
||||
}
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort();
|
||||
|
||||
switch (wordAlignmentSort) {
|
||||
switch (SortOrder) {
|
||||
case NoSort:
|
||||
break;
|
||||
|
||||
@ -136,7 +137,8 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
|
||||
break;
|
||||
|
||||
default:
|
||||
UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort);
|
||||
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
|
||||
<< SortOrder);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <cstdlib>
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "TypeDef.h"
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
@ -83,7 +83,8 @@ public:
|
||||
return m_collection.size();
|
||||
}
|
||||
|
||||
std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
|
||||
std::vector< const std::pair<size_t,size_t>* >
|
||||
GetSortedAlignments(WordAlignmentSort SortOrder) const;
|
||||
|
||||
std::vector<size_t> GetSourceIndex2PosMap() const;
|
||||
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include "RuleCube.h"
|
||||
#include "Range.h"
|
||||
#include "Util.h"
|
||||
#include "StaticData.h"
|
||||
#include "ChartTranslationOptions.h"
|
||||
#include "ChartTranslationOptionList.h"
|
||||
#include "ChartManager.h"
|
||||
@ -52,8 +51,7 @@ ChartCellBase::~ChartCellBase() {}
|
||||
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
|
||||
ChartCellBase(startPos, endPos), m_manager(manager)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
m_nBestIsEnabled = staticData.options().nbest.enabled;
|
||||
m_nBestIsEnabled = manager.options().nbest.enabled;
|
||||
}
|
||||
|
||||
ChartCell::~ChartCell() {}
|
||||
@ -66,7 +64,14 @@ ChartCell::~ChartCell() {}
|
||||
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
|
||||
{
|
||||
const Word &targetLHS = hypo->GetTargetLHS();
|
||||
return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager);
|
||||
MapType::iterator m = m_hypoColl.find(targetLHS);
|
||||
if (m == m_hypoColl.end())
|
||||
{
|
||||
std::pair<Word, ChartHypothesisCollection>
|
||||
e(targetLHS, ChartHypothesisCollection(m_manager.options()));
|
||||
m = m_hypoColl.insert(e).first;
|
||||
}
|
||||
return m->second.AddHypothesis(hypo, m_manager);
|
||||
}
|
||||
|
||||
/** Prune each collection in this cell to a particular size */
|
||||
@ -87,8 +92,6 @@ void ChartCell::PruneToSize()
|
||||
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
||||
, const ChartCellCollection &allChartCells)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// priority queue for applicable rules with selected hypotheses
|
||||
RuleCubeQueue queue(m_manager);
|
||||
|
||||
@ -100,7 +103,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
||||
}
|
||||
|
||||
// pluck things out of queue and add to hypo collection
|
||||
const size_t popLimit = staticData.options().cube.pop_limit;
|
||||
const size_t popLimit = m_manager.options().cube.pop_limit;
|
||||
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
|
||||
ChartHypothesis *hypo = queue.Pop();
|
||||
AddHypothesis(hypo);
|
||||
|
@ -256,12 +256,13 @@ void ChartHypothesis::CleanupArcList()
|
||||
* However, may not be enough if only unique candidates are needed,
|
||||
* so we'll keep all of the arc list if we need a distinct n-best list
|
||||
*/
|
||||
AllOptions const& opts = StaticData::Instance().options();
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData.options().nbest.nbest_size;
|
||||
bool distinctNBest = (staticData.options().nbest.only_distinct
|
||||
|| staticData.options().mbr.enabled
|
||||
|| staticData.GetOutputSearchGraph()
|
||||
|| staticData.GetOutputSearchGraphHypergraph());
|
||||
size_t nBestSize = opts.nbest.nbest_size;
|
||||
bool distinctNBest = (opts.nbest.only_distinct
|
||||
|| opts.mbr.enabled
|
||||
|| opts.output.NeedSearchGraph()
|
||||
|| !opts.output.SearchGraphHG.empty());
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize) {
|
||||
// prune arc list only if there are too many arcs
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "ChartManager.h"
|
||||
#include "HypergraphOutput.h"
|
||||
#include "util/exception.hh"
|
||||
#include "parameters/AllOptions.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
@ -33,13 +34,13 @@ using namespace Moses;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
ChartHypothesisCollection::ChartHypothesisCollection()
|
||||
ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
m_beamWidth = staticData.GetBeamWidth();
|
||||
m_maxHypoStackSize = staticData.options().search.stack_size;
|
||||
m_nBestIsEnabled = staticData.options().nbest.enabled;
|
||||
m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth();
|
||||
m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size;
|
||||
m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled;
|
||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
class ChartSearchGraphWriter;
|
||||
class AllOptions;
|
||||
|
||||
//! functor to compare (chart) hypotheses by (descending) score
|
||||
class ChartHypothesisScoreOrderer
|
||||
@ -70,7 +71,7 @@ public:
|
||||
return m_hypos.end();
|
||||
}
|
||||
|
||||
ChartHypothesisCollection();
|
||||
ChartHypothesisCollection(AllOptions const& opts);
|
||||
~ChartHypothesisCollection();
|
||||
bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
|
||||
|
||||
|
@ -371,7 +371,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
|
||||
OutputSurface(out, outputPhrase, outputFactorOrder, false);
|
||||
out << " ||| ";
|
||||
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
|
||||
scoreBreakdown->OutputAllFeatureScores(out);
|
||||
bool with_labels = options().nbest.include_feature_labels;
|
||||
scoreBreakdown->OutputAllFeatureScores(out, with_labels);
|
||||
out << " ||| " << derivation.score;
|
||||
|
||||
// optionally, print word alignments
|
||||
@ -618,7 +619,7 @@ void ChartManager::OutputDetailedTranslationReport(
|
||||
//DIMw
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
|
||||
if (options().output.detailed_all_transrep_filepath.size()) {
|
||||
const Sentence &sentence = static_cast<const Sentence &>(m_source);
|
||||
size_t nBestSize = staticData.options().nbest.nbest_size;
|
||||
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
|
||||
@ -835,11 +836,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
|
||||
Backtrack(hypo);
|
||||
VERBOSE(3,"0" << std::endl);
|
||||
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << hypo->GetTotalScore() << " ";
|
||||
}
|
||||
|
||||
if (StaticData::Instance().IsPathRecoveryEnabled()) {
|
||||
if (options().output.RecoverPath) {
|
||||
out << "||| ";
|
||||
}
|
||||
Phrase outPhrase(ARRAY_SIZE_INCR);
|
||||
@ -858,7 +859,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe
|
||||
} else {
|
||||
VERBOSE(1, "NO BEST TRANSLATION" << endl);
|
||||
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << "0 ";
|
||||
}
|
||||
|
||||
|
@ -107,8 +107,13 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha
|
||||
targetPhrase->SetAlignmentInfo("0-0");
|
||||
targetPhrase->EvaluateInIsolation(*unksrc);
|
||||
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
||||
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
|
||||
AllOptions const& opts = staticData.options();
|
||||
if (!opts.output.detailed_tree_transrep_filepath.empty() ||
|
||||
opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
||||
std::string prop = "[ ";
|
||||
prop += (*targetLHS)[0]->GetString().as_string() + " ";
|
||||
prop += sourceWord[0]->GetString().as_string() + " ]";
|
||||
targetPhrase->SetProperty("Tree", prop);
|
||||
}
|
||||
|
||||
// chart rule
|
||||
|
@ -110,29 +110,14 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format)
|
||||
int
|
||||
ConfusionNet::
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
int rv=ReadF(in,factorOrder,0);
|
||||
if(rv) stats.collect(*this);
|
||||
return rv;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Deprecated due to code duplication;
|
||||
// use Word::CreateFromString() instead
|
||||
void
|
||||
ConfusionNet::
|
||||
String2Word(const std::string& s,Word& w,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
{
|
||||
std::vector<std::string> factorStrVector = Tokenize(s, "|");
|
||||
for(size_t i=0; i<factorOrder.size(); ++i)
|
||||
w.SetFactor(factorOrder[i],
|
||||
FactorCollection::Instance().AddFactor
|
||||
(Input,factorOrder[i], factorStrVector[i]));
|
||||
}
|
||||
#endif
|
||||
|
||||
bool
|
||||
ConfusionNet::
|
||||
ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
@ -161,7 +146,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
for(size_t i=0; i < numInputScores; i++) {
|
||||
double prob;
|
||||
if (!(is>>prob)) {
|
||||
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n");
|
||||
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
|
||||
<< "or wrong number of scores\n");
|
||||
return false;
|
||||
}
|
||||
if(prob<0.0) {
|
||||
@ -174,7 +160,8 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
|
||||
|
||||
}
|
||||
//store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon
|
||||
// store 'real' word count in last feature if we have one more
|
||||
// weight than we do arc scores and not epsilon
|
||||
if (addRealWordCount && word!=EPSILON && word!="")
|
||||
probs.back() = -1.0;
|
||||
|
||||
|
@ -67,7 +67,8 @@ public:
|
||||
bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
|
||||
virtual void Print(std::ostream&) const;
|
||||
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
Phrase GetSubString(const Range&) const; //TODO not defined
|
||||
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
|
||||
|
@ -100,12 +100,14 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeStepTranslation::ProcessInitialTranslation(
|
||||
const InputType &source
|
||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPath &inputPath
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const
|
||||
void
|
||||
DecodeStepTranslation::
|
||||
ProcessInitialTranslation(InputType const& source,
|
||||
PartialTranslOptColl &outputPartialTranslOptColl,
|
||||
size_t startPos, size_t endPos,
|
||||
bool adhereTableLimit,
|
||||
InputPath const& inputPath,
|
||||
TargetPhraseCollection::shared_ptr phraseColl) const
|
||||
{
|
||||
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
@ -114,8 +116,9 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
IFVERBOSE(3) {
|
||||
if(StaticData::Instance().GetInputType() == SentenceInput)
|
||||
TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
|
||||
if(source.GetType() == SentenceInput)
|
||||
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
||||
<< startPos << "-" << endPos << "]\n");
|
||||
else
|
||||
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
||||
}
|
||||
@ -137,11 +140,13 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
|
||||
const InputType &source
|
||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPathList &inputPathList) const
|
||||
void
|
||||
DecodeStepTranslation::
|
||||
ProcessInitialTransLEGACY(InputType const& source,
|
||||
PartialTranslOptColl &outputPartialTranslOptColl,
|
||||
size_t startPos, size_t endPos,
|
||||
bool adhereTableLimit,
|
||||
InputPathList const& inputPathList) const
|
||||
{
|
||||
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
@ -152,8 +157,9 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
IFVERBOSE(3) {
|
||||
if(StaticData::Instance().GetInputType() == SentenceInput)
|
||||
TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n");
|
||||
if(source.GetType() == SentenceInput)
|
||||
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
||||
<< startPos << "-" << endPos << "]\n");
|
||||
else
|
||||
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
||||
}
|
||||
|
@ -61,10 +61,13 @@ public:
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
||||
|
||||
// legacy
|
||||
void ProcessInitialTranslationLEGACY(const InputType &source
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPathList &inputPathList) const;
|
||||
void
|
||||
ProcessInitialTransLEGACY(InputType const& source,
|
||||
PartialTranslOptColl &outputPartialTranslOptColl,
|
||||
size_t startPos, size_t endPos,
|
||||
bool adhereTableLimit,
|
||||
InputPathList const& inputPathList) const;
|
||||
|
||||
void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
|
@ -17,8 +17,10 @@ namespace Moses
|
||||
{
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
int ForestInput::Read(std::istream &in,
|
||||
const std::vector<FactorType>& factorOrder)
|
||||
int ForestInput::
|
||||
Read(std::istream &in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
using Syntax::F2S::Forest;
|
||||
|
||||
@ -56,7 +58,7 @@ int ForestInput::Read(std::istream &in,
|
||||
// not sure ForestInput needs to.
|
||||
std::stringstream strme;
|
||||
strme << "<s> " << sentence << " </s>" << std::endl;
|
||||
Sentence::Read(strme, factorOrder);
|
||||
Sentence::Read(strme, factorOrder, opts);
|
||||
|
||||
// Find the maximum end position of any vertex (0 if forest is empty).
|
||||
std::size_t maxEnd = FindMaxEnd(*m_forest);
|
||||
|
@ -28,7 +28,10 @@ public:
|
||||
}
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const;
|
||||
|
@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
|
||||
ChartHypothesisCollection::const_iterator iter;
|
||||
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
|
||||
ChartHypothesis &mainHypo = **iter;
|
||||
if (StaticData::Instance().GetUnprunedSearchGraph() ||
|
||||
if (StaticData::Instance().options().output.DontPruneSearchGraph ||
|
||||
reachable.find(mainHypo.GetId()) != reachable.end()) {
|
||||
(*m_out) << m_lineNumber << " " << mainHypo << endl;
|
||||
}
|
||||
@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos,
|
||||
ChartHypothesisCollection::const_iterator iter;
|
||||
for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) {
|
||||
const ChartHypothesis* mainHypo = *iter;
|
||||
if (!StaticData::Instance().GetUnprunedSearchGraph() &&
|
||||
if (!StaticData::Instance().options().output.DontPruneSearchGraph &&
|
||||
reachable.find(mainHypo->GetId()) == reachable.end()) {
|
||||
//Ignore non reachable nodes
|
||||
continue;
|
||||
|
@ -195,9 +195,8 @@ EvaluateWhenApplied(float futureScore)
|
||||
const StatefulFeatureFunction &ff = *ffs[i];
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (! staticData.IsFeatureFunctionIgnored(ff)) {
|
||||
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
|
||||
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
|
||||
&m_currScoreBreakdown);
|
||||
FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
|
||||
m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
@ -276,15 +275,11 @@ CleanupArcList()
|
||||
* However, may not be enough if only unique candidates are needed,
|
||||
* so we'll keep all of the arc list if we need a distinct n-best list
|
||||
*/
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
size_t nBestSize = staticData.options().nbest.nbest_size;
|
||||
bool distinctNBest = (m_manager.options().nbest.only_distinct ||
|
||||
staticData.GetLatticeSamplesSize() ||
|
||||
m_manager.options().mbr.enabled ||
|
||||
staticData.GetOutputSearchGraph() ||
|
||||
staticData.GetOutputSearchGraphSLF() ||
|
||||
staticData.GetOutputSearchGraphHypergraph() ||
|
||||
m_manager.options().lmbr.enabled);
|
||||
AllOptions const& opts = m_manager.options();
|
||||
size_t nBestSize = opts.nbest.nbest_size;
|
||||
bool distinctNBest = opts.NBestDistinct();
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
|
||||
// prune arc list only if there are too many arcs
|
||||
@ -292,9 +287,8 @@ CleanupArcList()
|
||||
m_arcList->end(), CompareHypothesisTotalScore());
|
||||
|
||||
// delete bad ones
|
||||
ArcList::iterator iter;
|
||||
for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
|
||||
delete *iter;
|
||||
ArcList::iterator i = m_arcList->begin() + nBestSize;
|
||||
while (i != m_arcList->end()) delete *i++;
|
||||
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
|
||||
}
|
||||
|
||||
@ -387,13 +381,15 @@ OutputAlignment(std::ostream &out) const
|
||||
currentHypo = currentHypo->GetPrevHypo();
|
||||
}
|
||||
|
||||
OutputAlignment(out, edges);
|
||||
OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Hypothesis::
|
||||
OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
|
||||
OutputAlignment(ostream &out,
|
||||
vector<const Hypothesis *> const& edges,
|
||||
WordAlignmentSort waso)
|
||||
{
|
||||
size_t targetOffset = 0;
|
||||
|
||||
@ -402,7 +398,7 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
|
||||
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
|
||||
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
|
||||
|
||||
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
|
||||
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso);
|
||||
|
||||
targetOffset += tp.GetSize();
|
||||
}
|
||||
@ -412,15 +408,17 @@ OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
|
||||
void
|
||||
Hypothesis::
|
||||
OutputAlignment(ostream &out, const AlignmentInfo &ai,
|
||||
size_t sourceOffset, size_t targetOffset)
|
||||
size_t sourceOffset, size_t targetOffset,
|
||||
WordAlignmentSort waso)
|
||||
{
|
||||
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
|
||||
AlignVec alignments = ai.GetSortedAlignments();
|
||||
AlignVec alignments = ai.GetSortedAlignments(waso);
|
||||
|
||||
AlignVec::const_iterator it;
|
||||
for (it = alignments.begin(); it != alignments.end(); ++it) {
|
||||
const std::pair<size_t,size_t> &alignment = **it;
|
||||
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
|
||||
out << alignment.first + sourceOffset << "-"
|
||||
<< alignment.second + targetOffset << " ";
|
||||
}
|
||||
|
||||
}
|
||||
@ -526,15 +524,17 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
|
||||
const int sourceEnd = sourceRange.GetEndPos();
|
||||
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
|
||||
if (reportSegmentation == 2) {
|
||||
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
|
||||
out << ",wa=";
|
||||
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
|
||||
Hypothesis::OutputAlignment(out, ai, 0, 0);
|
||||
Hypothesis::OutputAlignment(out, ai, 0, 0, waso);
|
||||
out << ",total=";
|
||||
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
|
||||
out << ",";
|
||||
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
|
||||
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
|
||||
scoreBreakdown.OutputAllFeatureScores(out);
|
||||
bool with_labels = m_manager.options().nbest.include_feature_labels;
|
||||
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
|
||||
}
|
||||
out << "| ";
|
||||
}
|
||||
@ -609,8 +609,9 @@ OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
|
||||
Range const& src = this->GetCurrSourceWordsRange();
|
||||
Range const& trg = this->GetCurrTargetWordsRange();
|
||||
|
||||
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
|
||||
vector<pair<size_t,size_t> const* > a
|
||||
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
|
||||
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
|
||||
typedef pair<size_t,size_t> item;
|
||||
map<string, xmlrpc_c::value> M;
|
||||
BOOST_FOREACH(item const* p, a) {
|
||||
|
@ -251,9 +251,18 @@ public:
|
||||
return m_transOpt;
|
||||
}
|
||||
|
||||
void OutputAlignment(std::ostream &out) const;
|
||||
static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
|
||||
static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
|
||||
void
|
||||
OutputAlignment(std::ostream &out) const;
|
||||
|
||||
static void
|
||||
OutputAlignment(std::ostream &out,
|
||||
const std::vector<const Hypothesis *> &edges,
|
||||
WordAlignmentSort waso);
|
||||
|
||||
static void
|
||||
OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai,
|
||||
size_t sourceOffset, size_t targetOffset,
|
||||
WordAlignmentSort waso);
|
||||
|
||||
void OutputInput(std::ostream& os) const;
|
||||
static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
|
||||
|
@ -36,7 +36,7 @@ namespace Moses
|
||||
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
|
||||
HypothesisStack(manager)
|
||||
{
|
||||
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
|
||||
m_nBestIsEnabled = manager.options().nbest.enabled;
|
||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||
m_worstScore = -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
@ -79,12 +79,6 @@ namespace Moses
|
||||
|
||||
IOWrapper::IOWrapper()
|
||||
: m_nBestStream(NULL)
|
||||
// , m_outputWordGraphStream(NULL)
|
||||
// , m_outputSearchGraphStream(NULL)
|
||||
// , m_detailedTranslationReportingStream(NULL)
|
||||
// , m_unknownsStream(NULL)
|
||||
// , m_alignmentInfoStream(NULL)
|
||||
// , m_latticeSamplesStream(NULL)
|
||||
, m_surpressSingleBestOutput(false)
|
||||
, m_look_ahead(0)
|
||||
, m_look_back(0)
|
||||
@ -100,7 +94,7 @@ IOWrapper::IOWrapper()
|
||||
m_look_ahead = staticData.options().context.look_ahead;
|
||||
m_look_back = staticData.options().context.look_back;
|
||||
|
||||
m_inputType = staticData.GetInputType();
|
||||
m_inputType = staticData.options().input.input_type;
|
||||
|
||||
UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
|
||||
"Context-sensitive decoding currently works only with sentence input.");
|
||||
|
@ -216,6 +216,7 @@ boost::shared_ptr<InputType>
|
||||
IOWrapper::
|
||||
BufferInput()
|
||||
{
|
||||
AllOptions const& opts = StaticData::Instance().options();
|
||||
boost::shared_ptr<itype> source;
|
||||
boost::shared_ptr<InputType> ret;
|
||||
if (m_future_input.size()) {
|
||||
@ -224,13 +225,13 @@ BufferInput()
|
||||
m_buffered_ahead -= ret->GetSize();
|
||||
} else {
|
||||
source.reset(new itype);
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
|
||||
return ret;
|
||||
ret = source;
|
||||
}
|
||||
while (m_buffered_ahead < m_look_ahead) {
|
||||
source.reset(new itype);
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
|
||||
if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts))
|
||||
break;
|
||||
m_future_input.push_back(source);
|
||||
m_buffered_ahead += source->GetSize();
|
||||
|
@ -320,10 +320,15 @@ void Manager::OutputNBest(OutputCollector *collector) const
|
||||
OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
|
||||
}
|
||||
|
||||
void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
|
||||
void
|
||||
Manager::
|
||||
OutputNBestList(OutputCollector *collector,
|
||||
std::vector<search::Applied> const& nbest,
|
||||
long translationId) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
|
||||
const std::vector<Moses::FactorType> &outputFactorOrder
|
||||
= staticData.GetOutputFactorOrder();
|
||||
|
||||
std::ostringstream out;
|
||||
// wtf? copied from the original OutputNBestList
|
||||
@ -332,18 +337,21 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
|
||||
}
|
||||
Phrase outputPhrase;
|
||||
ScoreComponentCollection features;
|
||||
for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
|
||||
for (std::vector<search::Applied>::const_iterator i = nbest.begin();
|
||||
i != nbest.end(); ++i) {
|
||||
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
|
||||
// <s> and </s>
|
||||
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
|
||||
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
||||
"Output phrase should have contained at least 2 words "
|
||||
<< "(beginning and end-of-sentence)");
|
||||
|
||||
outputPhrase.RemoveWord(0);
|
||||
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
|
||||
out << translationId << " ||| ";
|
||||
OutputSurface(out, outputPhrase, outputFactorOrder, false);
|
||||
out << " ||| ";
|
||||
features.OutputAllFeatureScores(out);
|
||||
bool with_labels = options().nbest.include_feature_labels;
|
||||
features.OutputAllFeatureScores(out, with_labels);
|
||||
out << " ||| " << i->GetScore() << '\n';
|
||||
}
|
||||
out << std::flush;
|
||||
@ -351,7 +359,9 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
|
||||
collector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
|
||||
void
|
||||
Manager::
|
||||
OutputDetailedTranslationReport(OutputCollector *collector) const
|
||||
{
|
||||
if (collector && !completed_nbest_->empty()) {
|
||||
const search::Applied &applied = completed_nbest_->at(0);
|
||||
@ -498,7 +508,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
|
||||
if (collector == NULL) return;
|
||||
std::ostringstream out;
|
||||
FixPrecision(out);
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << applied.GetScore() << ' ';
|
||||
}
|
||||
Phrase outPhrase;
|
||||
@ -515,10 +525,12 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied
|
||||
VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
|
||||
}
|
||||
|
||||
void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
|
||||
void
|
||||
Manager::
|
||||
OutputBestNone(OutputCollector *collector, long translationId) const
|
||||
{
|
||||
if (collector == NULL) return;
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
collector->Write(translationId, "0 \n");
|
||||
} else {
|
||||
collector->Write(translationId, "\n");
|
||||
|
@ -1,5 +1,4 @@
|
||||
// -*- c++ -*-
|
||||
// $Id$
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
@ -31,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "ReorderingConstraint.h"
|
||||
#include "NonTerminal.h"
|
||||
#include "Range.h"
|
||||
#include "parameters/AllOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -184,7 +184,10 @@ public:
|
||||
}
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts) =0;
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const =0;
|
||||
|
@ -1,4 +1,5 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -25,14 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "n_gram.h"
|
||||
#include "lmContainer.h"
|
||||
|
||||
// should be defined in lmContainer.h, if the version of IRSTLM used provides
|
||||
// context-dependent functionality
|
||||
#ifndef _IRSTLM_LMCONTEXTDEPENDENT
|
||||
#define _IRSTLM_LMCONTEXTDEPENDENT 5
|
||||
#else
|
||||
#define IRSTLM_CONTEXT_DEPENDENT
|
||||
#endif
|
||||
|
||||
using namespace irstlm;
|
||||
|
||||
#include "IRST.h"
|
||||
@ -67,10 +60,9 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
LanguageModelIRST::
|
||||
LanguageModelIRST(const std::string &line)
|
||||
: LanguageModelSingleFactor(line)
|
||||
, m_lmtb_dub(0), m_lmtb_size(0)
|
||||
LanguageModelIRST::LanguageModelIRST(const std::string &line)
|
||||
:LanguageModelSingleFactor(line)
|
||||
,m_lmtb_dub(0), m_lmtb_size(0)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
int threadCount = staticData.ThreadCount();
|
||||
@ -86,9 +78,9 @@ LanguageModelIRST(const std::string &line)
|
||||
VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
|
||||
}
|
||||
|
||||
LanguageModelIRST::
|
||||
~LanguageModelIRST()
|
||||
LanguageModelIRST::~LanguageModelIRST()
|
||||
{
|
||||
|
||||
#ifndef WIN32
|
||||
TRACE_ERR( "reset mmap\n");
|
||||
if (m_lmtb) m_lmtb->reset_mmap();
|
||||
@ -98,17 +90,13 @@ LanguageModelIRST::
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
LanguageModelIRST::
|
||||
IsUseable(const FactorMask &mask) const
|
||||
bool LanguageModelIRST::IsUseable(const FactorMask &mask) const
|
||||
{
|
||||
bool ret = mask[m_factorType];
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
LanguageModelIRST::
|
||||
Load()
|
||||
void LanguageModelIRST::Load()
|
||||
{
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
@ -135,9 +123,7 @@ Load()
|
||||
if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
|
||||
}
|
||||
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CreateFactors(FactorCollection &factorCollection)
|
||||
void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
|
||||
{
|
||||
// add factors which have srilm id
|
||||
// code copied & paste from SRI LM class. should do template function
|
||||
@ -179,23 +165,17 @@ CreateFactors(FactorCollection &factorCollection)
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const std::string &str ) const
|
||||
int LanguageModelIRST::GetLmID( const std::string &str ) const
|
||||
{
|
||||
return d->encode( str.c_str() ); // at the level of micro tags
|
||||
}
|
||||
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const Word &word ) const
|
||||
int LanguageModelIRST::GetLmID( const Word &word ) const
|
||||
{
|
||||
return GetLmID( word.GetFactor(m_factorType) );
|
||||
}
|
||||
|
||||
int
|
||||
LanguageModelIRST::
|
||||
GetLmID( const Factor *factor ) const
|
||||
int LanguageModelIRST::GetLmID( const Factor *factor ) const
|
||||
{
|
||||
size_t factorId = factor->GetId();
|
||||
|
||||
@ -216,21 +196,21 @@ GetLmID( const Factor *factor ) const
|
||||
///////////
|
||||
|
||||
///OLD PROBLEM - SOLVED
|
||||
////////////
|
||||
/// THE PROBLEM WAS HERE:
|
||||
/// m_lmIdLookup.push_back(code);
|
||||
/// because by using push_back we were actually inserting the new element
|
||||
/// at position (factorID-1) instead of at position factorID, where we later read it (see case C)
|
||||
/// This way it works ....
|
||||
/// I was unsure about what is stored in the first positions of m_lmIdLookup,
|
||||
/// so I checked,
|
||||
/// and I found that one entry out of every two stays empty:
|
||||
/// because factorID grows in steps of two (it encodes both source and target), position (factorID-1) is left "empty"
|
||||
/// this causes no correctness problems, only "wasted" memory
|
||||
/// we could make m_lmIdLookup a std::map instead of a std::vector,
|
||||
/// but access would be less efficient, because the random access that vectors offer would no longer be possible
|
||||
/// the choice is yours!!!!
|
||||
////////////////
|
||||
////////////
|
||||
/// THE PROBLEM WAS HERE:
|
||||
/// m_lmIdLookup.push_back(code);
|
||||
/// because by using push_back we were actually inserting the new element
|
||||
/// at position (factorID-1) instead of at position factorID, where we later read it (see case C)
|
||||
/// This way it works ....
|
||||
/// I was unsure about what is stored in the first positions of m_lmIdLookup,
|
||||
/// so I checked,
|
||||
/// and I found that one entry out of every two stays empty:
|
||||
/// because factorID grows in steps of two (it encodes both source and target), position (factorID-1) is left "empty"
|
||||
/// this causes no correctness problems, only "wasted" memory
|
||||
/// we could make m_lmIdLookup a std::map instead of a std::vector,
|
||||
/// but access would be less efficient, because the random access that vectors offer would no longer be possible
|
||||
/// the choice is yours!!!!
|
||||
////////////////
|
||||
|
||||
|
||||
if (factorId >= m_lmIdLookup.size()) {
|
||||
@ -251,34 +231,21 @@ GetLmID( const Factor *factor ) const
|
||||
}
|
||||
}
|
||||
|
||||
FFState const*
|
||||
LanguageModelIRST::
|
||||
EmptyHypothesisState(const InputType &/*input*/) const
|
||||
const FFState* LanguageModelIRST::EmptyHypothesisState(const InputType &/*input*/) const
|
||||
{
|
||||
std::auto_ptr<IRSTLMState> ret(new IRSTLMState());
|
||||
|
||||
return ret.release();
|
||||
}
|
||||
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
|
||||
void LanguageModelIRST::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
|
||||
{
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
|
||||
fullScore = 0;
|
||||
ngramScore = 0;
|
||||
oovCount = 0;
|
||||
|
||||
if ( !phrase.GetSize() ) return;
|
||||
|
||||
//get the context_weight map here
|
||||
SPTR<std::map<std::string, float> const> CW;
|
||||
if (isContextAdaptive && phrase.HasScope()) {
|
||||
CW = phrase.GetScope()->GetContextWeights();
|
||||
}
|
||||
|
||||
int _min = min(m_lmtb_size - 1, (int) phrase.GetSize());
|
||||
|
||||
int codes[m_lmtb_size];
|
||||
@ -289,78 +256,36 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
|
||||
|
||||
char* msp = NULL;
|
||||
float before_boundary = 0.0;
|
||||
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW) {
|
||||
for (; position < _min; ++position) {
|
||||
codes[idx] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx] == m_unknownId) ++oovCount;
|
||||
before_boundary += m_lmtb->clprob(codes,idx+1,*CW,NULL,NULL,&msp);
|
||||
++idx;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
for (; position < _min; ++position) {
|
||||
codes[idx] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx] == m_unknownId) ++oovCount;
|
||||
before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
|
||||
++idx;
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
for (; position < _min; ++position) {
|
||||
codes[idx] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx] == m_unknownId) ++oovCount;
|
||||
before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp);
|
||||
++idx;
|
||||
}
|
||||
#endif
|
||||
|
||||
ngramScore = 0.0;
|
||||
int end_loop = (int) phrase.GetSize();
|
||||
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW) {
|
||||
for (; position < end_loop; ++position) {
|
||||
for (idx = 1; idx < m_lmtb_size; ++idx) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
codes[idx-1] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx-1] == m_unknownId) ++oovCount;
|
||||
ngramScore += m_lmtb->clprob(codes,idx,*CW,NULL,NULL,&msp);
|
||||
for (; position < end_loop; ++position) {
|
||||
for (idx = 1; idx < m_lmtb_size; ++idx) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
for (; position < end_loop; ++position) {
|
||||
for (idx = 1; idx < m_lmtb_size; ++idx) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
codes[idx-1] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx-1] == m_unknownId) ++oovCount;
|
||||
ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
codes[idx-1] = GetLmID(phrase.GetWord(position));
|
||||
if (codes[idx-1] == m_unknownId) ++oovCount;
|
||||
ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
|
||||
}
|
||||
#endif
|
||||
before_boundary = TransformLMScore(before_boundary);
|
||||
ngramScore = TransformLMScore(ngramScore);
|
||||
fullScore = ngramScore + before_boundary;
|
||||
}
|
||||
|
||||
FFState*
|
||||
LanguageModelIRST::
|
||||
EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
ScoreComponentCollection *out) const
|
||||
FFState* LanguageModelIRST::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
|
||||
{
|
||||
bool isContextAdaptive
|
||||
= m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
|
||||
|
||||
if (!hypo.GetCurrTargetLength()) {
|
||||
std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
|
||||
return ret.release();
|
||||
}
|
||||
|
||||
//get the context_weight map here
|
||||
SPTR<std::map<std::string, float> const> CW;
|
||||
if (isContextAdaptive) {
|
||||
ttasksptr ttask = hypo.GetManager().GetTtask();
|
||||
if (ttask) CW = ttask->GetScope()->GetContextWeights();
|
||||
}
|
||||
|
||||
|
||||
//[begin, end) in STL-like fashion.
|
||||
const int begin = (const int) hypo.GetCurrTargetWordsRange().GetStartPos();
|
||||
const int end = (const int) hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
|
||||
@ -383,34 +308,18 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
}
|
||||
|
||||
char* msp = NULL;
|
||||
float score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
|
||||
position = (const int) begin+1;
|
||||
float score;
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW) {
|
||||
score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
while (position < adjust_end) {
|
||||
for (idx=1; idx<m_lmtb_size; idx++) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
codes[idx-1] = GetLmID(hypo.GetWord(position));
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
++position;
|
||||
while (position < adjust_end) {
|
||||
for (idx=1; idx<m_lmtb_size; idx++) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
position = (const int) begin+1;
|
||||
while (position < adjust_end) {
|
||||
for (idx=1; idx<m_lmtb_size; idx++) {
|
||||
codes[idx-1] = codes[idx];
|
||||
}
|
||||
codes[idx-1] = GetLmID(hypo.GetWord(position));
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
++position;
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
codes[idx-1] = GetLmID(hypo.GetWord(position));
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
++position;
|
||||
}
|
||||
#endif
|
||||
|
||||
//adding probability of having sentenceEnd symbol, after this phrase;
|
||||
//this could happen only when all source words are covered
|
||||
if (hypo.IsSourceCompleted()) {
|
||||
@ -427,13 +336,8 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
codes[idx] = m_lmtb_sentenceStart;
|
||||
--idx;
|
||||
}
|
||||
#ifdef IRSTLM_CONTEXT_DEPENDENT
|
||||
if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
|
||||
else
|
||||
#else
|
||||
score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
|
||||
#endif
|
||||
} else {
|
||||
} else {
|
||||
// need to set the LM state
|
||||
|
||||
if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
|
||||
@ -454,9 +358,7 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
|
||||
return ret.release();
|
||||
}
|
||||
|
||||
LMResult
|
||||
LanguageModelIRST::
|
||||
GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
LMResult LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
{
|
||||
// set up context
|
||||
size_t count = contextFactor.size();
|
||||
@ -492,8 +394,7 @@ GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
|
||||
bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
|
||||
{
|
||||
if (sentences_done==-1) return true;
|
||||
if (m_lmcache_cleanup_threshold)
|
||||
@ -510,9 +411,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask)
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
LanguageModelIRST::
|
||||
CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
static int sentenceCount = 0;
|
||||
@ -526,9 +425,7 @@ CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LanguageModelIRST::
|
||||
SetParameter(const std::string& key, const std::string& value)
|
||||
void LanguageModelIRST::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "dub") {
|
||||
m_lmtb_dub = Scan<unsigned int>(value);
|
||||
|
@ -1,4 +1,3 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
@ -92,20 +91,17 @@ public:
|
||||
void Load();
|
||||
const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
|
||||
|
||||
protected:
|
||||
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
|
||||
public:
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
||||
|
||||
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
|
||||
|
||||
/*
|
||||
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
*/
|
||||
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
||||
/*
|
||||
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
*/
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
void CleanUpAfterSentenceProcessing(const InputType& source);
|
||||
|
@ -1,6 +1,5 @@
|
||||
// $Id$
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -49,6 +48,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/mbr.h"
|
||||
#include "moses/LatticeMBR.h"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
#include "hypergraph.pb.h"
|
||||
#include "rule.pb.h"
|
||||
@ -98,6 +99,10 @@ Manager::GetSource() const
|
||||
*/
|
||||
void Manager::Decode()
|
||||
{
|
||||
|
||||
std::cerr << options().nbest.nbest_size << " "
|
||||
<< options().nbest.enabled << " " << std::endl;
|
||||
|
||||
// initialize statistics
|
||||
ResetSentenceStats(m_source);
|
||||
IFVERBOSE(2) {
|
||||
@ -123,7 +128,8 @@ void Manager::Decode()
|
||||
// some reporting on how long this took
|
||||
IFVERBOSE(1) {
|
||||
GetSentenceStats().StopTimeCollectOpts();
|
||||
TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took "
|
||||
TRACE_ERR("Line "<< m_source.GetTranslationId()
|
||||
<< ": Collecting options took "
|
||||
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
|
||||
<< __FILE__ << ":" << __LINE__ << endl);
|
||||
}
|
||||
@ -1112,11 +1118,13 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea
|
||||
|
||||
}
|
||||
|
||||
void OutputSearchNode(long translationId, std::ostream &outputSearchGraphStream,
|
||||
const SearchGraphNode& searchNode)
|
||||
void
|
||||
OutputSearchNode(AllOptions const& opts, long translationId,
|
||||
std::ostream &outputSearchGraphStream,
|
||||
SearchGraphNode const& searchNode)
|
||||
{
|
||||
const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
|
||||
bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended();
|
||||
bool extendedFormat = opts.output.SearchGraphExtended.size();
|
||||
outputSearchGraphStream << translationId;
|
||||
|
||||
// special case: initial hypothesis
|
||||
@ -1369,24 +1377,32 @@ void Manager::SerializeSearchGraphPB(
|
||||
}
|
||||
#endif
|
||||
|
||||
void Manager::OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
|
||||
void
|
||||
Manager::
|
||||
OutputSearchGraph(long translationId, std::ostream &out) const
|
||||
{
|
||||
vector<SearchGraphNode> searchGraph;
|
||||
GetSearchGraph(searchGraph);
|
||||
for (size_t i = 0; i < searchGraph.size(); ++i) {
|
||||
OutputSearchNode(translationId,outputSearchGraphStream,searchGraph[i]);
|
||||
OutputSearchNode(options(),translationId,out,searchGraph[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
|
||||
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const
|
||||
void
|
||||
Manager::
|
||||
GetForwardBackwardSearchGraph
|
||||
( std::map< int, bool >* pConnected,
|
||||
std::vector<Hypothesis const* >* pConnectedList,
|
||||
std::map<Hypothesis const*, set<Hypothesis const*> >* pOutgoingHyps,
|
||||
vector< float>* pFwdBwdScores) const
|
||||
{
|
||||
std::map < int, bool > &connected = *pConnected;
|
||||
std::vector< const Hypothesis *>& connectedList = *pConnectedList;
|
||||
std::map < int, int > forward;
|
||||
std::map < int, double > forwardScore;
|
||||
|
||||
std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps = *pOutgoingHyps;
|
||||
std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps
|
||||
= *pOutgoingHyps;
|
||||
vector< float> & estimatedScores = *pFwdBwdScores;
|
||||
|
||||
// *** find connected hypotheses ***
|
||||
@ -1395,7 +1411,8 @@ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
|
||||
// ** compute best forward path for each hypothesis *** //
|
||||
|
||||
// forward cost of hypotheses on final stack is 0
|
||||
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
|
||||
const std::vector < HypothesisStack* > &hypoStackColl
|
||||
= m_search->GetHypothesisStacks();
|
||||
const HypothesisStack &finalStack = *hypoStackColl.back();
|
||||
HypothesisStack::const_iterator iterHypo;
|
||||
for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
|
||||
@ -1504,34 +1521,34 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
if (!options().mbr.enabled) {
|
||||
bestHypo = GetBestHypothesis();
|
||||
if (bestHypo) {
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << bestHypo->GetTotalScore() << ' ';
|
||||
}
|
||||
if (staticData.IsPathRecoveryEnabled()) {
|
||||
if (options().output.RecoverPath) {
|
||||
bestHypo->OutputInput(out);
|
||||
out << "||| ";
|
||||
}
|
||||
|
||||
const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
|
||||
if (params && params->size() && Scan<bool>(params->at(0)) ) {
|
||||
out << translationId << " ";
|
||||
}
|
||||
// const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id");
|
||||
if (options().output.PrintID) {
|
||||
out << translationId << " ";
|
||||
}
|
||||
|
||||
// VN : I put back the code for OutputPassthroughInformation
|
||||
if (staticData.IsPassthroughEnabled()) {
|
||||
OutputPassthroughInformation(out, bestHypo);
|
||||
// VN : I put back the code for OutputPassthroughInformation
|
||||
if (options().output.PrintPassThrough) {
|
||||
OutputPassthroughInformation(out, bestHypo);
|
||||
}
|
||||
// end of add back
|
||||
|
||||
if (staticData.GetReportSegmentation() == 2) {
|
||||
if (options().output.ReportSegmentation == 2) {
|
||||
GetOutputLanguageModelOrder(out, bestHypo);
|
||||
}
|
||||
bestHypo->OutputBestSurface(
|
||||
out,
|
||||
staticData.GetOutputFactorOrder(),
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors());
|
||||
if (staticData.PrintAlignmentInfo()) {
|
||||
options().output.ReportSegmentation,
|
||||
options().output.ReportAllFactors);
|
||||
if (options().output.PrintAlignmentInfo) {
|
||||
out << "||| ";
|
||||
bestHypo->OutputAlignment(out);
|
||||
}
|
||||
@ -1572,8 +1589,9 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
} else {
|
||||
//Lattice MBR decoding
|
||||
vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
|
||||
OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors(),out);
|
||||
OutputBestHypo(mbrBestHypo, translationId,
|
||||
options().output.ReportSegmentation,
|
||||
options().output.ReportAllFactors, out);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("finished Lattice MBR decoding");
|
||||
}
|
||||
@ -1584,8 +1602,8 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
else if (options().search.consensus) {
|
||||
const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
|
||||
OutputBestHypo(conBestHypo, translationId,
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors(),out);
|
||||
options().output.ReportSegmentation,
|
||||
options().output.ReportAllFactors, out);
|
||||
OutputAlignment(m_alignmentOut, conBestHypo);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("finished Consensus decoding");
|
||||
@ -1596,8 +1614,8 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
else {
|
||||
const TrellisPath &mbrBestHypo = doMBR(nBestList);
|
||||
OutputBestHypo(mbrBestHypo, translationId,
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors(),out);
|
||||
options().output.ReportSegmentation,
|
||||
options().output.ReportAllFactors, out);
|
||||
OutputAlignment(m_alignmentOut, mbrBestHypo);
|
||||
IFVERBOSE(2) {
|
||||
PrintUserTime("finished MBR decoding");
|
||||
@ -1624,7 +1642,7 @@ void Manager::OutputNBest(OutputCollector *collector) const
|
||||
long translationId = m_source.GetTranslationId();
|
||||
|
||||
if (options().lmbr.enabled) {
|
||||
if (staticData.options().nbest.enabled) {
|
||||
if (options().nbest.enabled) {
|
||||
collector->Write(translationId, m_latticeNBestOut.str());
|
||||
}
|
||||
} else {
|
||||
@ -1632,22 +1650,24 @@ void Manager::OutputNBest(OutputCollector *collector) const
|
||||
ostringstream out;
|
||||
CalcNBest(options().nbest.nbest_size, nBestList,
|
||||
options().nbest.only_distinct);
|
||||
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(),
|
||||
OutputNBest(out, nBestList,
|
||||
staticData.GetOutputFactorOrder(),
|
||||
m_source.GetTranslationId(),
|
||||
staticData.GetReportSegmentation());
|
||||
options().output.ReportSegmentation);
|
||||
collector->Write(m_source.GetTranslationId(), out.str());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Manager::OutputNBest(std::ostream& out
|
||||
, const Moses::TrellisPathList &nBestList
|
||||
, const std::vector<Moses::FactorType>& outputFactorOrder
|
||||
, long translationId
|
||||
, char reportSegmentation) const
|
||||
void
|
||||
Manager::
|
||||
OutputNBest(std::ostream& out,
|
||||
const Moses::TrellisPathList &nBestList,
|
||||
const std::vector<Moses::FactorType>& outputFactorOrder,
|
||||
long translationId, char reportSegmentation) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
NBestOptions const& nbo = staticData.options().nbest;
|
||||
NBestOptions const& nbo = options().nbest;
|
||||
bool reportAllFactors = nbo.include_all_factors;
|
||||
bool includeSegmentation = nbo.include_segmentation;
|
||||
bool includeWordAlignment = nbo.include_alignment_info;
|
||||
@ -1661,12 +1681,14 @@ void Manager::OutputNBest(std::ostream& out
|
||||
out << translationId << " ||| ";
|
||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
OutputSurface(out, edge, outputFactorOrder, reportSegmentation,
|
||||
reportAllFactors);
|
||||
}
|
||||
out << " |||";
|
||||
|
||||
// print scores with feature names
|
||||
path.GetScoreBreakdown()->OutputAllFeatureScores(out);
|
||||
bool with_labels = options().nbest.include_feature_labels;
|
||||
path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels);
|
||||
|
||||
// total
|
||||
out << " ||| " << path.GetTotalScore();
|
||||
@ -1704,7 +1726,7 @@ void Manager::OutputNBest(std::ostream& out
|
||||
}
|
||||
}
|
||||
|
||||
if (StaticData::Instance().IsPathRecoveryEnabled()) {
|
||||
if (options().output.RecoverPath) {
|
||||
out << " ||| ";
|
||||
OutputInput(out, edges[0]);
|
||||
}
|
||||
@ -1719,8 +1741,11 @@ void Manager::OutputNBest(std::ostream& out
|
||||
/***
|
||||
* print surface factor only for the given phrase
|
||||
*/
|
||||
void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
|
||||
char reportSegmentation, bool reportAllFactors) const
|
||||
void
|
||||
Manager::
|
||||
OutputSurface(std::ostream &out, const Hypothesis &edge,
|
||||
const std::vector<FactorType> &outputFactorOrder,
|
||||
char reportSegmentation, bool reportAllFactors) const
|
||||
{
|
||||
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
|
||||
"Must specific at least 1 output factor");
|
||||
@ -1788,26 +1813,33 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
|
||||
out << ",";
|
||||
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
|
||||
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
|
||||
scoreBreakdown.OutputAllFeatureScores(out);
|
||||
bool with_labels = options().nbest.include_feature_labels;
|
||||
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
|
||||
}
|
||||
out << "| ";
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const
|
||||
void
|
||||
Manager::
|
||||
OutputAlignment(ostream &out, const AlignmentInfo &ai,
|
||||
size_t sourceOffset, size_t targetOffset) const
|
||||
{
|
||||
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
|
||||
AlignVec alignments = ai.GetSortedAlignments();
|
||||
AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder);
|
||||
|
||||
AlignVec::const_iterator it;
|
||||
for (it = alignments.begin(); it != alignments.end(); ++it) {
|
||||
const std::pair<size_t,size_t> &alignment = **it;
|
||||
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
|
||||
out << alignment.first + sourceOffset << "-"
|
||||
<< alignment.second + targetOffset << " ";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const
|
||||
void
|
||||
Manager::
|
||||
OutputInput(std::ostream& os, const Hypothesis* hypo) const
|
||||
{
|
||||
size_t len = hypo->GetInput().GetSize();
|
||||
std::vector<const Phrase*> inp_phrases(len, 0);
|
||||
@ -1851,8 +1883,10 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
|
||||
TrellisPathList latticeSamples;
|
||||
ostringstream out;
|
||||
CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
|
||||
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
|
||||
staticData.GetReportSegmentation());
|
||||
OutputNBest(out,latticeSamples,
|
||||
staticData.GetOutputFactorOrder(),
|
||||
m_source.GetTranslationId(),
|
||||
options().output.ReportSegmentation);
|
||||
collector->Write(m_source.GetTranslationId(), out.str());
|
||||
}
|
||||
|
||||
@ -1970,14 +2004,10 @@ void Manager::OutputSearchGraphSLF() const
|
||||
long translationId = m_source.GetTranslationId();
|
||||
|
||||
// Output search graph in HTK standard lattice format (SLF)
|
||||
bool slf = staticData.GetOutputSearchGraphSLF();
|
||||
if (slf) {
|
||||
std::string const& slf = options().output.SearchGraphSLF;
|
||||
if (slf.size()) {
|
||||
util::StringStream fileName;
|
||||
|
||||
string dir;
|
||||
staticData.GetParameter().SetParameter<string>(dir, "output-search-graph-slf", "");
|
||||
|
||||
fileName << dir << "/" << translationId << ".slf";
|
||||
fileName << slf << "/" << translationId << ".slf";
|
||||
ofstream *file = new ofstream;
|
||||
file->open(fileName.str().c_str());
|
||||
if (file->is_open() && file->good()) {
|
||||
@ -2045,7 +2075,11 @@ void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*trans
|
||||
out << endl;
|
||||
}
|
||||
|
||||
void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const
|
||||
void
|
||||
Manager::
|
||||
OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
|
||||
char reportSegmentation, bool reportAllFactors,
|
||||
std::ostream &out) const
|
||||
{
|
||||
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
||||
|
||||
@ -2056,9 +2090,12 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI
|
||||
out << endl;
|
||||
}
|
||||
|
||||
void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
|
||||
void
|
||||
Manager::
|
||||
OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
|
||||
{
|
||||
Hypothesis::OutputAlignment(out, path.GetEdges());
|
||||
WordAlignmentSort waso = options().output.WA_SortOrder;
|
||||
Hypothesis::OutputAlignment(out, path.GetEdges(), waso);
|
||||
// Used by --alignment-output-file so requires endl
|
||||
out << std::endl;
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ protected:
|
||||
// nbest
|
||||
mutable std::ostringstream m_latticeNBestOut;
|
||||
mutable std::ostringstream m_alignmentOut;
|
||||
|
||||
public:
|
||||
void OutputNBest(std::ostream& out
|
||||
, const Moses::TrellisPathList &nBestList
|
||||
, const std::vector<Moses::FactorType>& outputFactorOrder
|
||||
|
@ -39,16 +39,19 @@ MockHypothesisGuard
|
||||
{
|
||||
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
|
||||
std::vector<Moses::FactorType> factors(1,0);
|
||||
m_sentence.reset(new Sentence(0, sourceSentence, &factors));
|
||||
AllOptions const& opts = StaticData::Instance().options();
|
||||
m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors));
|
||||
m_ttask = TranslationTask::create(m_sentence);
|
||||
m_manager.reset(new Manager(m_ttask));
|
||||
|
||||
//Initial empty hypothesis
|
||||
Bitmaps bitmaps(m_sentence.get()->GetSize(), m_sentence.get()->m_sourceCompleted);
|
||||
Bitmaps bitmaps(m_sentence.get()->GetSize(),
|
||||
m_sentence.get()->m_sourceCompleted);
|
||||
m_manager->ResetSentenceStats(*m_sentence);
|
||||
|
||||
const Bitmap &initBitmap = bitmaps.GetInitialBitmap();
|
||||
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, initBitmap);
|
||||
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt,
|
||||
initBitmap);
|
||||
|
||||
//create the chain
|
||||
vector<Alignment>::const_iterator ai = alignments.begin();
|
||||
@ -56,7 +59,8 @@ MockHypothesisGuard
|
||||
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
|
||||
Hypothesis* prevHypo = m_hypothesis;
|
||||
Range range(ai->first,ai->second);
|
||||
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range);
|
||||
const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(),
|
||||
range);
|
||||
|
||||
m_targetPhrases.push_back(TargetPhrase(NULL));
|
||||
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);
|
||||
|
@ -1620,6 +1620,13 @@ SetParameter<bool>(bool ¶meter, std::string const& parameterName,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Parameter::
|
||||
SetParameter(bool& var, std::string const& name)
|
||||
{
|
||||
SetParameter(var,name,false);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
@ -149,6 +149,20 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void SetParameter(bool& var, std::string const& name);
|
||||
|
||||
bool SetBooleanSwitch(bool& val, std::string const name) {
|
||||
// issues a warning if format is wrong
|
||||
const PARAM_VEC *params = GetParam(name);
|
||||
val = (params && params->size());
|
||||
if (val && params->size() != 1)
|
||||
{
|
||||
TRACE_ERR("ERROR: wrong format for switch -" << name);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<>
|
||||
|
@ -305,35 +305,38 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score
|
||||
}
|
||||
}
|
||||
|
||||
void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
|
||||
void
|
||||
ScoreComponentCollection::
|
||||
OutputAllFeatureScores(std::ostream &out, bool with_labels) const
|
||||
{
|
||||
std::string lastName = "";
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
const StatefulFeatureFunction *ff = sff[i];
|
||||
if (ff->IsTuneable()) {
|
||||
OutputFeatureScores( out, ff, lastName );
|
||||
OutputFeatureScores(out, ff, lastName, with_labels);
|
||||
}
|
||||
}
|
||||
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for( size_t i=0; i<slf.size(); i++ ) {
|
||||
const StatelessFeatureFunction *ff = slf[i];
|
||||
if (ff->IsTuneable()) {
|
||||
OutputFeatureScores( out, ff, lastName );
|
||||
OutputFeatureScores(out, ff, lastName, with_labels);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
|
||||
, const FeatureFunction *ff
|
||||
, std::string &lastName ) const
|
||||
void
|
||||
ScoreComponentCollection::
|
||||
OutputFeatureScores(std::ostream& out, FeatureFunction const* ff,
|
||||
std::string &lastName, bool with_labels) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
bool labeledOutput = staticData.options().nbest.include_feature_labels;
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
// bool labeledOutput = staticData.options().nbest.include_feature_labels;
|
||||
|
||||
// regular features (not sparse)
|
||||
if (ff->HasTuneableComponents()) {
|
||||
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
|
||||
if( with_labels && lastName != ff->GetScoreProducerDescription() ) {
|
||||
lastName = ff->GetScoreProducerDescription();
|
||||
out << " " << lastName << "=";
|
||||
}
|
||||
|
@ -433,10 +433,9 @@ public:
|
||||
m_scores.merge(other.m_scores);
|
||||
}
|
||||
|
||||
void OutputAllFeatureScores(std::ostream &out) const;
|
||||
void OutputFeatureScores( std::ostream& out
|
||||
, const Moses::FeatureFunction *ff
|
||||
, std::string &lastName ) const;
|
||||
void OutputAllFeatureScores(std::ostream &out, bool with_labels) const;
|
||||
void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff,
|
||||
std::string &lastName, bool with_labels) const;
|
||||
|
||||
#ifdef MPI_ENABLE
|
||||
public:
|
||||
|
@ -166,7 +166,8 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
|
||||
void
|
||||
Sentence::
|
||||
init(string line, std::vector<FactorType> const& factorOrder)
|
||||
init(string line, std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
using namespace std;
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
@ -182,7 +183,8 @@ init(string line, std::vector<FactorType> const& factorOrder)
|
||||
aux_interpret_dlt(line); // some poorly documented cache-based stuff
|
||||
|
||||
// if sentences is specified as "<passthrough tag1=""/>"
|
||||
if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) {
|
||||
if (SD.options().output.PrintPassThrough ||
|
||||
SD.options().nbest.include_passthrough) {
|
||||
string pthru = PassthroughSGML(line,"passthrough");
|
||||
this->SetPassthroughInformation(pthru);
|
||||
}
|
||||
@ -230,12 +232,14 @@ init(string line, std::vector<FactorType> const& factorOrder)
|
||||
|
||||
int
|
||||
Sentence::
|
||||
Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
std::string line;
|
||||
if (getline(in, line, '\n').eof())
|
||||
return 0;
|
||||
init(line, factorOrder);
|
||||
init(line, factorOrder, opts);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -366,12 +370,14 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
|
||||
}
|
||||
|
||||
Sentence::
|
||||
Sentence(size_t const transId, string const& stext,
|
||||
Sentence(size_t const transId,
|
||||
string const& stext,
|
||||
AllOptions const& opts,
|
||||
vector<FactorType> const* IFO)
|
||||
: InputType(transId)
|
||||
{
|
||||
if (IFO) init(stext, *IFO);
|
||||
else init(stext, StaticData::Instance().GetInputFactorOrder());
|
||||
if (IFO) init(stext, *IFO, opts);
|
||||
else init(stext, StaticData::Instance().GetInputFactorOrder(), opts);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,4 @@
|
||||
// -*- c++ -*-
|
||||
// $Id$
|
||||
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -28,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "Word.h"
|
||||
#include "Phrase.h"
|
||||
#include "InputType.h"
|
||||
#include "parameters/AllOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -66,7 +65,8 @@ protected:
|
||||
public:
|
||||
Sentence();
|
||||
Sentence(size_t const transId, std::string const& stext,
|
||||
std::vector<FactorType> const* IFO = NULL);
|
||||
AllOptions const& opts,
|
||||
std::vector<FactorType> const* IFO = NULL);
|
||||
// Sentence(size_t const transId, std::string const& stext);
|
||||
~Sentence();
|
||||
|
||||
@ -97,7 +97,10 @@ public:
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
|
||||
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
virtual int
|
||||
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
void Print(std::ostream& out) const;
|
||||
|
||||
TranslationOptionCollection*
|
||||
@ -114,7 +117,8 @@ public:
|
||||
|
||||
|
||||
void
|
||||
init(std::string line, std::vector<FactorType> const& factorOrder);
|
||||
init(std::string line, std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
std::vector<std::map<std::string,std::string> > const&
|
||||
GetDltMeta() const {
|
||||
|
@ -63,7 +63,7 @@ StaticData StaticData::s_instance;
|
||||
StaticData::StaticData()
|
||||
: m_sourceStartPosMattersForRecombination(false)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_inputType(SentenceInput)
|
||||
// , m_inputType(SentenceInput)
|
||||
, m_lmEnableOOVFeature(false)
|
||||
, m_isAlwaysCreateDirectTranslationOption(false)
|
||||
, m_currentWeightSetting("default")
|
||||
@ -132,23 +132,11 @@ StaticData
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// input type has to be specified BEFORE loading the phrase tables!
|
||||
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
|
||||
// m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
|
||||
|
||||
m_parameter->SetParameter(m_continuePartialTranslation,
|
||||
"continue-partial-translation", false );
|
||||
|
||||
std::string s_it = "text input";
|
||||
if (m_inputType == 1) {
|
||||
s_it = "confusion net";
|
||||
}
|
||||
if (m_inputType == 2) {
|
||||
s_it = "word lattice";
|
||||
}
|
||||
if (m_inputType == 3) {
|
||||
s_it = "tree";
|
||||
}
|
||||
VERBOSE(2,"input type is: "<<s_it<<"\n");
|
||||
|
||||
// use of xml in input
|
||||
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
@ -181,119 +169,30 @@ StaticData
|
||||
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
||||
|
||||
|
||||
m_parameter->SetParameter(m_recoverPath, "recover-input-path", false);
|
||||
if (m_recoverPath && m_inputType == SentenceInput) {
|
||||
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
|
||||
m_recoverPath = false;
|
||||
}
|
||||
m_parameter->SetParameter(m_includeLHSInSearchGraph,
|
||||
"include-lhs-in-search-graph", false );
|
||||
|
||||
m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false );
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
|
||||
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
|
||||
params = m_parameter->GetParam("alignment-output-file");
|
||||
if (params && params->size()) {
|
||||
m_alignmentOutputFile = Scan<std::string>(params->at(0));
|
||||
}
|
||||
|
||||
m_parameter->SetParameter( m_PrintID, "print-id", false );
|
||||
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
|
||||
|
||||
params = m_parameter->GetParam("output-word-graph");
|
||||
m_outputWordGraph = (params && params->size() == 2);
|
||||
|
||||
params = m_parameter->GetParam("output-search-graph");
|
||||
if (params && params->size()) {
|
||||
if (params->size() != 1) {
|
||||
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
|
||||
return false;
|
||||
}
|
||||
m_outputSearchGraph = true;
|
||||
}
|
||||
// ... in extended format
|
||||
else if (m_parameter->GetParam("output-search-graph-extended") &&
|
||||
m_parameter->GetParam("output-search-graph-extended")->size()) {
|
||||
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
|
||||
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
|
||||
return false;
|
||||
}
|
||||
m_outputSearchGraph = true;
|
||||
m_outputSearchGraphExtended = true;
|
||||
} else {
|
||||
m_outputSearchGraph = false;
|
||||
}
|
||||
|
||||
params = m_parameter->GetParam("output-search-graph-slf");
|
||||
if (params && params->size()) {
|
||||
m_outputSearchGraphSLF = true;
|
||||
} else {
|
||||
m_outputSearchGraphSLF = false;
|
||||
}
|
||||
|
||||
params = m_parameter->GetParam("output-search-graph-hypergraph");
|
||||
if (params && params->size()) {
|
||||
m_outputSearchGraphHypergraph = true;
|
||||
} else {
|
||||
m_outputSearchGraphHypergraph = false;
|
||||
}
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
params = m_parameter->GetParam("output-search-graph-pb");
|
||||
if (params && params->size()) {
|
||||
if (params->size() != 1) {
|
||||
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
|
||||
return false;
|
||||
}
|
||||
m_outputSearchGraphPB = true;
|
||||
} else
|
||||
m_outputSearchGraphPB = false;
|
||||
#endif
|
||||
|
||||
m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false );
|
||||
m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
|
||||
|
||||
m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", "");
|
||||
|
||||
// printing source phrase spans
|
||||
m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false );
|
||||
m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false );
|
||||
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false );
|
||||
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
||||
"output-unknowns", "");
|
||||
|
||||
//Print Translation Options
|
||||
m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false );
|
||||
m_parameter->SetParameter(m_printTranslationOptions,
|
||||
"print-translation-option", false );
|
||||
|
||||
//Print All Derivations
|
||||
m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false );
|
||||
m_parameter->SetParameter(m_printAllDerivations ,
|
||||
"print-all-derivations", false );
|
||||
|
||||
// additional output
|
||||
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath,
|
||||
"translation-details", "");
|
||||
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath,
|
||||
"tree-translation-details", "");
|
||||
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath,
|
||||
"translation-all-details", "");
|
||||
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
|
||||
m_parameter->SetParameter<long>(m_startTranslationId,
|
||||
"start-translation-id", 0);
|
||||
|
||||
//lattice samples
|
||||
params = m_parameter->GetParam("lattice-samples");
|
||||
if (params) {
|
||||
if (params->size() ==2 ) {
|
||||
m_latticeSamplesFilePath = params->at(0);
|
||||
m_latticeSamplesSize = Scan<size_t>(params->at(1));
|
||||
} else {
|
||||
std::cerr <<"wrong format for switch -lattice-samples file size";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_latticeSamplesSize = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_compact_table_options()
|
||||
StaticData::
|
||||
ini_compact_table_options()
|
||||
{
|
||||
// Compact phrase table and reordering model
|
||||
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
||||
@ -301,8 +200,8 @@ StaticData
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_lm_options()
|
||||
StaticData::
|
||||
ini_lm_options()
|
||||
{
|
||||
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
|
||||
}
|
||||
@ -349,8 +248,8 @@ StaticData
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_factor_maps()
|
||||
StaticData::
|
||||
ini_factor_maps()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// factor delimiter
|
||||
@ -380,8 +279,8 @@ StaticData
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_oov_options()
|
||||
StaticData::
|
||||
ini_oov_options()
|
||||
{
|
||||
// unknown word processing
|
||||
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
|
||||
@ -398,8 +297,8 @@ StaticData
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_zombie_options()
|
||||
StaticData::
|
||||
ini_zombie_options()
|
||||
{
|
||||
//Disable discarding
|
||||
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
|
||||
@ -434,20 +333,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// search
|
||||
ini_oov_options();
|
||||
|
||||
// set m_nbest_options.enabled = true if necessary:
|
||||
if (m_options.mbr.enabled
|
||||
|| m_options.mira
|
||||
|| m_options.search.consensus
|
||||
|| m_outputSearchGraph
|
||||
|| m_outputSearchGraphSLF
|
||||
|| m_outputSearchGraphHypergraph
|
||||
#ifdef HAVE_PROTOBUF
|
||||
|| m_outputSearchGraphPB
|
||||
#endif
|
||||
|| m_latticeSamplesFilePath.size()) {
|
||||
m_options.nbest.enabled = true;
|
||||
}
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
|
||||
RecursiveCYKPlus);
|
||||
@ -455,7 +340,8 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
ini_zombie_options(); // probably dead, or maybe not
|
||||
|
||||
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND);
|
||||
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor",
|
||||
NOT_FOUND);
|
||||
|
||||
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
||||
initialize_features();
|
||||
@ -507,7 +393,8 @@ void StaticData::SetWeight(const FeatureFunction* sp, float weight)
|
||||
m_allWeights.Assign(sp,weight);
|
||||
}
|
||||
|
||||
void StaticData::SetWeights(const FeatureFunction* sp, const std::vector<float>& weights)
|
||||
void StaticData::SetWeights(const FeatureFunction* sp,
|
||||
const std::vector<float>& weights)
|
||||
{
|
||||
m_allWeights.Resize();
|
||||
m_allWeights.Assign(sp,weights);
|
||||
@ -557,8 +444,10 @@ void StaticData::LoadChartDecodingParameters()
|
||||
LoadNonTerminals();
|
||||
|
||||
// source label overlap
|
||||
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd);
|
||||
m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
|
||||
SourceLabelOverlapAdd);
|
||||
m_parameter->SetParameter(m_ruleLimit, "rule-limit",
|
||||
DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
|
||||
}
|
||||
|
||||
@ -596,12 +485,16 @@ void StaticData::LoadDecodeGraphs()
|
||||
}
|
||||
}
|
||||
|
||||
void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const vector<size_t> &maxChartSpans)
|
||||
void
|
||||
StaticData::
|
||||
LoadDecodeGraphsOld(const vector<string> &mappingVector,
|
||||
const vector<size_t> &maxChartSpans)
|
||||
{
|
||||
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
|
||||
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
|
||||
|
||||
const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
|
||||
const std::vector<FeatureFunction*> *featuresRemaining
|
||||
= &FeatureFunction::GetFeatureFunctions();
|
||||
DecodeStep *prev = 0;
|
||||
size_t prevDecodeGraphInd = 0;
|
||||
|
||||
@ -620,7 +513,8 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
|
||||
// For specifying multiple translation model
|
||||
decodeGraphInd = Scan<size_t>(token[0]);
|
||||
//the vectorList index can only increment by one
|
||||
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
|
||||
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
|
||||
&& decodeGraphInd != prevDecodeGraphInd + 1,
|
||||
"Malformed mapping");
|
||||
if (decodeGraphInd > prevDecodeGraphInd) {
|
||||
prev = NULL;
|
||||
@ -707,7 +601,8 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
|
||||
|
||||
decodeGraphInd = Scan<size_t>(token[0]);
|
||||
//the vectorList index can only increment by one
|
||||
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1,
|
||||
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
|
||||
&& decodeGraphInd != prevDecodeGraphInd + 1,
|
||||
"Malformed mapping");
|
||||
if (decodeGraphInd > prevDecodeGraphInd) {
|
||||
prev = NULL;
|
||||
@ -783,17 +678,6 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
|
||||
|
||||
void StaticData::SetExecPath(const std::string &path)
|
||||
{
|
||||
/*
|
||||
namespace fs = boost::filesystem;
|
||||
|
||||
fs::path full_path( fs::initial_path<fs::path>() );
|
||||
|
||||
full_path = fs::system_complete( fs::path( path ) );
|
||||
|
||||
//Without file name
|
||||
m_binPath = full_path.parent_path().string();
|
||||
*/
|
||||
|
||||
// NOT TESTED
|
||||
size_t pos = path.rfind("/");
|
||||
if (pos != string::npos) {
|
||||
@ -810,34 +694,33 @@ const string &StaticData::GetBinDirectory() const
|
||||
float StaticData::GetWeightWordPenalty() const
|
||||
{
|
||||
float weightWP = GetWeight(&WordPenaltyProducer::Instance());
|
||||
//VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl);
|
||||
return weightWP;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::InitializeForInput(ttasksptr const& ttask) const
|
||||
StaticData::
|
||||
InitializeForInput(ttasksptr const& ttask) const
|
||||
{
|
||||
const std::vector<FeatureFunction*> &producers
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
for(size_t i=0; i<producers.size(); ++i) {
|
||||
FeatureFunction &ff = *producers[i];
|
||||
if (! IsFeatureFunctionIgnored(ff)) {
|
||||
Timer iTime;
|
||||
iTime.start();
|
||||
ff.InitializeForInput(ttask);
|
||||
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )"
|
||||
<< "= " << iTime << endl);
|
||||
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription()
|
||||
<< " )" << "= " << iTime << endl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
|
||||
StaticData::
|
||||
CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
|
||||
{
|
||||
const std::vector<FeatureFunction*> &producers
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
for(size_t i=0; i<producers.size(); ++i) {
|
||||
FeatureFunction &ff = *producers[i];
|
||||
if (! IsFeatureFunctionIgnored(ff)) {
|
||||
|
@ -82,9 +82,6 @@ protected:
|
||||
// Initial = 0 = can be used when creating poss trans
|
||||
// Other = 1 = used to calculate LM score once all steps have been processed
|
||||
float
|
||||
// m_beamWidth,
|
||||
// m_earlyDiscardingThreshold,
|
||||
// m_translationOptionThreshold,
|
||||
m_wordDeletionWeight;
|
||||
|
||||
|
||||
@ -94,15 +91,9 @@ protected:
|
||||
// -ve = no limit on distortion
|
||||
// 0 = no disortion (monotone in old pharaoh)
|
||||
bool m_reorderingConstraint; //! use additional reordering constraints
|
||||
// bool m_useEarlyDistortionCost;
|
||||
// size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
|
||||
// size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
|
||||
BookkeepingOptions m_bookkeeping_options;
|
||||
|
||||
size_t m_latticeSamplesSize;
|
||||
// size_t m_maxNoTransOptPerCoverage;
|
||||
// size_t m_maxNoPartTransOpt;
|
||||
// size_t m_maxPhraseLength;
|
||||
|
||||
std::string m_latticeSamplesFilePath;
|
||||
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
|
||||
@ -116,48 +107,31 @@ protected:
|
||||
bool m_printTranslationOptions;
|
||||
|
||||
bool m_sourceStartPosMattersForRecombination;
|
||||
bool m_recoverPath;
|
||||
bool m_outputHypoScore;
|
||||
// bool m_recoverPath;
|
||||
// bool m_outputHypoScore;
|
||||
bool m_requireSortingAfterSourceContext;
|
||||
|
||||
// SearchAlgorithm m_searchAlgorithm;
|
||||
InputTypeEnum m_inputType;
|
||||
// InputTypeEnum m_inputType;
|
||||
|
||||
mutable size_t m_verboseLevel;
|
||||
|
||||
bool m_reportSegmentation;
|
||||
bool m_reportSegmentationEnriched;
|
||||
bool m_reportAllFactors;
|
||||
std::string m_detailedTranslationReportingFilePath;
|
||||
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
|
||||
std::string m_detailedAllTranslationReportingFilePath;
|
||||
|
||||
bool m_PrintAlignmentInfo;
|
||||
|
||||
bool m_PrintID;
|
||||
bool m_PrintPassthroughInformation;
|
||||
|
||||
std::string m_alignmentOutputFile;
|
||||
// bool m_reportSegmentation;
|
||||
// bool m_reportSegmentationEnriched;
|
||||
// bool m_reportAllFactors;
|
||||
// std::string m_detailedTranslationReportingFilePath;
|
||||
// std::string m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
// std::string m_detailedAllTranslationReportingFilePath;
|
||||
// bool m_PrintAlignmentInfo;
|
||||
// bool m_PrintID;
|
||||
// bool m_PrintPassthroughInformation;
|
||||
// std::string m_alignmentOutputFile;
|
||||
|
||||
std::string m_factorDelimiter; //! by default, |, but it can be changed
|
||||
|
||||
XmlInputType m_xmlInputType; //! method for handling sentence XML input
|
||||
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
|
||||
|
||||
// bool m_mbr; //! use MBR decoder
|
||||
// bool m_useLatticeMBR; //! use MBR decoder
|
||||
// bool m_mira; // do mira training
|
||||
// bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009)
|
||||
// size_t m_mbrSize; //! number of translation candidates considered
|
||||
// float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
|
||||
// size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice
|
||||
// std::vector<float> m_lmbrThetas; //! theta(s) for lattice mbr calculation
|
||||
// bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR
|
||||
// float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details
|
||||
// float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
|
||||
// float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
|
||||
|
||||
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
|
||||
bool m_lmEnableOOVFeature;
|
||||
|
||||
@ -167,15 +141,15 @@ protected:
|
||||
bool m_isAlwaysCreateDirectTranslationOption;
|
||||
//! constructor. only the 1 static variable can be created
|
||||
|
||||
bool m_outputWordGraph; //! whether to output word graph
|
||||
bool m_outputSearchGraph; //! whether to output search graph
|
||||
bool m_outputSearchGraphExtended; //! ... in extended format
|
||||
bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
|
||||
bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
|
||||
// bool m_outputWordGraph; //! whether to output word graph
|
||||
// bool m_outputSearchGraph; //! whether to output search graph
|
||||
// bool m_outputSearchGraphExtended; //! ... in extended format
|
||||
// bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
|
||||
// bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
|
||||
#ifdef HAVE_PROTOBUF
|
||||
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
|
||||
// bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
|
||||
#endif
|
||||
bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
|
||||
// bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
|
||||
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
|
||||
std::string m_outputUnknownsFile; //! output unknowns in this file
|
||||
|
||||
@ -190,7 +164,7 @@ protected:
|
||||
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
|
||||
SourceLabelOverlap m_sourceLabelOverlap;
|
||||
UnknownLHSList m_unknownLHS;
|
||||
WordAlignmentSort m_wordAlignmentSort;
|
||||
// WordAlignmentSort m_wordAlignmentSort;
|
||||
|
||||
int m_threadCount;
|
||||
long m_startTranslationId;
|
||||
@ -229,10 +203,6 @@ protected:
|
||||
|
||||
const StatefulFeatureFunction* m_treeStructure;
|
||||
|
||||
// number of nonterminal labels
|
||||
// size_t m_nonTerminalSize;
|
||||
|
||||
|
||||
void ini_compact_table_options();
|
||||
void ini_consensus_decoding_options();
|
||||
void ini_cube_pruning_options();
|
||||
@ -278,7 +248,8 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
//! Load data into static instance. This function is required as LoadData() is not const
|
||||
//! Load data into static instance. This function is required as
|
||||
// LoadData() is not const
|
||||
static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);
|
||||
|
||||
//! Main function to load everything. Also initialize the Parameter object
|
||||
@ -336,22 +307,6 @@ public:
|
||||
bool IsWordDeletionEnabled() const {
|
||||
return m_wordDeletionEnabled;
|
||||
}
|
||||
// size_t GetMaxHypoStackSize() const {
|
||||
// return m_options.search.stack_size;
|
||||
// }
|
||||
// size_t GetMinHypoStackDiversity() const {
|
||||
// return m_options.search.stack_diversity;
|
||||
// }
|
||||
|
||||
size_t IsPathRecoveryEnabled() const {
|
||||
return m_recoverPath;
|
||||
}
|
||||
bool IsIDEnabled() const {
|
||||
return m_PrintID;
|
||||
}
|
||||
bool IsPassthroughEnabled() const {
|
||||
return m_PrintPassthroughInformation;
|
||||
}
|
||||
|
||||
int GetMaxDistortion() const {
|
||||
return m_options.reordering.max_distortion;
|
||||
@ -384,47 +339,6 @@ public:
|
||||
//! Store x as the new verbosity level.
//! NOTE(review): this is a const member function that assigns m_verboseLevel,
//! so the member is presumably declared mutable (declaration not visible here) -- confirm.
void SetVerboseLevel(int x) const {
|
||||
m_verboseLevel = x;
|
||||
}
|
||||
char GetReportSegmentation() const {
|
||||
if (m_reportSegmentation) return 1;
|
||||
if (m_reportSegmentationEnriched) return 2;
|
||||
return 0;
|
||||
}
|
||||
void SetReportSegmentation(const int &val) {
|
||||
if (val == 0)
|
||||
m_reportSegmentation = m_reportSegmentationEnriched = false;
|
||||
else if (val == 1)
|
||||
m_reportSegmentation = true;
|
||||
else if (val == 2)
|
||||
m_reportSegmentationEnriched = true;
|
||||
else
|
||||
std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring";
|
||||
}
|
||||
|
||||
bool GetReportAllFactors() const {
|
||||
return m_reportAllFactors;
|
||||
}
|
||||
|
||||
bool IsDetailedTranslationReportingEnabled() const {
|
||||
return !m_detailedTranslationReportingFilePath.empty();
|
||||
}
|
||||
|
||||
bool IsDetailedAllTranslationReportingEnabled() const {
|
||||
return !m_detailedAllTranslationReportingFilePath.empty();
|
||||
}
|
||||
|
||||
const std::string &GetDetailedTranslationReportingFilePath() const {
|
||||
return m_detailedTranslationReportingFilePath;
|
||||
}
|
||||
bool IsDetailedTreeFragmentsTranslationReportingEnabled() const {
|
||||
return !m_detailedTreeFragmentsTranslationReportingFilePath.empty();
|
||||
}
|
||||
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
|
||||
return m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
}
|
||||
|
||||
// bool IsLabeledNBestList() const {
|
||||
// return m_options.nbest.include_feature_labels;
|
||||
// }
|
||||
|
||||
bool UseMinphrInMemory() const {
|
||||
return m_minphrMemory;
|
||||
@ -434,19 +348,6 @@ public:
|
||||
return m_minlexrMemory;
|
||||
}
|
||||
|
||||
// for mert
|
||||
// size_t GetNBestSize() const {
|
||||
// return m_options.nbest.nbest_size;
|
||||
// }
|
||||
|
||||
// const std::string &GetNBestFilePath() const {
|
||||
// return m_options.nbest.output_file_path;
|
||||
// }
|
||||
|
||||
// bool IsNBestEnabled() const {
|
||||
// return m_options.nbest.enabled;
|
||||
// }
|
||||
|
||||
size_t GetLatticeSamplesSize() const {
|
||||
return m_latticeSamplesSize;
|
||||
}
|
||||
@ -455,22 +356,6 @@ public:
|
||||
return m_latticeSamplesFilePath;
|
||||
}
|
||||
|
||||
// size_t GetNBestFactor() const {
|
||||
// return m_options.nbest.factor;
|
||||
// }
|
||||
bool GetOutputWordGraph() const {
|
||||
return m_outputWordGraph;
|
||||
}
|
||||
|
||||
//! Sets the global score vector weights for a given FeatureFunction.
|
||||
InputTypeEnum GetInputType() const {
|
||||
return m_inputType;
|
||||
}
|
||||
|
||||
// SearchAlgorithm GetSearchAlgorithm() const {
|
||||
// return m_searchAlgorithm;
|
||||
// }
|
||||
|
||||
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
|
||||
if (algo == DefaultSearchAlgorithm)
|
||||
algo = m_options.search.algo;
|
||||
@ -577,33 +462,36 @@ public:
|
||||
return m_lmEnableOOVFeature;
|
||||
}
|
||||
|
||||
bool GetOutputSearchGraph() const {
|
||||
return m_outputSearchGraph;
|
||||
}
|
||||
void SetOutputSearchGraph(bool outputSearchGraph) {
|
||||
m_outputSearchGraph = outputSearchGraph;
|
||||
}
|
||||
bool GetOutputSearchGraphExtended() const {
|
||||
return m_outputSearchGraphExtended;
|
||||
}
|
||||
bool GetOutputSearchGraphSLF() const {
|
||||
return m_outputSearchGraphSLF;
|
||||
}
|
||||
bool GetOutputSearchGraphHypergraph() const {
|
||||
return m_outputSearchGraphHypergraph;
|
||||
}
|
||||
#ifdef HAVE_PROTOBUF
|
||||
bool GetOutputSearchGraphPB() const {
|
||||
return m_outputSearchGraphPB;
|
||||
}
|
||||
#endif
|
||||
// bool GetOutputSearchGraph() const {
|
||||
// return m_outputSearchGraph;
|
||||
// }
|
||||
|
||||
// void SetOutputSearchGraph(bool outputSearchGraph) {
|
||||
// m_outputSearchGraph = outputSearchGraph;
|
||||
// }
|
||||
|
||||
// bool GetOutputSearchGraphExtended() const {
|
||||
// return m_outputSearchGraphExtended;
|
||||
// }
|
||||
// GetOutputSearchGraphSLF() const {
|
||||
// return m_outputSearchGraphSLF;
|
||||
// }
|
||||
// bool GetOutputSearchGraphHypergraph() const {
|
||||
// return m_outputSearchGraphHypergraph;
|
||||
// }
|
||||
|
||||
// #ifdef HAVE_PROTOBUF
|
||||
// bool GetOutputSearchGraphPB() const {
|
||||
// return m_outputSearchGraphPB;
|
||||
// }
|
||||
// #endif
|
||||
const std::string& GetOutputUnknownsFile() const {
|
||||
return m_outputUnknownsFile;
|
||||
}
|
||||
|
||||
bool GetUnprunedSearchGraph() const {
|
||||
return m_unprunedSearchGraph;
|
||||
}
|
||||
// bool GetUnprunedSearchGraph() const {
|
||||
// return m_unprunedSearchGraph;
|
||||
// }
|
||||
|
||||
bool GetIncludeLHSInSearchGraph() const {
|
||||
return m_includeLHSInSearchGraph;
|
||||
@ -640,9 +528,9 @@ public:
|
||||
return m_sourceLabelOverlap;
|
||||
}
|
||||
|
||||
bool GetOutputHypoScore() const {
|
||||
return m_outputHypoScore;
|
||||
}
|
||||
// bool GetOutputHypoScore() const {
|
||||
// return m_outputHypoScore;
|
||||
// }
|
||||
size_t GetRuleLimit() const {
|
||||
return m_ruleLimit;
|
||||
}
|
||||
@ -675,16 +563,16 @@ public:
|
||||
return m_bookkeeping_options.need_alignment_info;
|
||||
// return m_needAlignmentInfo;
|
||||
}
|
||||
const std::string &GetAlignmentOutputFile() const {
|
||||
return m_alignmentOutputFile;
|
||||
}
|
||||
bool PrintAlignmentInfo() const {
|
||||
return m_PrintAlignmentInfo;
|
||||
}
|
||||
// const std::string &GetAlignmentOutputFile() const {
|
||||
// return m_alignmentOutputFile;
|
||||
// }
|
||||
// bool PrintAlignmentInfo() const {
|
||||
// return m_PrintAlignmentInfo;
|
||||
// }
|
||||
|
||||
WordAlignmentSort GetWordAlignmentSort() const {
|
||||
return m_wordAlignmentSort;
|
||||
}
|
||||
// WordAlignmentSort GetWordAlignmentSort() const {
|
||||
// return m_wordAlignmentSort;
|
||||
// }
|
||||
|
||||
bool GetHasAlternateWeightSettings() const {
|
||||
return m_weightSetting.size() > 0;
|
||||
|
@ -26,12 +26,12 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
const SHyperedge *best = GetBestSHyperedge();
|
||||
if (best == NULL) {
|
||||
VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << "0 ";
|
||||
}
|
||||
out << '\n';
|
||||
} else {
|
||||
if (StaticData::Instance().GetOutputHypoScore()) {
|
||||
if (options().output.ReportHypoScore) {
|
||||
out << best->label.score << " ";
|
||||
}
|
||||
Phrase yield = GetOneBestTargetYield(*best);
|
||||
@ -49,12 +49,10 @@ void Manager::OutputBest(OutputCollector *collector) const
|
||||
void Manager::OutputNBest(OutputCollector *collector) const
|
||||
{
|
||||
if (collector) {
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
long translationId = m_source.GetTranslationId();
|
||||
|
||||
KBestExtractor::KBestVec nBestList;
|
||||
ExtractKBest(staticData.options().nbest.nbest_size, nBestList,
|
||||
staticData.options().nbest.only_distinct);
|
||||
ExtractKBest(options().nbest.nbest_size, nBestList,
|
||||
options().nbest.only_distinct);
|
||||
OutputNBestList(collector, nBestList, translationId);
|
||||
}
|
||||
}
|
||||
@ -111,7 +109,8 @@ void Manager::OutputNBestList(OutputCollector *collector,
|
||||
out << translationId << " ||| ";
|
||||
OutputSurface(out, outputPhrase, outputFactorOrder, false);
|
||||
out << " ||| ";
|
||||
derivation.scoreBreakdown.OutputAllFeatureScores(out);
|
||||
bool with_labels = options().nbest.include_feature_labels;
|
||||
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
|
||||
out << " ||| " << derivation.score;
|
||||
|
||||
// optionally, print word alignments
|
||||
|
@ -66,7 +66,7 @@ template<typename RuleTrie>
|
||||
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
|
||||
const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
|
||||
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
|
||||
UnknownWordPenaltyProducer::Instance();
|
||||
@ -82,8 +82,8 @@ TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
|
||||
targetPhrase->EvaluateInIsolation(srcPhrase);
|
||||
targetPhrase->SetTargetLHS(&targetLhs);
|
||||
targetPhrase->SetAlignmentInfo("0-0");
|
||||
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() ||
|
||||
staticData.GetTreeStructure() != NULL) {
|
||||
if (!SD.options().output.detailed_tree_transrep_filepath.empty() ||
|
||||
SD.GetTreeStructure() != NULL) {
|
||||
std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " +
|
||||
oov[0]->GetString().as_string() + " ]";
|
||||
targetPhrase->SetProperty("Tree", value);
|
||||
|
@ -45,7 +45,11 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
}
|
||||
}
|
||||
|
||||
int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder)
|
||||
int
|
||||
TabbedSentence::
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
TabbedColumns allColumns;
|
||||
|
||||
@ -58,14 +62,14 @@ int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factor
|
||||
if(allColumns.size() < 2) {
|
||||
std::stringstream dummyStream;
|
||||
dummyStream << line << std::endl;
|
||||
return Sentence::Read(dummyStream, factorOrder);
|
||||
return Sentence::Read(dummyStream, factorOrder, opts);
|
||||
} else {
|
||||
m_columns.resize(allColumns.size() - 1);
|
||||
std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin());
|
||||
|
||||
std::stringstream dummyStream;
|
||||
dummyStream << allColumns[0] << std::endl;
|
||||
return Sentence::Read(dummyStream, factorOrder);
|
||||
return Sentence::Read(dummyStream, factorOrder, opts);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,9 @@ public:
|
||||
virtual void CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const std::string &tabbedString);
|
||||
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
virtual int
|
||||
Read(std::istream& in,const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
const TabbedColumns& GetColumns() const {
|
||||
return m_columns;
|
||||
|
@ -44,7 +44,7 @@ using namespace boost::algorithm;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
|
||||
PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
|
||||
|
||||
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
|
||||
:PhraseDictionary(line, true)
|
||||
|
@ -25,7 +25,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
|
||||
boost::thread_specific_ptr<typename TargetPhraseCollectionCache::CacheMap>
|
||||
boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
|
||||
TargetPhraseCollectionCache::m_phraseCache;
|
||||
|
||||
}
|
||||
|
@ -59,6 +59,18 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
exe check-coverage :
|
||||
check-coverage.cc
|
||||
$(TOP)/moses//moses
|
||||
$(TOP)/moses/TranslationModel/UG/generic//generic
|
||||
$(TOP)//boost_iostreams
|
||||
$(TOP)//boost_filesystem
|
||||
$(TOP)//boost_program_options
|
||||
$(TOP)/moses/TranslationModel/UG/mm//mm
|
||||
$(TOP)/moses/TranslationModel/UG//mmsapt
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
exe sim-pe :
|
||||
sim-pe.cc
|
||||
$(TOP)/moses//moses
|
||||
|
@ -17,7 +17,7 @@ echo $$d
|
||||
endef
|
||||
|
||||
MOSES_ROOT := $(shell $(find_moses_root))
|
||||
|
||||
$(info MOSES_ROOT=${MOSES_ROOT})
|
||||
# ===============================================================================
|
||||
# COMPILATION PREFERENCES
|
||||
# ===============================================================================
|
||||
@ -35,7 +35,9 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4
|
||||
CXXFLAGS += -DKENLM_MAX_ORDER=5
|
||||
CXXFLAGS += -DWITH_THREADS
|
||||
CXXFLAGS += -DNO_MOSES
|
||||
CXXFLAGS += -I${MOSES_ROOT} -I.
|
||||
CXXFLAGS += -DMMT
|
||||
CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only
|
||||
CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include
|
||||
|
||||
ifeq ($(variant),debug)
|
||||
CXXFLAGS += -ggdb -O0
|
||||
@ -45,7 +47,7 @@ else ifeq ($(variant),syntax)
|
||||
CXXFLAGS += -fsyntax-only
|
||||
endif
|
||||
|
||||
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/
|
||||
LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/
|
||||
|
||||
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
|
||||
WDIR = build/$(variant)
|
||||
@ -60,14 +62,22 @@ nil:
|
||||
|
||||
# libraries required
|
||||
|
||||
LIBS = m z bz2 pthread dl ${BOOSTLIBS}
|
||||
#LIBS += tcmalloc
|
||||
BOOSTLIBS := thread system filesystem program_options iostreams
|
||||
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
|
||||
ifdef ($(BOOSTLIBTAG),"")
|
||||
BOOSTLIBS := program_options iostreams thread system filesystem
|
||||
BOOSTLIBS := $(addprefix -lboost_,${BOOSTLIBS})
|
||||
# Append the Boost library tag (e.g. -mt) to every Boost library name, but
# only when a tag was actually supplied. GNU make does not strip quotes in
# the parenthesised ifeq/ifneq form, so the former test against "" compared
# with the literal two-character string and never matched a real tag.
ifneq ($(strip $(BOOSTLIBTAG)),)
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif
|
||||
|
||||
STATIC_LIBS = m bz2 z dl rt
|
||||
DYNAMIC_LIBS = pthread
|
||||
#DYNAMIC_LIBS += tcmalloc
|
||||
|
||||
LIBS = -Wl,-B$(link)
|
||||
LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS}
|
||||
LIBS += $(addprefix -l,${STATIC_LIBS})
|
||||
LIBS += -Wl,-Bdynamic
|
||||
LIBS += $(addprefix -l,${DYNAMIC_LIBS})
|
||||
|
||||
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
|
||||
$(patsubst .%,$(WDIR)%,$(basename $1))))
|
||||
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
|
||||
@ -79,7 +89,7 @@ DEP += $(basename $(call cc2obj,$1)).d
|
||||
$(call cc2obj,$1): $1
|
||||
@echo -e "COMPILING $1"
|
||||
@mkdir -p $$(@D)
|
||||
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
|
||||
${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
|
||||
|
||||
endef
|
||||
|
||||
@ -90,7 +100,7 @@ $(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
|
||||
ifneq ($(variant),syntax)
|
||||
@echo -e "LINKING $$@"
|
||||
@mkdir -p $${@D}
|
||||
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$<
|
||||
${CXX} ${CXXFLAGS} -o $$@ $$< $(LIBOBJ) ${LIBS}
|
||||
endif
|
||||
|
||||
endef
|
||||
@ -106,7 +116,8 @@ skip += ug_splice_arglist.cc
|
||||
# skip += ug_lexical_reordering.cc
|
||||
|
||||
# objects from elsewhere in the moses tree that are needed
|
||||
extra = ${MOSES_ROOT}/util/exception.cc
|
||||
extra = ${MOSES_ROOT}/util/exception.cc
|
||||
extra += ${MOSES_ROOT}/util/integer_to_string.cc
|
||||
|
||||
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
|
||||
broken += $(wildcard ./mm/stashed/*)
|
||||
|
81
moses/TranslationModel/UG/check-coverage.cc
Normal file
81
moses/TranslationModel/UG/check-coverage.cc
Normal file
@ -0,0 +1,81 @@
|
||||
// #include "mmsapt.h"
|
||||
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||
// #include "moses/TranslationTask.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/format.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include "mm/ug_bitext.h"
|
||||
#include "generic/file_io/ug_stream.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
using namespace Moses;
|
||||
using namespace sapt;
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
|
||||
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
|
||||
typedef mmBitext<Token> bitext_t;
|
||||
|
||||
// Strict weak ordering on (name, count) pairs: larger counts sort first.
// The name component does not take part in the comparison.
struct mycmp
{
  bool operator() (std::pair<std::string, uint32_t> const& lhs,
                   std::pair<std::string, uint32_t> const& rhs) const
  {
    return rhs.second < lhs.second;
  }
};
|
||||
|
||||
// Return the last component of `path` (everything after the final '/'),
// with `suffix` removed if that component ends with it.
//
// - A path without any '/' is treated as a bare file name.
// - A suffix that does not match, or that is longer than the final
//   component, leaves the component unchanged.
// - The result never contains the separating '/'.
std::string
basename(std::string const path, std::string const suffix)
{
  std::string::size_type const slash = path.find_last_of('/');
  // npos means "no directory part": start at the beginning of the string.
  std::string::size_type const start
    = (slash == std::string::npos) ? 0 : slash + 1;
  std::string::size_type len = path.size() - start;

  // Only strip the suffix when it fits entirely inside the final component;
  // this also rules out unsigned underflow for overly long suffixes.
  if (suffix.size() <= len
      && path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0)
    len -= suffix.size();

  return path.substr(start, len);
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
bitext_t B;
|
||||
B.open(argv[1],argv[2],argv[3]);
|
||||
string line;
|
||||
string ifile = argv[4];
|
||||
string docname = basename(ifile, string(".") + argv[2] + ".gz");
|
||||
boost::iostreams::filtering_istream in;
|
||||
ugdiss::open_input_stream(ifile,in);
|
||||
while(getline(in,line))
|
||||
{
|
||||
cout << line << " [" << docname << "]" << endl;
|
||||
vector<id_type> snt;
|
||||
B.V1->fillIdSeq(line,snt);
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
bitext_t::iter m(B.I1.get());
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
|
||||
{
|
||||
if (m.ca() > 500) continue;
|
||||
sapt::tsa::ArrayEntry I(m.lower_bound(-1));
|
||||
char const* stop = m.upper_bound(-1);
|
||||
map<string,uint32_t> cnt;
|
||||
while (I.next != stop)
|
||||
{
|
||||
m.root->readEntry(I.next,I);
|
||||
++cnt[B.docname(I.sid)];
|
||||
}
|
||||
cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl;
|
||||
typedef pair<string,uint32_t> entry;
|
||||
vector<entry> ranked; ranked.reserve(cnt.size());
|
||||
BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
|
||||
sort(ranked.begin(),ranked.end(),mycmp());
|
||||
BOOST_FOREACH(entry const& e, ranked)
|
||||
cout << setw(12) << " " << e.second << " " << e.first << endl;
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
67
moses/TranslationModel/UG/check-coverage2.cc
Normal file
67
moses/TranslationModel/UG/check-coverage2.cc
Normal file
@ -0,0 +1,67 @@
|
||||
// for each word in the input, keep track of the longest matching ngram covering it
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/format.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include "mm/ug_bitext.h"
|
||||
#include "generic/file_io/ug_stream.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
using namespace Moses;
|
||||
using namespace sapt;
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
|
||||
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
|
||||
typedef mmBitext<Token> bitext_t;
|
||||
|
||||
// Comparator for (name, count) pairs that places higher counts first;
// the name is ignored.
struct mycmp
{
  bool operator() (std::pair<std::string, uint32_t> const& left,
                   std::pair<std::string, uint32_t> const& right) const
  {
    return right.second < left.second;
  }
};
|
||||
|
||||
// Return the last component of `path` (everything after the final '/'),
// with `suffix` removed if that component ends with it.
//
// - A path without any '/' is treated as a bare file name.
// - A suffix that does not match, or that is longer than the final
//   component, leaves the component unchanged.
// - The result never contains the separating '/'.
std::string
basename(std::string const path, std::string const suffix)
{
  std::string::size_type const slash = path.find_last_of('/');
  // npos means "no directory part": start at the beginning of the string.
  std::string::size_type const start
    = (slash == std::string::npos) ? 0 : slash + 1;
  std::string::size_type len = path.size() - start;

  // Only strip the suffix when it fits entirely inside the final component;
  // this also rules out unsigned underflow for overly long suffixes.
  if (suffix.size() <= len
      && path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0)
    len -= suffix.size();

  return path.substr(start, len);
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
bitext_t B;
|
||||
B.open(argv[1],argv[2],argv[3]);
|
||||
B.V1->setDynamic(true);
|
||||
string line;
|
||||
string ifile = argv[4];
|
||||
string docname = basename(ifile, string(".") + argv[2] + ".gz");
|
||||
boost::iostreams::filtering_istream in;
|
||||
ugdiss::open_input_stream(ifile,in);
|
||||
while(getline(in,line))
|
||||
{
|
||||
cout << line << " [" << docname << "]" << endl;
|
||||
vector<id_type> snt;
|
||||
B.V1->fillIdSeq(line,snt);
|
||||
vector<size_t> match(snt.size(),0);
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
bitext_t::iter m(B.I1.get());
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
|
||||
for (size_t j = 0; j < m.size(); ++j)
|
||||
|
||||
match[i+j] = max(match[i+j], m.size());
|
||||
}
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
cout << setw(3) << match[i] << " " << (*B.V1)[snt[i]] << endl;
|
||||
}
|
||||
}
|
70
moses/TranslationModel/UG/check-coverage3.cc
Normal file
70
moses/TranslationModel/UG/check-coverage3.cc
Normal file
@ -0,0 +1,70 @@
|
||||
// #include "mmsapt.h"
|
||||
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||
// #include "moses/TranslationTask.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/format.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include "mm/ug_bitext.h"
|
||||
#include "generic/file_io/ug_stream.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include "mm/ug_bitext_sampler.h"
|
||||
|
||||
using namespace Moses;
|
||||
using namespace sapt;
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
|
||||
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
|
||||
typedef mmBitext<Token> bitext_t;
|
||||
|
||||
// Sorts (name, count) pairs in descending order of count; names play no
// role in the ordering.
struct mycmp
{
  bool operator() (std::pair<std::string, uint32_t> const& x,
                   std::pair<std::string, uint32_t> const& y) const
  {
    return y.second < x.second;
  }
};
|
||||
|
||||
// Return the last component of `path` (everything after the final '/'),
// with `suffix` removed if that component ends with it.
//
// - A path without any '/' is treated as a bare file name.
// - A suffix that does not match, or that is longer than the final
//   component, leaves the component unchanged.
// - The result never contains the separating '/'.
std::string
basename(std::string const path, std::string const suffix)
{
  std::string::size_type const slash = path.find_last_of('/');
  // npos means "no directory part": start at the beginning of the string.
  std::string::size_type const start
    = (slash == std::string::npos) ? 0 : slash + 1;
  std::string::size_type len = path.size() - start;

  // Only strip the suffix when it fits entirely inside the final component;
  // this also rules out unsigned underflow for overly long suffixes.
  if (suffix.size() <= len
      && path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0)
    len -= suffix.size();

  return path.substr(start, len);
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
boost::intrusive_ptr<bitext_t> B(new bitext_t);
|
||||
B->open(argv[1],argv[2],argv[3]);
|
||||
string line;
|
||||
string ifile = argv[4];
|
||||
string docname = basename(ifile, string(".") + argv[2] + ".gz");
|
||||
boost::iostreams::filtering_istream in;
|
||||
ugdiss::open_input_stream(ifile,in);
|
||||
while(getline(in,line))
|
||||
{
|
||||
cout << line << " [" << docname << "]" << endl;
|
||||
vector<id_type> snt;
|
||||
B->V1->fillIdSeq(line,snt);
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
bitext_t::iter m(B->I1.get());
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
|
||||
{
|
||||
SPTR<SamplingBias const> zilch;
|
||||
BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
|
||||
sapt::random_sampling);
|
||||
s();
|
||||
cout << m.size() << " " << s.stats()->trg.size() << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -29,7 +29,7 @@ HOST ?= $(shell hostname)
|
||||
HOSTTYPE ?= $(shell uname -m)
|
||||
KERNEL = $(shell uname -r)
|
||||
|
||||
MOSES_ROOT = ${HOME}/code/mosesdecoder
|
||||
MOSES_ROOT ?= ${HOME}/code/mosesdecoder
|
||||
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
|
||||
VPATH = ${HOME}/code/mosesdecoder/
|
||||
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
|
@ -28,8 +28,8 @@ Bitext<Token>::agenda
|
||||
while (j->nextSample(sid,offset))
|
||||
{
|
||||
aln.clear();
|
||||
int po_fwd = Moses::LRModel::NONE;
|
||||
int po_bwd = Moses::LRModel::NONE;
|
||||
int po_fwd = LRModel::NONE;
|
||||
int po_bwd = LRModel::NONE;
|
||||
int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1;
|
||||
bitvector* full_aln = j->fwd ? &full_alignment : NULL;
|
||||
|
||||
|
@ -17,7 +17,7 @@ namespace sapt
|
||||
jstats()
|
||||
: my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0)
|
||||
{
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
|
||||
for (int i = 0; i <= LRModel::NONE; ++i)
|
||||
ofwd[i] = obwd[i] = 0;
|
||||
my_aln.reserve(1);
|
||||
}
|
||||
@ -30,7 +30,7 @@ namespace sapt
|
||||
my_bcnt = other.bcnt();
|
||||
my_aln = other.aln();
|
||||
indoc = other.indoc;
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; i++)
|
||||
for (int i = 0; i <= LRModel::NONE; i++)
|
||||
{
|
||||
ofwd[i] = other.ofwd[i];
|
||||
obwd[i] = other.obwd[i];
|
||||
@ -41,7 +41,7 @@ namespace sapt
|
||||
jstats::
|
||||
dcnt_fwd(PhraseOrientation const idx) const
|
||||
{
|
||||
assert(idx <= Moses::LRModel::NONE);
|
||||
assert(idx <= LRModel::NONE);
|
||||
return ofwd[idx];
|
||||
}
|
||||
|
||||
@ -49,7 +49,7 @@ namespace sapt
|
||||
jstats::
|
||||
dcnt_bwd(PhraseOrientation const idx) const
|
||||
{
|
||||
assert(idx <= Moses::LRModel::NONE);
|
||||
assert(idx <= LRModel::NONE);
|
||||
return obwd[idx];
|
||||
}
|
||||
|
||||
|
@ -24,8 +24,8 @@ namespace sapt
|
||||
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
|
||||
// internal word alignment
|
||||
|
||||
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts
|
||||
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
|
||||
uint32_t ofwd[LRModel::NONE+1]; // forward distortion type counts
|
||||
uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts
|
||||
|
||||
public:
|
||||
std::map<uint32_t,uint32_t> indoc;
|
||||
@ -48,8 +48,8 @@ namespace sapt
|
||||
bool valid();
|
||||
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
|
||||
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
|
||||
void fill_lr_vec(Moses::LRModel::Direction const& dir,
|
||||
Moses::LRModel::ModelType const& mdl,
|
||||
void fill_lr_vec(LRModel::Direction const& dir,
|
||||
LRModel::ModelType const& mdl,
|
||||
std::vector<float>& v);
|
||||
};
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ namespace sapt
|
||||
pstats::
|
||||
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
|
||||
{
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
|
||||
for (int i = 0; i <= LRModel::NONE; ++i)
|
||||
ofwd[i] = obwd[i] = 0;
|
||||
}
|
||||
|
||||
|
@ -30,8 +30,8 @@ namespace sapt
|
||||
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
|
||||
size_t in_progress; // how many threads are currently working on this?
|
||||
|
||||
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
|
||||
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
|
||||
uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations
|
||||
uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations
|
||||
|
||||
indoc_map_t indoc;
|
||||
trg_map_t trg;
|
||||
@ -43,13 +43,13 @@ namespace sapt
|
||||
|
||||
bool
|
||||
add(uint64_t const pid, // target phrase id
|
||||
float const w, // sample weight (1./(# of phrases extractable))
|
||||
float const b, // sample bias score
|
||||
alnvec const& a, // local alignment
|
||||
uint32_t const cnt2, // raw target phrase count
|
||||
uint32_t fwd_o, // fwd. phrase orientation
|
||||
uint32_t bwd_o, // bwd. phrase orientation
|
||||
int const docid); // document where sample was found
|
||||
float const w, // sample weight (1./(# of phrases extractable))
|
||||
float const b, // sample bias score
|
||||
alnvec const& a, // local alignment
|
||||
uint32_t const cnt2, // raw target phrase count
|
||||
uint32_t fwd_o, // fwd. phrase orientation
|
||||
uint32_t bwd_o, // bwd. phrase orientation
|
||||
int const docid); // document where sample was found
|
||||
|
||||
void
|
||||
count_sample(int const docid, // document where sample was found
|
||||
|
@ -74,8 +74,11 @@ BitextSampler : public Moses::reference_counter
|
||||
public:
|
||||
BitextSampler(BitextSampler const& other);
|
||||
BitextSampler const& operator=(BitextSampler const& other);
|
||||
BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase,
|
||||
SPTR<SamplingBias const> const& bias, size_t const min_samples, size_t const max_samples,
|
||||
BitextSampler(bitext const* const bitext,
|
||||
typename bitext::iter const& phrase,
|
||||
SPTR<SamplingBias const> const& bias,
|
||||
size_t const min_samples,
|
||||
size_t const max_samples,
|
||||
sampling_method const method);
|
||||
~BitextSampler();
|
||||
SPTR<pstats> stats();
|
||||
|
@ -227,7 +227,9 @@ namespace sapt
|
||||
|
||||
// Now sort the array
|
||||
if (log) *log << "sorting .... with " << threads << " threads." << std::endl;
|
||||
#ifndef NO_MOSES
|
||||
double start_time = util::WallTime();
|
||||
#endif
|
||||
boost::scoped_ptr<ug::ThreadPool> tpool;
|
||||
tpool.reset(new ug::ThreadPool(threads));
|
||||
|
||||
@ -252,8 +254,10 @@ namespace sapt
|
||||
}
|
||||
}
|
||||
tpool.reset();
|
||||
#ifndef NO_MOSES
|
||||
if (log) *log << "Done sorting after " << util::WallTime() - start_time
|
||||
<< " seconds." << std::endl;
|
||||
#endif
|
||||
this->startArray = reinterpret_cast<char const*>(&(*sufa.begin()));
|
||||
this->endArray = reinterpret_cast<char const*>(&(*sufa.end()));
|
||||
this->numTokens = sufa.size();
|
||||
|
@ -4,7 +4,7 @@ namespace sapt
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
Moses::LRModel::ReorderingType po_other = Moses::LRModel::NONE;
|
||||
LRModel::ReorderingType po_other = LRModel::NONE;
|
||||
// check if min and max in the alignment vector v are within the
|
||||
// bounds LFT and RGT and update the actual bounds L and R; update
|
||||
// the total count of alignment links in the underlying phrase
|
||||
@ -83,54 +83,56 @@ namespace sapt
|
||||
return ret;
|
||||
}
|
||||
|
||||
Moses::LRModel::ReorderingType
|
||||
// LRModel::ReorderingType
|
||||
sapt::PhraseOrientation
|
||||
find_po_fwd(vector<vector<ushort> >& a1,
|
||||
vector<vector<ushort> >& a2,
|
||||
size_t s1, size_t e1,
|
||||
size_t s2, size_t e2)
|
||||
{
|
||||
if (e2 == a2.size()) // end of target sentence
|
||||
return Moses::LRModel::M;
|
||||
return LRModel::M;
|
||||
size_t y = e2, L = e2, R = a2.size()-1; // won't change
|
||||
size_t x = e1, T = e1, B = a1.size()-1;
|
||||
if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
|
||||
return Moses::LRModel::M;
|
||||
return LRModel::M;
|
||||
B = x = s1-1; T = 0;
|
||||
if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
|
||||
return Moses::LRModel::S;
|
||||
return LRModel::S;
|
||||
while (e2 < a2.size() && a2[e2].size() == 0) ++e2;
|
||||
if (e2 == a2.size()) // should never happen, actually
|
||||
return Moses::LRModel::NONE;
|
||||
return LRModel::NONE;
|
||||
if (a2[e2].back() < s1)
|
||||
return Moses::LRModel::DL;
|
||||
return LRModel::DL;
|
||||
if (a2[e2].front() >= e1)
|
||||
return Moses::LRModel::DR;
|
||||
return Moses::LRModel::NONE;
|
||||
return LRModel::DR;
|
||||
return LRModel::NONE;
|
||||
}
|
||||
|
||||
|
||||
Moses::LRModel::ReorderingType
|
||||
// LRModel::ReorderingType
|
||||
PhraseOrientation
|
||||
find_po_bwd(vector<vector<ushort> >& a1,
|
||||
vector<vector<ushort> >& a2,
|
||||
size_t s1, size_t e1,
|
||||
size_t s2, size_t e2)
|
||||
{
|
||||
if (s1 == 0 && s2 == 0) return Moses::LRModel::M;
|
||||
if (s2 == 0) return Moses::LRModel::DR;
|
||||
if (s1 == 0) return Moses::LRModel::DL;
|
||||
if (s1 == 0 && s2 == 0) return LRModel::M;
|
||||
if (s2 == 0) return LRModel::DR;
|
||||
if (s1 == 0) return LRModel::DL;
|
||||
size_t y = s2-1, L = 0, R = s2-1; // won't change
|
||||
size_t x = s1-1, T = 0, B = s1-1;
|
||||
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
|
||||
return Moses::LRModel::M;
|
||||
return LRModel::M;
|
||||
T = x = e1; B = a1.size()-1;
|
||||
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
|
||||
return Moses::LRModel::S;
|
||||
return LRModel::S;
|
||||
while (s2-- && a2[s2].size() == 0);
|
||||
|
||||
Moses::LRModel::ReorderingType ret;
|
||||
LRModel::ReorderingType ret;
|
||||
ret = (a2[s2].size() == 0 ? po_other :
|
||||
a2[s2].back() < s1 ? Moses::LRModel::DR :
|
||||
a2[s2].front() >= e1 ? Moses::LRModel::DL :
|
||||
a2[s2].back() < s1 ? LRModel::DR :
|
||||
a2[s2].front() >= e1 ? LRModel::DL :
|
||||
po_other);
|
||||
#if 0
|
||||
cout << "s1=" << s1 << endl;
|
||||
|
@ -12,7 +12,7 @@ namespace sapt {
|
||||
|
||||
#ifdef NO_MOSES
|
||||
class LRModel{
|
||||
|
||||
public:
|
||||
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
|
||||
enum Direction { Forward, Backward, Bidirectional };
|
||||
|
||||
|
@ -26,8 +26,8 @@ namespace sapt
|
||||
uint32_t raw1, raw2, sample1, sample2, good1, good2, joint;
|
||||
float cum_bias;
|
||||
std::vector<float> fvals;
|
||||
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs?
|
||||
float dbwd[Moses::LRModel::NONE+1]; // distortion counts
|
||||
float dfwd[LRModel::NONE+1]; // distortion counts // counts or probs?
|
||||
float dbwd[LRModel::NONE+1]; // distortion counts
|
||||
std::vector<unsigned char> aln;
|
||||
float score;
|
||||
bool inverse;
|
||||
@ -125,7 +125,7 @@ namespace sapt
|
||||
// }
|
||||
|
||||
// should we do that here or leave the raw counts?
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; i++)
|
||||
for (int i = 0; i <= LRModel::NONE; i++)
|
||||
{
|
||||
PhraseOrientation po = static_cast<PhraseOrientation>(i);
|
||||
dfwd[i] = js.dcnt_fwd(po);
|
||||
@ -201,7 +201,7 @@ namespace sapt
|
||||
, inverse(o.inverse)
|
||||
, indoc(o.indoc)
|
||||
{
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
|
||||
for (int i = 0; i <= LRModel::NONE; ++i)
|
||||
{
|
||||
dfwd[i] = o.dfwd[i];
|
||||
dbwd[i] = o.dbwd[i];
|
||||
|
@ -63,7 +63,9 @@ namespace Moses
|
||||
, btfix(new mmbitext)
|
||||
, m_bias_log(NULL)
|
||||
, m_bias_loglevel(0)
|
||||
#ifndef NO_MOSES
|
||||
, m_lr_func(NULL)
|
||||
#endif
|
||||
, m_sampling_method(random_sampling)
|
||||
, bias_key(((char*)this)+3)
|
||||
, cache_key(((char*)this)+2)
|
||||
@ -597,6 +599,7 @@ namespace Moses
|
||||
// Evaluate with all features that can be computed using available factors
|
||||
tp->EvaluateInIsolation(src, m_featuresToApply);
|
||||
|
||||
#ifndef NO_MOSES
|
||||
if (m_lr_func)
|
||||
{
|
||||
LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType();
|
||||
@ -605,6 +608,7 @@ namespace Moses
|
||||
pool.fill_lr_vec(dir, mdl, *scores);
|
||||
tp->SetExtraScores(m_lr_func, scores);
|
||||
}
|
||||
#endif
|
||||
|
||||
return tp;
|
||||
}
|
||||
@ -863,10 +867,10 @@ namespace Moses
|
||||
boost::unique_lock<boost::shared_mutex> ctxlock(context->lock);
|
||||
|
||||
if (localcache) std::cerr << "have local cache " << std::endl;
|
||||
std::cerr << "BOO at " << HERE << std::endl;
|
||||
// std::cerr << "BOO at " << HERE << std::endl;
|
||||
if (!localcache)
|
||||
{
|
||||
std::cerr << "no local cache at " << HERE << std::endl;
|
||||
// std::cerr << "no local cache at " << HERE << std::endl;
|
||||
setup_bias(ttask);
|
||||
if (context->bias)
|
||||
{
|
||||
@ -879,6 +883,7 @@ namespace Moses
|
||||
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
|
||||
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
|
||||
|
||||
#ifndef NO_MOSES
|
||||
if (m_lr_func_name.size() && m_lr_func == NULL)
|
||||
{
|
||||
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
|
||||
@ -887,6 +892,7 @@ namespace Moses
|
||||
<< " does not seem to be a lexical reordering function!");
|
||||
// todo: verify that lr_func implements a hierarchical reordering model
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -26,7 +26,9 @@
|
||||
|
||||
#include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h"
|
||||
|
||||
#ifndef NO_MOSES
|
||||
#include "moses/FF/LexicalReordering/LexicalReordering.h"
|
||||
#endif
|
||||
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/FactorTypeSet.h"
|
||||
@ -82,7 +84,9 @@ namespace Moses
|
||||
boost::scoped_ptr<std::ofstream> m_bias_logger; // for logging to a file
|
||||
std::ostream* m_bias_log;
|
||||
int m_bias_loglevel;
|
||||
#ifndef NO_MOSES
|
||||
LexicalReordering* m_lr_func; // associated lexical reordering function
|
||||
#endif
|
||||
std::string m_lr_func_name; // name of associated lexical reordering function
|
||||
sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler
|
||||
boost::scoped_ptr<ug::ThreadPool> m_thread_pool;
|
||||
|
@ -69,7 +69,7 @@ int main(int argc, char* argv[])
|
||||
while (true)
|
||||
{
|
||||
boost::shared_ptr<Sentence> phrase(new Sentence);
|
||||
if (!phrase->Read(cin,ifo)) break;
|
||||
if (!phrase->Read(cin,ifo, StaticData::Instance().options())) break;
|
||||
boost::shared_ptr<TranslationTask> ttask;
|
||||
ttask = TranslationTask::create(phrase);
|
||||
if (pdta)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- makefile -*-
|
||||
# # -*- makefile -*-
|
||||
|
||||
MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
|
||||
MOSES_ROOT=/fs/gna0/germann/moses
|
||||
LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
|
||||
ibm1-align: ibm1-align.cc
|
||||
g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb
|
||||
# MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
|
||||
# MOSES_ROOT=/fs/gna0/germann/moses
|
||||
# LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
|
||||
# ibm1-align: ibm1-align.cc
|
||||
# g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb
|
@ -1,4 +1,4 @@
|
||||
// $Id$
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
@ -12,7 +12,7 @@
|
||||
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||
#include "util/exception.hh"
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
#include "TranslationTask.h"
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
||||
size_t inputSize = input.GetSize();
|
||||
m_inputPathMatrix.resize(inputSize);
|
||||
|
||||
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||
size_t maxSizePhrase = ttask->options().search.max_phrase_length;
|
||||
maxSizePhrase = std::min(inputSize, maxSizePhrase);
|
||||
|
||||
// 1-word phrases
|
||||
@ -234,8 +234,10 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st
|
||||
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
|
||||
const DecodeStep &decodeStep = **iterStep;
|
||||
|
||||
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY
|
||||
(m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList);
|
||||
DecodeStepTranslation const& dstep
|
||||
= static_cast<const DecodeStepTranslation&>(decodeStep);
|
||||
dstep.ProcessInitialTransLEGACY(m_source, *oldPtoc, startPos, endPos,
|
||||
adhereTableLimit, inputPathList);
|
||||
|
||||
// do rest of decode steps
|
||||
int indexStep = 0;
|
||||
|
@ -186,7 +186,8 @@ void TranslationTask::Run()
|
||||
|
||||
// report thread number
|
||||
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
|
||||
VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl);
|
||||
VERBOSE(2, "Translating line " << translationId << " in thread id "
|
||||
<< pthread_self() << endl);
|
||||
#endif
|
||||
|
||||
|
||||
@ -214,8 +215,8 @@ void TranslationTask::Run()
|
||||
OutputCollector* ocoll;
|
||||
Timer additionalReportingTime;
|
||||
additionalReportingTime.start();
|
||||
|
||||
boost::shared_ptr<IOWrapper> const& io = m_ioWrapper;
|
||||
|
||||
manager->OutputBest(io->GetSingleBestOutputCollector());
|
||||
|
||||
// output word graph
|
||||
@ -229,7 +230,7 @@ void TranslationTask::Run()
|
||||
|
||||
// Output search graph in hypergraph format for Kenneth Heafield's
|
||||
// lazy hypergraph decoder; writes to stderr
|
||||
if (StaticData::Instance().GetOutputSearchGraphHypergraph()) {
|
||||
if (options().output.SearchGraphHG.size()) {
|
||||
size_t transId = manager->GetSource().GetTranslationId();
|
||||
string fname = io->GetHypergraphOutputFileName(transId);
|
||||
manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
|
||||
|
@ -237,7 +237,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
|
||||
}
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
||||
int
|
||||
TreeInput::
|
||||
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
@ -254,7 +257,7 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
||||
stringstream strme;
|
||||
strme << line << endl;
|
||||
|
||||
Sentence::Read(strme, factorOrder);
|
||||
Sentence::Read(strme, factorOrder, opts);
|
||||
|
||||
// size input chart
|
||||
size_t sourceSize = GetSize();
|
||||
|
@ -53,7 +53,10 @@ public:
|
||||
}
|
||||
|
||||
//! populate this InputType with data from in stream
|
||||
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
virtual int
|
||||
Read(std::istream& in,
|
||||
const std::vector<FactorType>& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
//! Output debugging info to stream out
|
||||
virtual void Print(std::ostream&) const;
|
||||
|
@ -147,7 +147,11 @@ InitializeFromPCNDataType
|
||||
return !cn.empty();
|
||||
}
|
||||
|
||||
int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
|
||||
int
|
||||
WordLattice::
|
||||
Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts)
|
||||
{
|
||||
Clear();
|
||||
std::string line;
|
||||
|
@ -43,7 +43,9 @@ public:
|
||||
int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line = "");
|
||||
/** Read from PLF format (1 lattice per line)
|
||||
*/
|
||||
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
|
||||
int Read(std::istream& in,
|
||||
std::vector<FactorType> const& factorOrder,
|
||||
AllOptions const& opts);
|
||||
|
||||
/** Convert internal representation into an edge matrix
|
||||
* @note edges[1][2] means there is an edge from 1 to 2
|
||||
|
@ -21,6 +21,7 @@ namespace Moses
|
||||
if (!input.init(param)) return false;
|
||||
if (!mbr.init(param)) return false;
|
||||
if (!lmbr.init(param)) return false;
|
||||
if (!output.init(param)) return false;
|
||||
|
||||
param.SetParameter(mira, "mira", false);
|
||||
|
||||
@ -45,12 +46,31 @@ namespace Moses
|
||||
{
|
||||
if (mbr.enabled)
|
||||
{
|
||||
cerr << "Error: Cannot use consensus decoding together with mbr" << endl;
|
||||
cerr << "Error: Cannot use consensus decoding together with mbr"
|
||||
<< endl;
|
||||
return false;
|
||||
}
|
||||
mbr.enabled = true;
|
||||
}
|
||||
|
||||
// RecoverPath should only be used with confusion net or word lattice input
|
||||
if (output.RecoverPath && input.input_type == SentenceInput)
|
||||
{
|
||||
TRACE_ERR("--recover-input-path should only be used with "
|
||||
<<"confusion net or word lattice input!\n");
|
||||
output.RecoverPath = false;
|
||||
}
|
||||
|
||||
// set m_nbest_options.enabled = true if necessary:
|
||||
nbest.enabled = (nbest.enabled || mira || search.consensus
|
||||
|| nbest.nbest_size > 0
|
||||
|| !output.SearchGraph.empty()
|
||||
|| !output.SearchGraphExtended.empty()
|
||||
|| !output.SearchGraphSLF.empty()
|
||||
|| !output.SearchGraphHG.empty()
|
||||
|| !output.SearchGraphPB.empty()
|
||||
|| output.lattice_sample_size != 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -67,9 +87,24 @@ namespace Moses
|
||||
if (!input.update(param)) return false;
|
||||
if (!mbr.update(param)) return false;
|
||||
if (!lmbr.update(param)) return false;
|
||||
return true;
|
||||
if (!output.update(param)) return false;
|
||||
return sanity_check();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool
|
||||
AllOptions::
|
||||
NBestDistinct() const
|
||||
{
|
||||
return (nbest.only_distinct
|
||||
|| mbr.enabled || lmbr.enabled
|
||||
|| output.lattice_sample_size
|
||||
|| !output.SearchGraph.empty()
|
||||
|| !output.SearchGraphExtended.empty()
|
||||
|| !output.SearchGraphSLF.empty()
|
||||
|| !output.SearchGraphHG.empty());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "InputOptions.h"
|
||||
#include "MBR_Options.h"
|
||||
#include "LMBR_Options.h"
|
||||
#include "ReportingOptions.h"
|
||||
namespace Moses
|
||||
{
|
||||
struct
|
||||
@ -24,7 +25,7 @@ namespace Moses
|
||||
InputOptions input;
|
||||
MBR_Options mbr;
|
||||
LMBR_Options lmbr;
|
||||
|
||||
ReportingOptions output;
|
||||
bool mira;
|
||||
|
||||
// StackOptions stack;
|
||||
@ -38,6 +39,8 @@ namespace Moses
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
#endif
|
||||
|
||||
bool NBestDistinct() const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
// -*- mode: c++; cc-style: gnu -*-
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "moses/Parameter.h"
|
||||
#include "NBestOptions.h"
|
||||
|
||||
@ -33,4 +33,21 @@ init(Parameter const& P)
|
||||
enabled = output_file_path.size();
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
NBestOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
params_t::const_iterator si = param.find("nbest");
|
||||
if (si != param.end())
|
||||
nbest_size = xmlrpc_c::value_int(si->second);
|
||||
only_distinct = check(param, "nbest-distinct");
|
||||
enabled = (nbest_size > 0);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -24,6 +24,10 @@ struct NBestOptions : public OptionsBaseClass
|
||||
|
||||
bool init(Parameter const& param);
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,16 @@ namespace Moses
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
OptionsBaseClass::
|
||||
check(std::map<std::string, xmlrpc_c::value> const& param,
|
||||
std::string const key)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
return (param.find(key) != param.end());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,10 @@ namespace Moses
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
virtual bool
|
||||
update(std::map<std::string,xmlrpc_c::value>const& params);
|
||||
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& param,
|
||||
std::string const key);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#if 0
|
||||
#include "ReportingOptions.h"
|
||||
#include "moses/Parameter.h"
|
||||
|
||||
@ -9,82 +8,70 @@ namespace Moses {
|
||||
ReportingOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
// including factors in the output
|
||||
param.SetParameter(ReportAllFactors, "report-all-factors", false);
|
||||
|
||||
// segmentation reporting
|
||||
ReportSegmentation = (param.GetParam("report-segmentation-enriched")
|
||||
? 2 : param.GetParam("report-segmentation")
|
||||
? 1 : 0);
|
||||
|
||||
// word alignment reporting
|
||||
param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false);
|
||||
param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort);
|
||||
std::string e; // hack to save us param.SetParameter<string>(...)
|
||||
param.SetParameter(AlignmentOutputFile,"alignment-output-file", e);
|
||||
|
||||
// output a word graph
|
||||
PARAM_VEC const* params;
|
||||
|
||||
param.SetParameter(segmentation, "report-segmentation", false );
|
||||
param.SetParameter(segmentation_enriched, "report-segmentation-enriched", false);
|
||||
param.SetParameter(all_factors, "report-all-factors", false );
|
||||
|
||||
// print ...
|
||||
param.SetParameter(id, "print-id", false );
|
||||
param.SetParameter(aln_info, "print-alignment-info", false);
|
||||
param.SetParameter(passthrough, "print-passthrough", false );
|
||||
|
||||
param.SetParameter<string>(detailed_transrep_filepath, "translation-details", "");
|
||||
param.SetParameter<string>(detailed_tree_transrep_filepath,
|
||||
"tree-translation-details", "");
|
||||
param.SetParameter<string>(detailed_all_transrep_filepath,
|
||||
"translation-all-details", "");
|
||||
|
||||
// output search graph
|
||||
param.SetParameter<string>(output,
|
||||
"translation-all-details", "");
|
||||
|
||||
|
||||
|
||||
param.SetParameter(sort_word_alignment, "sort-word-alignment", NoSort);
|
||||
|
||||
|
||||
// Is there a reason why we can't use SetParameter here? [UG]
|
||||
= param.GetParam("alignment-output-file");
|
||||
if (params && params->size()) {
|
||||
m_alignmentOutputFile = Scan<std::string>(params->at(0));
|
||||
}
|
||||
|
||||
params = param.GetParam("output-word-graph");
|
||||
output_word_graph = (params && params->size() == 2);
|
||||
|
||||
// bizarre code ahead! Why do we need to do the checks here?
|
||||
// as adapted from StaticData.cpp
|
||||
params = param.GetParam("output-search-graph");
|
||||
if (params && params->size()) {
|
||||
if (params->size() != 1) {
|
||||
std::cerr << "ERROR: wrong format for switch -output-search-graph file";
|
||||
return false;
|
||||
}
|
||||
output_search_graph = true;
|
||||
}
|
||||
else if (m_parameter->GetParam("output-search-graph-extended") &&
|
||||
m_parameter->GetParam("output-search-graph-extended")->size()) {
|
||||
if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) {
|
||||
std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file";
|
||||
return false;
|
||||
}
|
||||
output_search_graph = true;
|
||||
m_outputSearchGraphExtended = true;
|
||||
} else {
|
||||
m_outputSearchGraph = false;
|
||||
}
|
||||
|
||||
params = m_parameter->GetParam("output-search-graph-slf");
|
||||
output_search_graph_slf = params && params->size();
|
||||
params = m_parameter->GetParam("output-search-graph-hypergraph");
|
||||
output_search_graph_hypergraph = params && params->size();
|
||||
WordGraph = (params && params->size() == 2); // what are the two options?
|
||||
|
||||
// dump the search graph
|
||||
param.SetParameter(SearchGraph, "output-search-graph", e);
|
||||
param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e);
|
||||
param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e);
|
||||
param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e);
|
||||
#ifdef HAVE_PROTOBUF
|
||||
params = m_parameter->GetParam("output-search-graph-pb");
|
||||
if (params && params->size()) {
|
||||
if (params->size() != 1) {
|
||||
cerr << "ERROR: wrong format for switch -output-search-graph-pb path";
|
||||
return false;
|
||||
}
|
||||
m_outputSearchGraphPB = true;
|
||||
} else
|
||||
m_outputSearchGraphPB = false;
|
||||
param.SetParameter(SearchGraphPB, "output-search-graph-pb", e);
|
||||
#endif
|
||||
|
||||
param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false);
|
||||
|
||||
|
||||
// miscellaneous
|
||||
param.SetParameter(RecoverPath, "recover-input-path",false);
|
||||
param.SetParameter(ReportHypoScore, "output-hypo-score",false);
|
||||
param.SetParameter(PrintID, "print-id",false);
|
||||
param.SetParameter(PrintPassThrough, "print-passthrough",false);
|
||||
param.SetParameter(detailed_all_transrep_filepath,
|
||||
"translation-all-details", e);
|
||||
param.SetParameter(detailed_transrep_filepath, "translation-details", e);
|
||||
param.SetParameter(detailed_tree_transrep_filepath,
|
||||
"tree-translation-details", e);
|
||||
|
||||
params = param.GetParam("lattice-samples");
|
||||
if (params) {
|
||||
if (params->size() ==2 ) {
|
||||
lattice_sample_filepath = params->at(0);
|
||||
lattice_sample_size = Scan<size_t>(params->at(1));
|
||||
} else {
|
||||
std::cerr <<"wrong format for switch -lattice-samples file size";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
lattice_sample_size = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
ReportingOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
ReportAllFactors = check(param, "report-all-factors");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -2,40 +2,59 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "moses/Parameter.h"
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct
|
||||
ReportingOptions
|
||||
ReportingOptions : public OptionsBaseClass
|
||||
{
|
||||
bool ReportAllFactors; // m_reportAllFactors;
|
||||
|
||||
WordAlignmentSort sort_word_alignment; // 0: no, 1: target order
|
||||
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
||||
|
||||
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
|
||||
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
|
||||
std::string AlignmentOutputFile;
|
||||
|
||||
bool segmentation; // m_reportSegmentation;
|
||||
bool segmentation_enriched; // m_reportSegmentationEnriched;
|
||||
bool all_factors; // m_reportAllFactors;
|
||||
bool WordGraph;
|
||||
|
||||
bool output_word_graph;
|
||||
bool output_search_graph;
|
||||
bool output_search_graph_extended;
|
||||
bool output_search_graph_slf;
|
||||
bool output_search_graph_hypergraph;
|
||||
bool output_search_graph_protobuf;
|
||||
std::string SearchGraph;
|
||||
std::string SearchGraphExtended;
|
||||
std::string SearchGraphSLF;
|
||||
std::string SearchGraphHG;
|
||||
std::string SearchGraphPB;
|
||||
bool DontPruneSearchGraph;
|
||||
|
||||
bool RecoverPath; // recover input path?
|
||||
bool ReportHypoScore;
|
||||
|
||||
bool PrintID;
|
||||
bool PrintPassThrough;
|
||||
|
||||
// print ..
|
||||
bool aln_info; // m_PrintAlignmentInfo;
|
||||
bool id; // m_PrintID;
|
||||
bool passthrough; // m_PrintPassthroughInformation;
|
||||
|
||||
// transrep = translation reporting
|
||||
std::string detailed_transrep_filepath;
|
||||
std::string detailed_tree_transrep_filepath;
|
||||
std::string detailed_all_transrep_filepath;
|
||||
|
||||
std::string aln_output_file; // m_alignmentOutputFile;
|
||||
std::string lattice_sample_filepath;
|
||||
size_t lattice_sample_size;
|
||||
|
||||
bool init(Parameter const& param);
|
||||
|
||||
/// do we need to keep the search graph from decoding?
|
||||
bool NeedSearchGraph() const {
|
||||
return !(SearchGraph.empty() && SearchGraphExtended.empty());
|
||||
}
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ namespace Moses
|
||||
beam_width = TransformScore(beam_width);
|
||||
trans_opt_threshold = TransformScore(trans_opt_threshold);
|
||||
early_discarding_threshold = TransformScore(early_discarding_threshold);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -25,8 +25,7 @@ using Moses::Sentence;
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
TranslationRequest::
|
||||
create(Translator* translator, xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond,
|
||||
boost::mutex& mut)
|
||||
boost::condition_variable& cond, boost::mutex& mut)
|
||||
{
|
||||
boost::shared_ptr<TranslationRequest> ret;
|
||||
ret.reset(new TranslationRequest(paramList, cond, mut));
|
||||
@ -60,10 +59,9 @@ Run()
|
||||
Moses::StaticData const& SD = Moses::StaticData::Instance();
|
||||
|
||||
//Make sure alternative paths are retained, if necessary
|
||||
if (m_withGraphInfo || m_nbestSize>0)
|
||||
// why on earth is this a global variable? Is this even thread-safe???? UG
|
||||
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
|
||||
|
||||
// if (m_withGraphInfo || m_nbestSize>0)
|
||||
// why on earth is this a global variable? Is this even thread-safe???? UG
|
||||
// (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
|
||||
// std::stringstream out, graphInfo, transCollOpts;
|
||||
|
||||
if (SD.IsSyntax())
|
||||
@ -170,7 +168,14 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
{
|
||||
TrellisPathList nBestList;
|
||||
vector<xmlrpc_c::value> nBestXml;
|
||||
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
|
||||
manager.CalcNBest(m_options.nbest.nbest_size, nBestList,
|
||||
m_options.nbest.only_distinct);
|
||||
|
||||
StaticData const& SD = StaticData::Instance();
|
||||
manager.OutputNBest(cout, nBestList,
|
||||
SD.GetOutputFactorOrder(),
|
||||
m_source->GetTranslationId(),
|
||||
options().output.ReportSegmentation);
|
||||
|
||||
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
|
||||
vector<const Hypothesis *> const& E = path->GetEdges();
|
||||
@ -180,7 +185,8 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
|
||||
if (m_withScoreBreakdown) {
|
||||
// should the score breakdown be reported in a more structured manner?
|
||||
ostringstream buf;
|
||||
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
|
||||
bool with_labels = m_options.nbest.include_feature_labels;
|
||||
path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels);
|
||||
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
|
||||
}
|
||||
|
||||
@ -228,23 +234,23 @@ insertTranslationOptions(Moses::Manager& manager,
|
||||
retData["topt"] = xmlrpc_c::value_array(toptsXml);
|
||||
}
|
||||
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
return (params.find(key) != params.end());
|
||||
}
|
||||
|
||||
TranslationRequest::
|
||||
TranslationRequest(xmlrpc_c::paramList const& paramList,
|
||||
boost::condition_variable& cond, boost::mutex& mut)
|
||||
: m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
|
||||
, m_nbestSize(0)
|
||||
// , m_nbestSize(0)
|
||||
, m_session_id(0)
|
||||
{
|
||||
m_options = StaticData::Instance().options();
|
||||
}
|
||||
|
||||
bool
|
||||
check(std::map<std::string, xmlrpc_c::value> const& param,
|
||||
std::string const key)
|
||||
{
|
||||
std::map<std::string, xmlrpc_c::value>::const_iterator m;
|
||||
return (param.find(key) != param.end());
|
||||
}
|
||||
|
||||
void
|
||||
TranslationRequest::
|
||||
@ -274,10 +280,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
m_withWordAlignInfo = check(params, "word-align");
|
||||
m_withGraphInfo = check(params, "sg");
|
||||
m_withTopts = check(params, "topt");
|
||||
m_reportAllFactors = check(params, "report-all-factors");
|
||||
m_nbestDistinct = check(params, "nbest-distinct");
|
||||
// m_reportAllFactors = check(params, "report-all-factors");
|
||||
// m_nbestDistinct = check(params, "nbest-distinct");
|
||||
m_withScoreBreakdown = check(params, "add-score-breakdown");
|
||||
m_source.reset(new Sentence(0,m_source_string));
|
||||
si = params.find("lambda");
|
||||
if (si != params.end())
|
||||
{
|
||||
@ -298,9 +303,9 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
}
|
||||
}
|
||||
|
||||
si = params.find("nbest");
|
||||
if (si != params.end())
|
||||
m_nbestSize = xmlrpc_c::value_int(si->second);
|
||||
// si = params.find("nbest");
|
||||
// if (si != params.end())
|
||||
// m_nbestSize = xmlrpc_c::value_int(si->second);
|
||||
|
||||
si = params.find("context");
|
||||
if (si != params.end())
|
||||
@ -309,6 +314,8 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
VERBOSE(1,"CONTEXT " << context);
|
||||
m_context.reset(new std::vector<std::string>(1,context));
|
||||
}
|
||||
|
||||
|
||||
// // biased sampling for suffix-array-based sampling phrase table?
|
||||
// if ((si = params.find("bias")) != params.end())
|
||||
// {
|
||||
@ -317,6 +324,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
// for (size_t i = 1; i < tmp.size(); i += 2)
|
||||
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
|
||||
// }
|
||||
m_source.reset(new Sentence(0,m_source_string,m_options));
|
||||
} // end of Translationtask::parse_request()
|
||||
|
||||
|
||||
@ -326,7 +334,7 @@ run_chart_decoder()
|
||||
{
|
||||
Moses::TreeInput tinput;
|
||||
istringstream buf(m_source_string + "\n");
|
||||
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
|
||||
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options);
|
||||
|
||||
Moses::ChartManager manager(this->self());
|
||||
manager.Decode();
|
||||
@ -393,8 +401,13 @@ void
|
||||
TranslationRequest::
|
||||
run_phrase_decoder()
|
||||
{
|
||||
if (m_withGraphInfo || m_options.nbest.nbest_size>0)
|
||||
m_options.output.SearchGraph = "true";
|
||||
|
||||
Manager manager(this->self());
|
||||
// if (m_bias.size()) manager.SetBias(&m_bias);
|
||||
|
||||
|
||||
manager.Decode();
|
||||
|
||||
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
|
||||
@ -403,10 +416,10 @@ run_phrase_decoder()
|
||||
|
||||
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
|
||||
if (m_withTopts) insertTranslationOptions(manager,m_retData);
|
||||
if (m_nbestSize) outputNBest(manager, m_retData);
|
||||
if (m_options.nbest.nbest_size) outputNBest(manager, m_retData);
|
||||
|
||||
(const_cast<StaticData&>(Moses::StaticData::Instance()))
|
||||
.SetOutputSearchGraph(false);
|
||||
// (const_cast<StaticData&>(Moses::StaticData::Instance()))
|
||||
// .SetOutputSearchGraph(false);
|
||||
// WTF? one more reason not to have this as global variable! --- UG
|
||||
|
||||
}
|
||||
|
@ -43,9 +43,9 @@ TranslationRequest : public virtual Moses::TranslationTask
|
||||
bool m_withGraphInfo;
|
||||
bool m_withTopts;
|
||||
bool m_reportAllFactors;
|
||||
bool m_nbestDistinct;
|
||||
// bool m_nbestDistinct;
|
||||
bool m_withScoreBreakdown;
|
||||
size_t m_nbestSize;
|
||||
// size_t m_nbestSize;
|
||||
|
||||
uint64_t m_session_id; // 0 means none, 1 means new
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user