Reorganisation of options.

The purpose of this effort is to have options local to the individual translation task,
so that they can be changed in the running system in a multi-threaded system.
This commit is contained in:
Ulrich Germann 2015-08-06 00:51:02 +01:00
parent fc10ad4afb
commit 524109e2ca
49 changed files with 709 additions and 433 deletions

View File

@ -140,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}
// Accessor for the task-local decoder options. Options live on the
// translation task (ttask) rather than in global StaticData, so that
// they can vary per request in a multi-threaded server.
AllOptions const&
BaseManager::
options() const
{
return GetTtask()->options();
}
} // namespace

View File

@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;
@ -51,6 +51,7 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;
virtual void Decode() = 0;
// outputs

View File

@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}
ChartCell::~ChartCell() {}
@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

View File

@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of the arc list if we need a distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.UseMBR()
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs

View File

@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();
m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

View File

@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
// Extract the derivations.
@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList, staticData.options().nbest.only_distinct);
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}

View File

@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

View File

@ -43,8 +43,10 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord
= ((staticData.options().search.algo == CYKPlus)
|| (staticData.options().search.algo == ChartIncremental));
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);
std::string line;

View File

@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
vector<float> weights = staticData.GetWeights(this);
staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.options().search.stack_size = weights[0] * 1000;
staticData.options().search.beam_width = weights[1] * 10;
}

View File

@ -362,8 +362,8 @@ CleanupArcList()
* so we'll keep all of the arc list if we need a distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = (staticData.GetDistinctNBest() ||
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct ||
staticData.GetLatticeSamplesSize() ||
staticData.UseMBR() ||
staticData.GetOutputSearchGraph() ||

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -96,8 +96,8 @@ IOWrapper::IOWrapper()
const StaticData &staticData = StaticData::Instance();
// context buffering for context-sensitive decoding
m_look_ahead = staticData.GetContextParameters().look_ahead;
m_look_back = staticData.GetContextParameters().look_back;
m_look_ahead = staticData.options().context.look_ahead;
m_look_back = staticData.options().context.look_back;
m_inputType = staticData.GetInputType();
@ -108,8 +108,8 @@ IOWrapper::IOWrapper()
m_inputFactorOrder = &staticData.GetInputFactorOrder();
size_t nBestSize = staticData.GetNBestSize();
string nBestFilePath = staticData.GetNBestFilePath();
size_t nBestSize = staticData.options().nbest.nbest_size;
string nBestFilePath = staticData.options().nbest.output_file_path;
staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
if (m_inputFilePath.empty()) {

View File

@ -208,7 +208,7 @@ Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
, n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
, n_best_(search::NBestConfig(StaticData::Instance().options().nbest.nbest_size))
{ }
Manager::~Manager()
@ -223,12 +223,17 @@ namespace
const float log_10 = logf(10);
}
template <class Model, class Best> search::History Manager::PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
template <class Model, class Best>
search::History
Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
search::Config config(abstract.GetWeight() * log_10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);
size_t size = m_source.GetSize();
@ -255,7 +260,7 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M
template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words)
{
std::size_t nbest = StaticData::Instance().GetNBestSize();
std::size_t nbest = StaticData::Instance().options().nbest.nbest_size;
if (nbest <= 1) {
search::History ret = PopulateBest(model, words, single_best_);
if (ret) {

View File

@ -71,7 +71,7 @@ Manager::Manager(ttasksptr const& ttask)
m_transOptColl = source->CreateTranslationOptionCollection(ttask);
const StaticData &staticData = StaticData::Instance();
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
SearchAlgorithm searchAlgorithm = staticData.options().search.algo;
m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
*m_transOptColl);
@ -264,7 +264,7 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
}
// factor defines stopping point for distinct n-best list if too many candidates identical
size_t nBestFactor = StaticData::Instance().GetNBestFactor();
size_t nBestFactor = StaticData::Instance().options().nbest.factor;
if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited
// MAIN loop
@ -288,7 +288,7 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
if(onlyDistinct) {
const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
const size_t nBestFactor = StaticData::Instance().options().nbest.factor;
if (nBestFactor > 0)
contenders.Prune(count * nBestFactor);
} else {
@ -1548,10 +1548,10 @@ void Manager::OutputBest(OutputCollector *collector) const
// lattice MBR
if (staticData.UseLatticeMBR()) {
if (staticData.IsNBestEnabled()) {
if (staticData.options().nbest.enabled) {
//lattice mbr nbest
vector<LatticeMBRSolution> solutions;
size_t n = min(nBestSize, staticData.GetNBestSize());
size_t n = min(nBestSize, staticData.options().nbest.nbest_size);
getLatticeMBRNBest(*this,nBestList,solutions,n);
OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
} else {
@ -1609,14 +1609,16 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();
if (staticData.UseLatticeMBR()) {
if (staticData.IsNBestEnabled()) {
if (staticData.options().nbest.enabled) {
collector->Write(translationId, m_latticeNBestOut.str());
}
} else {
TrellisPathList nBestList;
ostringstream out;
CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
CalcNBest(staticData.options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct);
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(),
staticData.GetReportSegmentation());
collector->Write(m_source.GetTranslationId(), out.str());
}
@ -1630,9 +1632,10 @@ void Manager::OutputNBest(std::ostream& out
, char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
bool reportAllFactors = staticData.GetReportAllFactorsNBest();
bool includeSegmentation = staticData.NBestIncludesSegmentation();
bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
NBestOptions const& nbo = staticData.options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
bool includeWordAlignment = nbo.include_alignment_info;
TrellisPathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {

View File

@ -44,7 +44,7 @@ RuleCube::RuleCube(const ChartTranslationOptions &transOpt,
{
RuleCubeItem *item = new RuleCubeItem(transOpt, allChartCells);
m_covered.insert(item);
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
item->EstimateScore();
} else {
item->CreateHypothesis(transOpt, manager);
@ -92,7 +92,7 @@ void RuleCube::CreateNeighbor(const RuleCubeItem &item, int dimensionIndex,
if (!result.second) {
delete newItem; // already seen it
} else {
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
newItem->EstimateScore();
} else {
newItem->CreateHypothesis(m_transOpt, manager);

View File

@ -50,7 +50,7 @@ ChartHypothesis *RuleCubeQueue::Pop()
// pop the most promising item from the cube and get the corresponding
// hypothesis
RuleCubeItem *item = cube->Pop(m_manager);
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
item->CreateHypothesis(cube->GetTranslationOption(), m_manager);
}
ChartHypothesis *hypo = item->ReleaseHypothesis();

View File

@ -330,7 +330,7 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
, std::string &lastName ) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
bool labeledOutput = staticData.options().nbest.include_feature_labels;
// regular features (not sparse)
if (ff->HasTuneableComponents()) {

View File

@ -9,15 +9,20 @@ namespace Moses
Search::Search(Manager& manager)
: m_manager(manager)
,m_inputPath()
,m_initialTransOpt()
, m_inputPath()
, m_initialTransOpt()
, m_options(manager.options())
, interrupted_flag(0)
{
m_initialTransOpt.SetInputPath(m_inputPath);
}
Search *Search::CreateSearch(Manager& manager, const InputType &source,
SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
Search *
Search::
CreateSearch(Manager& manager, const InputType &source,
SearchAlgorithm searchAlgorithm,
const TranslationOptionCollection &transOptColl)
{
switch(searchAlgorithm) {
case Normal:
@ -32,4 +37,18 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
}
}
// Returns true if decoding has exceeded the time limit configured in
// m_options.search.timeout (decoder switch -time-out); a limit of 0
// disables the check. On timeout, interrupted_flag is set so search
// loops can terminate gracefully instead of being killed mid-stack.
bool
Search::
out_of_time()
{
int const& timelimit = m_options.search.timeout;
if (!timelimit) return false;
// NOTE(review): assumes GetUserTime() reports seconds, matching the
// unit of the timeout option -- confirm.
double elapsed_time = GetUserTime();
if (elapsed_time <= timelimit) return false;
VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
<< timelimit << ")" << std::endl);
interrupted_flag = 1;
return true;
}
}

View File

@ -43,6 +43,12 @@ protected:
Manager& m_manager;
InputPath m_inputPath; // for initial hypo
TranslationOption m_initialTransOpt; /**< used to seed 1st hypo */
AllOptions const& m_options;
/** flag indicating that decoder ran out of time (see switch -time-out) */
size_t interrupted_flag;
bool out_of_time();
};
}

View File

@ -48,8 +48,8 @@ SearchCubePruning::SearchCubePruning(Manager& manager, const InputType &source,
std::vector < HypothesisStackCubePruning >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
sourceHypoColl->SetMaxHypoStackSize(m_options.search.stack_size);
sourceHypoColl->SetBeamWidth(m_options.search.beam_width);
m_hypoStackColl[ind] = sourceHypoColl;
}
@ -66,7 +66,8 @@ SearchCubePruning::~SearchCubePruning()
*/
void SearchCubePruning::Decode()
{
const StaticData &staticData = StaticData::Instance();
const StaticData &SD = StaticData::Instance();
AllOptions const& opts = SD.options();
// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTransOpt);
@ -77,20 +78,22 @@ void SearchCubePruning::Decode()
firstStack.CleanupArcList();
CreateForwardTodos(firstStack);
const size_t PopLimit = StaticData::Instance().GetCubePruningPopLimit();
VERBOSE(3,"Cube Pruning pop limit is " << PopLimit << std::endl)
const size_t PopLimit = StaticData::Instance().options().cube.pop_limit;
VERBOSE(3,"Cube Pruning pop limit is " << PopLimit << std::endl);
const size_t Diversity = StaticData::Instance().GetCubePruningDiversity();
const size_t Diversity = StaticData::Instance().options().cube.diversity;
VERBOSE(3,"Cube Pruning diversity is " << Diversity << std::endl)
// go through each stack
size_t stackNo = 1;
int timelimit = m_options.search.timeout;
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() + 1 ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
if (timelimit && _elapsed_time > timelimit) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << ","
<< timelimit << ")" << std::endl);
return;
}
HypothesisStackCubePruning &sourceHypoColl = *static_cast<HypothesisStackCubePruning*>(*iterStack);
@ -144,7 +147,7 @@ void SearchCubePruning::Decode()
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
sourceHypoColl.PruneToSize(m_options.search.stack_size);
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {

View File

@ -15,15 +15,21 @@ namespace Moses
* /param source input sentence
* /param transOptColl collection of translation options to be used for this sentence
*/
SearchNormal::SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
:Search(manager)
,m_source(source)
,m_hypoStackColl(source.GetSize() + 1)
,interrupted_flag(0)
,m_transOptColl(transOptColl)
SearchNormal::
SearchNormal(Manager& manager, const InputType &source,
const TranslationOptionCollection &transOptColl)
: Search(manager)
, m_source(source)
, m_hypoStackColl(source.GetSize() + 1)
, m_transOptColl(transOptColl)
{
VERBOSE(1, "Translating: " << m_source << endl);
const StaticData &staticData = StaticData::Instance();
// m_beam_width = manager.options().search.beam_width;
// m_stack_size = manager.options().search.stack_size;
// m_stack_diversity = manager.options().search.stack_diversity;
// m_timeout = manager.options().search.timeout;
// m_max_distortion = manager.options().reordering.max_distortion;
// only if constraint decoding (having to match a specified output)
// long sentenceID = source.GetTranslationId();
@ -32,10 +38,9 @@ SearchNormal::SearchNormal(Manager& manager, const InputType &source, const Tran
std::vector < HypothesisStackNormal >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),
staticData.GetMinHypoStackDiversity());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
sourceHypoColl->SetMaxHypoStackSize(this->m_options.search.stack_size,
this->m_options.search.stack_diversity);
sourceHypoColl->SetBeamWidth(this->m_options.search.beam_width);
m_hypoStackColl[ind] = sourceHypoColl;
}
}
@ -45,59 +50,49 @@ SearchNormal::~SearchNormal()
RemoveAllInColl(m_hypoStackColl);
}
// Prune, clean up, and fully expand one hypothesis stack.
// Returns false if the decoding time limit was hit (see
// Search::out_of_time), signalling the caller's stack loop to stop;
// returns true when the stack was processed normally.
bool
SearchNormal::
ProcessOneStack(HypothesisStack* hstack)
{
if (this->out_of_time()) return false;
SentenceStats &stats = m_manager.GetSentenceStats();
// NOTE(review): unchecked downcast -- callers must only pass stacks
// that really are HypothesisStackNormal instances.
HypothesisStackNormal &sourceHypoColl
= *static_cast<HypothesisStackNormal*>(hstack);
// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) stats.StartTimeStack();
sourceHypoColl.PruneToSize(m_options.search.stack_size);
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) stats.StopTimeStack();
// go through each hypothesis on the stack and try to expand it
BOOST_FOREACH(Hypothesis* h, sourceHypoColl)
ProcessOneHypothesis(*h);
return true;
}
/**
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
void SearchNormal::Decode()
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();
// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTransOpt);
Hypothesis *hypo = Hypothesis::Create(m_manager, m_source, m_initialTransOpt);
m_hypoStackColl[0]->AddPrune(hypo);
// go through each stack
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
interrupted_flag = 1;
return;
}
HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) {
stats.StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {
stats.StopTimeStack();
}
// go through each hypothesis on the stack and try to expand it
HypothesisStackNormal::const_iterator iterHypo;
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) {
Hypothesis &hypothesis = **iterHypo;
ProcessOneHypothesis(hypothesis); // expand the hypothesis
}
// some logging
IFVERBOSE(2) {
OutputHypoStackSize();
}
// this stack is fully expanded;
actual_hypoStack = &sourceHypoColl;
BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
if (!ProcessOneStack(hstack)) return;
IFVERBOSE(2) OutputHypoStackSize();
actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
}
//OutputHypoStack();
}
@ -111,8 +106,8 @@ SearchNormal::
ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
// int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = m_source.GetType() == WordLatticeInput;
const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
@ -122,7 +117,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
ReoConstraint = m_source.GetReorderingConstraint();
// no limit of reordering: only check for overlap
if (maxDistortion < 0) {
if (m_options.reordering.max_distortion < 0) {
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
TranslationOptionList const* tol;
@ -152,7 +147,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
if(hypoBitmap.GetValue(startPos)) continue;
size_t maxSize = sourceSize - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
size_t maxSizePhrase = m_options.search.max_phrase_length;
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
@ -178,7 +173,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
WordsRange currentStartRange(startPos, startPos);
if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
> maxDistortion)
> m_options.reordering.max_distortion)
continue;
TranslationOptionList const* tol;
@ -227,7 +222,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
> maxDistortion) continue;
> m_options.reordering.max_distortion) continue;
// everything is fine, we're good to go
ExpandAllHypotheses(hypothesis, startPos, endPos);
@ -251,7 +246,7 @@ ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos
// early discarding: check if hypothesis is too bad to build
// this idea is explained in (Moore&Quirk, MT Summit 2007)
float expectedScore = 0.0f;
if (StaticData::Instance().UseEarlyDiscarding()) {
if (m_options.search.UseEarlyDiscarding()) {
// expected score is based on score of current hypothesis
expectedScore = hypothesis.GetScore();
@ -286,7 +281,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
SentenceStats &stats = m_manager.GetSentenceStats();
Hypothesis *newHypo;
if (! staticData.UseEarlyDiscarding()) {
if (! m_options.search.UseEarlyDiscarding()) {
// simple build, no questions asked
IFVERBOSE(2) {
stats.StartTimeBuildHyp();
@ -303,7 +298,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
// worst possible score may have changed -> recompute
size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize();
float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
if (staticData.GetMinHypoStackDiversity()) {
if (m_options.search.stack_diversity) {
WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
allowedScore = std::min( allowedScore, allowedScoreForBitmap );

View File

@ -14,23 +14,38 @@ class Manager;
class InputType;
class TranslationOptionCollection;
/** Functions and variables you need to decoder an input using the phrase-based decoder (NO cube-pruning)
/** Functions and variables you need to decode an input using the
* phrase-based decoder (NO cube-pruning)
* Instantiated by the Manager class
*/
class SearchNormal: public Search
{
protected:
const InputType &m_source;
std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
//! stacks to store hypotheses (partial translations)
// no of elements = no of words in source + 1
size_t interrupted_flag; /**< flag indicating that decoder ran out of time (see switch -time-out) */
HypothesisStackNormal* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
std::vector < HypothesisStack* > m_hypoStackColl;
/** actual (full expanded) stack of hypotheses*/
HypothesisStackNormal* actual_hypoStack;
/** pre-computed list of translation options for the phrases in this sentence */
const TranslationOptionCollection &m_transOptColl;
// functions for creating hypotheses
void ProcessOneHypothesis(const Hypothesis &hypothesis);
void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
virtual void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
virtual bool
ProcessOneStack(HypothesisStack* hstack);
virtual void
ProcessOneHypothesis(const Hypothesis &hypothesis);
virtual void
ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
virtual void
ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt,
float expectedScore);
public:
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);

View File

@ -3,6 +3,7 @@
#include "Manager.h"
#include "Hypothesis.h"
#include "util/exception.hh"
#include <boost/foreach.hpp>
//#include <google/profiler.h>
@ -14,7 +15,7 @@ SearchNormalBatch::SearchNormalBatch(Manager& manager, const InputType &source,
:SearchNormal(manager, source, transOptColl)
,m_batch_size(10000)
{
m_max_stack_size = StaticData::Instance().GetMaxHypoStackSize();
m_max_stack_size = m_options.search.stack_size;
// Split the feature functions into sets of stateless, stateful
// distributed lm, and stateful non-distributed.
@ -50,47 +51,13 @@ void SearchNormalBatch::Decode()
m_hypoStackColl[0]->AddPrune(hypo);
// go through each stack
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
interrupted_flag = 1;
return;
}
HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) {
stats.StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {
stats.StopTimeStack();
}
// go through each hypothesis on the stack and try to expand it
HypothesisStackNormal::const_iterator iterHypo;
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) {
Hypothesis &hypothesis = **iterHypo;
ProcessOneHypothesis(hypothesis); // expand the hypothesis
}
EvalAndMergePartialHypos();
// some logging
IFVERBOSE(2) {
OutputHypoStackSize();
}
// this stack is fully expanded;
actual_hypoStack = &sourceHypoColl;
BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
if (!ProcessOneStack(hstack)) return;
EvalAndMergePartialHypos(); // <= THAT is the difference to SearchNormal!
IFVERBOSE(2) OutputHypoStackSize();
actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
}
EvalAndMergePartialHypos();
EvalAndMergePartialHypos(); // <= THAT is the difference to SearchNormal!
}
/**
@ -106,7 +73,8 @@ void SearchNormalBatch::Decode()
void
SearchNormalBatch::
ExpandHypothesis(const Hypothesis &hypothesis,
const TranslationOption &transOpt, float expectedScore)
const TranslationOption &transOpt,
float expectedScore)
{
// Check if the number of partial hypotheses exceeds the batch size.
if (m_partial_hypos.size() >= m_batch_size) {

View File

@ -181,7 +181,7 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>"
if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) {
if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) {
string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru);
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
// vim:tabstop=2
@ -63,8 +64,6 @@ StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
@ -80,16 +79,6 @@ StaticData::StaticData()
StaticData::~StaticData()
{
RemoveAllInColl(m_decodeGraphs);
/*
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
for(size_t i=0;i<producers.size();++i) {
FeatureFunction *ff = producers[i];
delete ff;
}
*/
// memory pools
Phrase::FinalizeMemPool();
}
@ -199,39 +188,19 @@ StaticData
}
m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false );
//word-to-word alignment
// alignments
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
// if (m_PrintAlignmentInfo) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
// if (m_PrintAlignmentInfoNbest) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
// m_needAlignmentInfo = true; // => now in BookkeepingOptions::init()
}
m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
// m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false ); // => now in BookkeepingOptions::init()
// word graph
params = m_parameter->GetParam("output-word-graph");
if (params && params->size() == 2)
m_outputWordGraph = true;
else
m_outputWordGraph = false;
m_outputWordGraph = (params && params->size() == 2);
// search graph
params = m_parameter->GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {
@ -240,6 +209,7 @@ StaticData
}
m_outputSearchGraph = true;
}
// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {
@ -298,15 +268,14 @@ StaticData
m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false );
// additional output
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath, "translation-details", "");
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath, "tree-translation-details", "");
//DIMw
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath, "translation-all-details", "");
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath,
"translation-details", "");
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath,
"tree-translation-details", "");
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath,
"translation-all-details", "");
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
//lattice samples
params = m_parameter->GetParam("lattice-samples");
if (params) {
@ -323,14 +292,6 @@ StaticData
return true;
}
bool
StaticData
::ini_nbest_options()
{
return m_nbest_options.init(*m_parameter);
}
void
StaticData
::ini_compact_table_options()
@ -353,8 +314,8 @@ StaticData
::ini_performance_options()
{
const PARAM_VEC *params;
m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
// m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
// m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
m_threadCount = 1;
params = m_parameter->GetParam("threads");
@ -388,18 +349,6 @@ StaticData
return true;
}
void
StaticData
::ini_cube_pruning_options()
{
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT);
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
false);
}
void
StaticData
::ini_factor_maps()
@ -453,45 +402,42 @@ void
StaticData
::ini_distortion_options()
{
// reordering constraints
m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);
m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );
// early distortion cost
m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
// // reordering constraints
// m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);
// m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );
// // early distortion cost
// m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
}
bool
StaticData
::ini_stack_decoding_options()
{
const PARAM_VEC *params;
// settings for pruning
m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
// const PARAM_VEC *params;
// // settings for pruning
// m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
m_minHypoStackDiversity = 0;
params = m_parameter->GetParam("stack-diversity");
if (params && params->size()) {
if (m_maxDistortion > 15) {
std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
return false;
}
if (m_inputType == WordLatticeInput) {
std::cerr << "stack diversity > 0 is not allowed for lattice input";
return false;
}
m_minHypoStackDiversity = Scan<size_t>(params->at(0));
}
// m_minHypoStackDiversity = 0;
// params = m_parameter->GetParam("stack-diversity");
// if (params && params->size()) {
// if (m_maxDistortion > 15) {
// std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
// return false;
// }
// if (m_inputType == WordLatticeInput) {
// std::cerr << "stack diversity > 0 is not allowed for lattice input";
// return false;
// }
// m_minHypoStackDiversity = Scan<size_t>(params->at(0));
// }
m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
m_beamWidth = TransformScore(m_beamWidth);
// m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
// m_beamWidth = TransformScore(m_beamWidth);
m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
// m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
// m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
return true;
}
@ -499,12 +445,12 @@ void
StaticData
::ini_phrase_lookup_options()
{
m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
// m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
// m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
// m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
// m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
// m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
}
@ -583,10 +529,11 @@ bool StaticData::LoadData(Parameter *parameter)
const PARAM_VEC *params;
m_context_parameters.init(*parameter);
m_options.init(*parameter);
// m_context_parameters.init(*parameter);
// to cube or not to cube
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
// m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
if (IsSyntax())
LoadChartDecodingParameters();
@ -596,7 +543,7 @@ bool StaticData::LoadData(Parameter *parameter)
ini_factor_maps();
ini_input_options();
m_bookkeeping_options.init(*parameter);
m_nbest_options.init(*parameter); // if (!ini_nbest_options()) return false;
// m_nbest_options.init(*parameter);
if (!ini_output_options()) return false;
// threading etc.
@ -609,7 +556,7 @@ bool StaticData::LoadData(Parameter *parameter)
ini_distortion_options();
if (!ini_stack_decoding_options()) return false;
ini_phrase_lookup_options();
ini_cube_pruning_options();
// ini_cube_pruning_options();
ini_oov_options();
ini_mbr_options();
@ -625,7 +572,7 @@ bool StaticData::LoadData(Parameter *parameter)
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size()) {
m_nbest_options.enabled = true;
m_options.nbest.enabled = true;
}
// S2T decoder
@ -1272,8 +1219,9 @@ StaticData
// FIXME Does this make sense for F2S? Perhaps it should be changed once
// FIXME the pipeline uses RuleTable consistently.
if (m_searchAlgorithm == SyntaxS2T || m_searchAlgorithm == SyntaxT2S ||
m_searchAlgorithm == SyntaxT2S_SCFG || m_searchAlgorithm == SyntaxF2S) {
SearchAlgorithm algo = m_options.search.algo;
if (algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxT2S_SCFG || algo == SyntaxF2S) {
// Automatically override PhraseDictionary{Memory,Scope3}. This will
// have to change if the FF parameters diverge too much in the future,
// but for now it makes switching between the old and new decoders much

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
/***********************************************************************
@ -44,8 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/Factory.h"
#include "moses/PP/Factory.h"
#include "moses/parameters/ContextParameters.h"
#include "moses/parameters/NBestOptions.h"
#include "moses/parameters/AllOptions.h"
#include "moses/parameters/BookkeepingOptions.h"
namespace Moses
@ -70,11 +69,10 @@ class StaticData
friend class HyperParameterAsWeight;
private:
static StaticData s_instance;
static StaticData s_instance;
protected:
Parameter *m_parameter;
ContextParameters m_context_parameters;
AllOptions m_options;
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
mutable ScoreComponentCollection m_allWeights;
@ -84,34 +82,29 @@ protected:
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
float
m_beamWidth,
m_earlyDiscardingThreshold,
m_translationOptionThreshold,
// m_beamWidth,
// m_earlyDiscardingThreshold,
// m_translationOptionThreshold,
m_wordDeletionWeight;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
bool m_useEarlyDistortionCost;
size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
NBestOptions m_nbest_options;
// bool m_useEarlyDistortionCost;
// size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
// size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
BookkeepingOptions m_bookkeeping_options;
// size_t m_nBestSize;
// size_t m_nBestFactor;
size_t m_latticeSamplesSize;
size_t m_maxNoTransOptPerCoverage;
size_t m_maxNoPartTransOpt;
size_t m_maxPhraseLength;
// size_t m_maxNoTransOptPerCoverage;
// size_t m_maxNoPartTransOpt;
// size_t m_maxPhraseLength;
// std::string m_nBestFilePath;
std::string m_latticeSamplesFilePath;
// bool m_labeledNBestList,m_nBestIncludesSegmentation;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
std::string m_unknownWordPrefix;
@ -127,7 +120,7 @@ protected:
bool m_outputHypoScore;
bool m_requireSortingAfterSourceContext;
SearchAlgorithm m_searchAlgorithm;
// SearchAlgorithm m_searchAlgorithm;
InputTypeEnum m_inputType;
mutable size_t m_verboseLevel;
@ -135,21 +128,15 @@ protected:
bool m_reportSegmentation;
bool m_reportSegmentationEnriched;
bool m_reportAllFactors;
// bool m_reportAllFactorsNBest;
std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
//DIMw
std::string m_detailedAllTranslationReportingFilePath;
// bool m_onlyDistinctNBest;
bool m_PrintAlignmentInfo;
// bool m_needAlignmentInfo; // => BookkeepingOptions
// bool m_PrintAlignmentInfoNbest;
bool m_PrintID;
bool m_PrintPassthroughInformation;
// bool m_PrintPassthroughInformationInNBest;
std::string m_alignmentOutputFile;
@ -174,8 +161,8 @@ protected:
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;
bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
// bool m_timeout; //! use timeout
// size_t m_timeout_threshold; //! seconds after which time out is activated
bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created
@ -192,9 +179,6 @@ protected:
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
std::string m_outputUnknownsFile; //! output unknowns in this file
size_t m_cubePruningPopLimit;
size_t m_cubePruningDiversity;
bool m_cubePruningLazyScoring;
size_t m_ruleLimit;
// Whether to load compact phrase table and reordering table into memory
@ -221,7 +205,6 @@ protected:
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
S2TParsingAlgorithm m_s2tParsingAlgorithm;
// bool m_printNBestTrees;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
@ -260,7 +243,6 @@ protected:
void ini_lmbr_options();
void ini_mbr_options();
void ini_mira_options();
bool ini_nbest_options();
void ini_oov_options();
bool ini_output_options();
bool ini_performance_options();
@ -307,9 +289,14 @@ public:
return *m_parameter;
}
const ContextParameters&
GetContextParameters() const {
return m_context_parameters;
AllOptions const&
options() const {
return m_options;
}
AllOptions&
options() {
return m_options;
}
const std::vector<FactorType> &GetInputFactorOrder() const {
@ -338,32 +325,24 @@ public:
return m_disableDiscarding;
}
inline size_t GetMaxNoTransOptPerCoverage() const {
return m_maxNoTransOptPerCoverage;
return m_options.search.max_trans_opt_per_cov;
}
inline size_t GetMaxNoPartTransOpt() const {
return m_maxNoPartTransOpt;
return m_options.search.max_partial_trans_opt;
}
inline size_t GetMaxPhraseLength() const {
return m_maxPhraseLength;
return m_options.search.max_phrase_length;
}
bool IsWordDeletionEnabled() const {
return m_wordDeletionEnabled;
}
size_t GetMaxHypoStackSize() const {
return m_maxHypoStackSize;
}
size_t GetMinHypoStackDiversity() const {
return m_minHypoStackDiversity;
}
size_t GetCubePruningPopLimit() const {
return m_cubePruningPopLimit;
}
size_t GetCubePruningDiversity() const {
return m_cubePruningDiversity;
}
bool GetCubePruningLazyScoring() const {
return m_cubePruningLazyScoring;
}
// size_t GetMaxHypoStackSize() const {
// return m_options.search.stack_size;
// }
// size_t GetMinHypoStackDiversity() const {
// return m_options.search.stack_diversity;
// }
size_t IsPathRecoveryEnabled() const {
return m_recoverPath;
}
@ -373,30 +352,30 @@ public:
bool IsPassthroughEnabled() const {
return m_PrintPassthroughInformation;
}
bool IsPassthroughInNBestEnabled() const {
return m_nbest_options.include_passthrough;
// return m_PrintPassthroughInformationInNBest;
}
int GetMaxDistortion() const {
return m_maxDistortion;
return m_options.reordering.max_distortion;
}
bool UseReorderingConstraint() const {
return m_reorderingConstraint;
}
float GetBeamWidth() const {
return m_beamWidth;
return m_options.search.beam_width;
}
float GetEarlyDiscardingThreshold() const {
return m_earlyDiscardingThreshold;
return m_options.search.early_discarding_threshold;
}
bool UseEarlyDiscarding() const {
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
return m_options.search.early_discarding_threshold
!= -std::numeric_limits<float>::infinity();
}
bool UseEarlyDistortionCost() const {
return m_useEarlyDistortionCost;
return m_options.reordering.use_early_distortion_cost;
// return m_useEarlyDistortionCost;
}
float GetTranslationOptionThreshold() const {
return m_translationOptionThreshold;
return m_options.search.trans_opt_threshold;
}
size_t GetVerboseLevel() const {
@ -420,13 +399,11 @@ public:
else
std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring";
}
bool GetReportAllFactors() const {
return m_reportAllFactors;
}
bool GetReportAllFactorsNBest() const {
return m_nbest_options.include_all_factors;
// return m_reportAllFactorsNBest;
}
bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
}
@ -444,10 +421,10 @@ public:
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
bool IsLabeledNBestList() const {
return m_nbest_options.include_feature_labels;
// return m_labeledNBestList;
}
// bool IsLabeledNBestList() const {
// return m_options.nbest.include_feature_labels;
// }
bool UseMinphrInMemory() const {
return m_minphrMemory;
@ -458,26 +435,17 @@ public:
}
// for mert
size_t GetNBestSize() const {
return m_nbest_options.nbest_size;
// return m_nBestSize;
}
// size_t GetNBestSize() const {
// return m_options.nbest.nbest_size;
// }
const std::string &GetNBestFilePath() const {
return m_nbest_options.output_file_path;
// return m_nBestFilePath;
}
// const std::string &GetNBestFilePath() const {
// return m_options.nbest.output_file_path;
// }
bool IsNBestEnabled() const {
return m_nbest_options.enabled;
// return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
// m_outputSearchGraph || m_outputSearchGraphSLF ||
// m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
// #ifdef HAVE_PROTOBUF
// m_outputSearchGraphPB ||
// #endif
// !m_latticeSamplesFilePath.empty());
}
// bool IsNBestEnabled() const {
// return m_options.nbest.enabled;
// }
size_t GetLatticeSamplesSize() const {
return m_latticeSamplesSize;
@ -487,10 +455,9 @@ public:
return m_latticeSamplesFilePath;
}
size_t GetNBestFactor() const {
return m_nbest_options.factor;
// return m_nBestFactor;
}
// size_t GetNBestFactor() const {
// return m_options.nbest.factor;
// }
bool GetOutputWordGraph() const {
return m_outputWordGraph;
}
@ -499,22 +466,15 @@ public:
InputTypeEnum GetInputType() const {
return m_inputType;
}
SearchAlgorithm GetSearchAlgorithm() const {
return m_searchAlgorithm;
}
// bool IsSyntax() const {
// return m_searchAlgorithm == CYKPlus ||
// m_searchAlgorithm == ChartIncremental ||
// m_searchAlgorithm == SyntaxS2T ||
// m_searchAlgorithm == SyntaxT2S ||
// m_searchAlgorithm == SyntaxT2S_SCFG ||
// m_searchAlgorithm == SyntaxF2S;
// SearchAlgorithm GetSearchAlgorithm() const {
// return m_searchAlgorithm;
// }
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
if (algo == DefaultSearchAlgorithm)
algo = m_searchAlgorithm;
algo = m_options.search.algo;
return (algo == CYKPlus || algo == ChartIncremental ||
algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
@ -546,10 +506,9 @@ public:
//Weights for feature with fixed number of values
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
bool GetDistinctNBest() const {
return m_nbest_options.only_distinct;
// return m_onlyDistinctNBest;
}
// bool GetDistinctNBest() const {
// return m_options.nbest.only_distinct;
// }
const std::string& GetFactorDelimiter() const {
return m_factorDelimiter;
}
@ -603,12 +562,12 @@ public:
return m_lmbrMapWeight;
}
bool UseTimeout() const {
return m_timeout;
}
size_t GetTimeoutThreshold() const {
return m_timeout_threshold;
}
// bool UseTimeout() const {
// return m_timeout;
// }
// size_t GetTimeoutThreshold() const {
// return m_timeout_threshold;
// }
size_t GetLMCacheCleanupThreshold() const {
return m_lmcache_cleanup_threshold;
@ -722,19 +681,11 @@ public:
bool PrintAlignmentInfo() const {
return m_PrintAlignmentInfo;
}
bool PrintAlignmentInfoInNbest() const {
return m_nbest_options.include_alignment_info;
// return m_PrintAlignmentInfoNbest;
}
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;
}
bool NBestIncludesSegmentation() const {
return m_nbest_options.include_segmentation;
// return m_nBestIncludesSegmentation;
}
bool GetHasAlternateWeightSettings() const {
return m_weightSetting.size() > 0;
}
@ -872,11 +823,6 @@ public:
return m_s2tParsingAlgorithm;
}
bool PrintNBestTrees() const {
return m_nbest_options.print_trees;
// return m_printNBestTrees;
}
bool RequireSortingAfterSourceContext() const {
return m_requireSortingAfterSourceContext;
}

View File

@ -59,9 +59,9 @@ void Manager<RuleMatcher>::Decode()
const StaticData &staticData = StaticData::Instance();
// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = staticData.options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = staticData.options().search.stack_size;
// Initialize the stacks.
InitializeStacks();
@ -254,7 +254,7 @@ void Manager<RuleMatcher>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
// Extract the derivations.

View File

@ -52,8 +52,8 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();
KBestExtractor::KBestVec nBestList;
ExtractKBest(staticData.GetNBestSize(), nBestList,
staticData.GetDistinctNBest());
ExtractKBest(staticData.options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
}
}
@ -90,8 +90,8 @@ void Manager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
bool PrintNBestTrees = staticData.PrintNBestTrees();
bool includeWordAlignment = staticData.options().nbest.include_alignment_info;
bool PrintNBestTrees = staticData.options().nbest.print_trees; // PrintNBestTrees();
for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {

View File

@ -31,14 +31,14 @@ void RuleTableFF::Load()
SetFeaturesToApply();
const StaticData &staticData = StaticData::Instance();
if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
staticData.GetSearchAlgorithm() == SyntaxT2S) {
if (staticData.options().search.algo == SyntaxF2S ||
staticData.options().search.algo == SyntaxT2S) {
F2S::HyperTree *trie = new F2S::HyperTree(this);
F2S::HyperTreeLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie,
m_sourceTerminalSet);
m_table = trie;
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
} else if (staticData.options().search.algo == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
if (algorithm == RecursiveCYKPlus) {
S2T::RuleTrieCYKPlus *trie = new S2T::RuleTrieCYKPlus(this);
@ -53,7 +53,7 @@ void RuleTableFF::Load()
} else {
UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
}
} else if (staticData.GetSearchAlgorithm() == SyntaxT2S_SCFG) {
} else if (staticData.options().search.algo == SyntaxT2S_SCFG) {
T2S::RuleTrie *trie = new T2S::RuleTrie(this);
T2S::RuleTrieLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie);

View File

@ -162,9 +162,9 @@ void Manager<Parser>::Decode()
const StaticData &staticData = StaticData::Instance();
// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = staticData.options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = staticData.options().search.stack_size;
// Initialise the PChart and SChart.
InitializeCharts();
@ -302,7 +302,7 @@ void Manager<Parser>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
// Extract the derivations.

View File

@ -96,9 +96,9 @@ void Manager<RuleMatcher>::Decode()
const StaticData &staticData = StaticData::Instance();
// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = this->options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = this->options().search.stack_size;
// Initialize the stacks.
InitializeStacks();
@ -214,7 +214,7 @@ void Manager<RuleMatcher>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
// Extract the derivations.

View File

@ -100,7 +100,9 @@ TranslationTask
::TranslationTask(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper)
: m_source(source) , m_ioWrapper(ioWrapper)
{ }
{
m_options = StaticData::Instance().options();
}
TranslationTask::~TranslationTask()
{ }
@ -112,7 +114,7 @@ TranslationTask
{
boost::shared_ptr<BaseManager> manager;
StaticData const& staticData = StaticData::Instance();
if (algo == DefaultSearchAlgorithm) algo = staticData.GetSearchAlgorithm();
if (algo == DefaultSearchAlgorithm) algo = staticData.options().search.algo;
if (!staticData.IsSyntax(algo))
manager.reset(new Manager(this->self())); // phrase-based
@ -154,6 +156,13 @@ TranslationTask
return manager;
}
// Return the option set local to this translation task (a copy of the
// global StaticData options taken when the task was constructed), so
// that options can be varied per task in a multi-threaded system.
AllOptions const&
TranslationTask::
options() const
{
  return m_options;
}
void TranslationTask::Run()
{
UTIL_THROW_IF2(!m_source || !m_ioWrapper,

View File

@ -43,8 +43,8 @@ class TranslationTask : public Moses::Task
operator=(TranslationTask const& other) {
return *this;
}
protected:
AllOptions m_options;
boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself
boost::shared_ptr<ContextScope> m_scope; // sores local info
// pointer to ContextScope, which stores context-specific information
@ -134,6 +134,7 @@ public:
void SetContextWeights(std::string const& context_weights);
void ReSetContextWeights(std::map<std::string, float> const& new_weights);
AllOptions const& options() const;
protected:
boost::shared_ptr<Moses::InputType> m_source;

View File

@ -0,0 +1,31 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "AllOptions.h"
namespace Moses
{
// Convenience constructor: build the option set directly from a fully
// populated Parameter object.
AllOptions::
AllOptions(Parameter const& param)
{
  init(param);
}

// Initialize every option sub-group from the given Parameter object.
// Stops at the first sub-group that fails to initialize; if all
// sub-groups succeed, finishes with the cross-group sanity check.
bool
AllOptions::
init(Parameter const& param)
{
  bool ok = (search.init(param)
             && cube.init(param)
             && nbest.init(param)
             && reordering.init(param)
             && context.init(param)
             && input.init(param));
  return ok ? sanity_check() : false;
}

// Cross-group consistency checks; currently a placeholder that always
// succeeds.
bool
AllOptions::
sanity_check()
{
  return true;
}
}

View File

@ -0,0 +1,31 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
#include "SearchOptions.h"
#include "CubePruningOptions.h"
#include "NBestOptions.h"
#include "ReorderingOptions.h"
#include "ContextParameters.h"
#include "InputOptions.h"
namespace Moses
{
// Umbrella container for all decoder option groups, so that options
// can be kept local to an individual translation task instead of
// living only in the global StaticData singleton.
struct
AllOptions
{
  SearchOptions search;         // search / stack decoding parameters
  CubePruningOptions cube;      // cube pruning parameters
  NBestOptions nbest;           // n-best list generation parameters
  ReorderingOptions reordering; // distortion / reordering parameters
  ContextParameters context;    // context window for context-sensitive decoding
  InputOptions input;           // input type and XML handling parameters
  // StackOptions stack;
  // BeamSearchOptions beam;

  // Populate every sub-group from the Parameter object; returns false
  // if any sub-group's init() or the final sanity_check() fails.
  bool init(Parameter const& param);
  // Cross-group consistency check (currently always true).
  bool sanity_check();
  AllOptions() {}
  // Convenience constructor: construct and immediately init().
  AllOptions(Parameter const& param);
};
}

View File

@ -0,0 +1,15 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{
// Placeholder for beam-search-specific decoder options (cf. the
// commented-out "beam" member of AllOptions). Follows the same
// init(Parameter const&) pattern as the other option groups.
struct
BeamSearchOptions
{
  // Read beam search settings from the Parameter object.
  bool init(Parameter const& param);
  // Convenience constructor: construct and immediately init().
  BeamSearchOptions(Parameter const& param);
  // Default constructor so the struct can be embedded in AllOptions and
  // initialized later via init(). Without it, declaring the Parameter
  // constructor above suppresses the implicitly-declared default
  // constructor, unlike the sibling option structs (e.g.
  // CubePruningOptions).
  BeamSearchOptions() {}
};
}

View File

@ -9,9 +9,9 @@ ContextParameters()
: look_ahead(0), look_back(0)
{ }
void
bool
ContextParameters::
init(Parameter& params)
init(Parameter const& params)
{
look_back = look_ahead = 0;
params.SetParameter(context_string, "context-string", std::string(""));
@ -19,12 +19,12 @@ init(Parameter& params)
params.SetParameter(context_window, "context-window", std::string(""));
if (context_window == "")
return;
return true;
if (context_window.substr(0,3) == "all")
{
look_back = look_ahead = std::numeric_limits<size_t>::max();
return;
return true;
}
size_t p = context_window.find_first_of("0123456789");
@ -47,5 +47,6 @@ init(Parameter& params)
else
UTIL_THROW2("Invalid specification of context window.");
}
return true;
}
}

View File

@ -12,7 +12,7 @@ class ContextParameters
{
public:
ContextParameters();
void init(Parameter& params);
bool init(Parameter const& params);
size_t look_ahead; // # of words to look ahead for context-sensitive decoding
size_t look_back; // # of words to look back for context-sensitive decoding
std::string context_string; // fixed context string specified on command line

View File

@ -0,0 +1,19 @@
// -*- mode: c++; cc-style: gnu -*-
#include "CubePruningOptions.h"
namespace Moses
{
// Read the cube-pruning settings from the configuration. Each setting
// falls back to its compiled-in default when not specified on the
// command line / in the config file.
bool
CubePruningOptions::
init(Parameter const& param)
{
  param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
  param.SetParameter(diversity, "cube-pruning-diversity",
                     DEFAULT_CUBE_PRUNING_DIVERSITY);
  param.SetParameter(pop_limit, "cube-pruning-pop-limit",
                     DEFAULT_CUBE_PRUNING_POP_LIMIT);
  // No cube-pruning-specific sanity constraints to enforce here.
  return true;
}
}

View File

@ -0,0 +1,20 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{
// Options controlling cube pruning during decoding.
struct
CubePruningOptions
{
size_t pop_limit;  // max. hypotheses popped from the cube queue (per cell)
size_t diversity;  // cube-pruning diversity setting — semantics defined by its users; confirm
bool lazy_scoring; // "cube-pruning-lazy-scoring" flag — presumably defers full scoring; confirm
bool init(Parameter const& param);
CubePruningOptions(Parameter const& param);
// Default-construct with the same defaults that init() would apply,
// so members are never read uninitialized (previously the default
// constructor left them indeterminate).
CubePruningOptions()
: pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT)
, diversity(DEFAULT_CUBE_PRUNING_DIVERSITY)
, lazy_scoring(false)
{}
};
}

View File

@ -0,0 +1,65 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "InputOptions.h"
#include <vector>
#include <iostream>
#include "moses/StaticData.h"
namespace Moses {

// Default constructor: give ALL members deterministic defaults so an
// InputOptions object is well-defined even before init() is called.
// (Previously only xml_brackets and input_type were set; xml_policy and
// the two boolean flags were left uninitialized.)
InputOptions::
InputOptions()
{
xml_brackets.first = "<";
xml_brackets.second = ">";
input_type = SentenceInput;
xml_policy = XmlPassThrough;
continue_partial_translation = false;
default_non_term_only_for_empty_range = false;
}

// Load input-related settings from the parameter store.
// Returns true on success; exits the process on a malformed
// xml-brackets specification.
bool
InputOptions::
init(Parameter const& param)
{
param.SetParameter(input_type, "inputtype", SentenceInput);
if (input_type == SentenceInput)
{ VERBOSE(2, "input type is: text input"); }
else if (input_type == ConfusionNetworkInput)
{ VERBOSE(2, "input type is: confusion net"); }
else if (input_type == WordLatticeInput)
{ VERBOSE(2, "input type is: word lattice"); }
else if (input_type == TreeInputType)
{ VERBOSE(2, "input type is: tree"); }
else if (input_type == TabbedSentenceInput)
{ VERBOSE(2, "input type is: tabbed sentence"); }
else if (input_type == ForestInputType)
{ VERBOSE(2, "input type is: forest"); }

param.SetParameter(continue_partial_translation,
"continue-partial-translation", false);
param.SetParameter(default_non_term_only_for_empty_range,
"default-non-term-for-empty-range-only", false);
param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);

// specify XML tags opening and closing brackets for XML option
// Do we really want this to be configurable???? UG
const PARAM_VEC *pspec;
pspec = param.GetParam("xml-brackets");
if (pspec && pspec->size())
{
std::vector<std::string> brackets = Tokenize(pspec->at(0));
if(brackets.size()!=2)
{
std::cerr << "invalid xml-brackets value, "
<< "must specify exactly 2 blank-delimited strings "
<< "for XML tags opening and closing brackets" << std::endl;
exit(1);
}
xml_brackets.first= brackets[0];
xml_brackets.second=brackets[1];
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
<< xml_brackets.first << " and "
<< xml_brackets.second << std::endl);
}
return true;
}
}

View File

@ -0,0 +1,25 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{
// Options describing the decoder's input: input type and XML markup
// handling. (Fixed: <string> was included twice.)
struct
InputOptions
{
bool continue_partial_translation;
// NOTE(review): exact semantics unclear from here (original comment
// read "whatever that means") — confirm against the users of
// "default-non-term-for-empty-range-only".
bool default_non_term_only_for_empty_range;
InputTypeEnum input_type; // sentence, confusion net, lattice, tree, ...
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
std::pair<std::string,std::string> xml_brackets;
// strings to use as XML tags' opening and closing brackets.
// Default are "<" and ">"
bool init(Parameter const& param);
InputOptions();
};
}

View File

@ -1,4 +1,5 @@
// -*- mode: c++; cc-style: gnu -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
namespace Moses

View File

@ -0,0 +1,21 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "ReorderingOptions.h"
namespace Moses {

// Pull the reordering-related settings out of the parameter store.
// The three parameters are independent of one another, so the order in
// which they are read does not matter. Always reports success.
bool
ReorderingOptions::
init(Parameter const& param)
{
param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
param.SetParameter(max_distortion, "distortion-limit", -1);
return true;
}

// Convenience constructor: build directly from the parameter store.
ReorderingOptions::
ReorderingOptions(Parameter const& param)
{
init(param);
}
}

View File

@ -0,0 +1,20 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{
struct
ReorderingOptions
{
int max_distortion;
bool monotone_at_punct;
bool use_early_distortion_cost;
bool init(Parameter const& param);
ReorderingOptions(Parameter const& param);
ReorderingOptions() {}
};
}

View File

@ -0,0 +1,50 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "SearchOptions.h"
namespace Moses
{

// True for every search algorithm that decodes over syntactic structure
// (chart-/tree-based) as opposed to phrase-based stack decoding.
bool
is_syntax(SearchAlgorithm algo)
{
switch (algo) {
case CYKPlus:
case ChartIncremental:
case SyntaxS2T:
case SyntaxT2S:
case SyntaxT2S_SCFG:
case SyntaxF2S:
return true;
default:
return false;
}
}

// Load all search-related settings from the parameter store.
// The individual parameters are read independently; thresholds are
// converted to the log domain immediately after being read.
bool
SearchOptions::
init(Parameter const& param)
{
param.SetParameter(algo, "search-algorithm", Normal);

// stack decoding limits
param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
param.SetParameter(max_phrase_length, "max-phrase-length",
DEFAULT_MAX_PHRASE_LENGTH);
param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
DEFAULT_MAX_TRANS_OPT_SIZE);
param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
DEFAULT_MAX_PART_TRANS_OPT_SIZE);
param.SetParameter(timeout, "time-out", 0);

// pruning thresholds, each transformed to a log score right away
param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
beam_width = TransformScore(beam_width);

param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
DEFAULT_EARLY_DISCARDING_THRESHOLD);
early_discarding_threshold = TransformScore(early_discarding_threshold);

param.SetParameter(trans_opt_threshold, "translation-option-threshold",
DEFAULT_TRANSLATION_OPTION_THRESHOLD);
trans_opt_threshold = TransformScore(trans_opt_threshold);

return true;
}

// Convenience constructor: build directly from the parameter store.
SearchOptions::
SearchOptions(Parameter const& param)
: stack_diversity(0)
{
init(param);
}
}

View File

@ -0,0 +1,44 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{
bool is_syntax(SearchAlgorithm algo);

// Decoder search settings (algorithm choice, stack limits, pruning
// thresholds). Thresholds are kept in the log domain (init() applies
// TransformScore to them).
struct
SearchOptions
{
SearchAlgorithm algo;

// stack decoding
size_t stack_size; // maxHypoStackSize;
size_t stack_diversity; // minHypoStackDiversity;
size_t max_phrase_length;
size_t max_trans_opt_per_cov;
size_t max_partial_trans_opt;
// beam search
float beam_width;

int timeout;

// reordering options
// bool reorderingConstraint; //! use additional reordering constraints
// bool useEarlyDistortionCost;

float early_discarding_threshold;
float trans_opt_threshold;

bool init(Parameter const& param);
SearchOptions(Parameter const& param);
// Default-construct with deterministic neutral values; the previous
// empty default constructor left every member uninitialized even
// though UseEarlyDiscarding() reads early_discarding_threshold.
// -inf log thresholds leave early discarding disabled; call init()
// to load the configured defaults.
SearchOptions()
: algo(Normal)
, stack_size(DEFAULT_MAX_HYPOSTACK_SIZE)
, stack_diversity(0)
, max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH)
, max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE)
, max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE)
, beam_width(-std::numeric_limits<float>::infinity())
, timeout(0)
, early_discarding_threshold(-std::numeric_limits<float>::infinity())
, trans_opt_threshold(-std::numeric_limits<float>::infinity())
{}

bool UseEarlyDiscarding() const {
return early_discarding_threshold != -std::numeric_limits<float>::infinity();
}
};
}

View File

@ -57,7 +57,7 @@ die "Cannot locate input at $input" unless (-f $input);
my $local_moses_ini = MosesRegressionTesting::get_localized_moses_ini($conf, $data_dir, $results_dir);
my ($nbestfile,$nbestsize) = MosesRegressionTesting::get_nbestlist($conf);
if (defined($nbestsize) && $nbestsize > 0){
if (defined($nbestsize) && $nbestsize > 0) {
$NBEST=$nbestsize;
}