Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-08-16 15:00:33 +03:00)
Reorganisation of options.
The purpose of this effort is to make options local to the individual translation task, so that they can be changed per task at runtime in a multi-threaded system.
This commit is contained in:
parent
fc10ad4afb
commit
524109e2ca
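The general pattern applied throughout the diff below, shown here as a hypothetical sketch (the local variable names and the comparison with the old accessors are illustrative, not part of the commit itself): values that were previously read through process-wide StaticData getters are now read from an AllOptions object reachable from the manager / translation task, so each task can in principle carry its own settings.

// Hypothetical illustration of the access pattern this commit introduces.
// Old style: one global value shared by every thread in the process.
//   size_t stackSize = StaticData::Instance().GetMaxHypoStackSize();
// New style: options resolved through the task-local AllOptions object.
//   size_t stackSize = manager.options().search.stack_size;
//   size_t popLimit  = manager.options().cube.pop_limit;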
@@ -140,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}

AllOptions const&
BaseManager::
options() const
{
return GetTtask()->options();
}

} // namespace

@@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;

@@ -51,6 +51,7 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;

virtual void Decode() = 0;
// outputs

@@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}

ChartCell::~ChartCell() {}

@@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}

// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

@@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.UseMBR()
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());

if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs

@@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();

m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

@@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;

// Extract the derivations.

@@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();

VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");

@@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}

bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();

bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;

for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {

@@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(

if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList, staticData.options().nbest.nbest_size);
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}

@@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {

if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

@@ -43,8 +43,10 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);

bool addBeginEndWord
= ((staticData.options().search.algo == CYKPlus)
|| (staticData.options().search.algo == ChartIncremental));

for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);
std::string line;

@@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)

vector<float> weights = staticData.GetWeights(this);

staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.options().search.stack_size = weights[0] * 1000;
staticData.options().search.beam_width = weights[1] * 10;

}

@@ -362,8 +362,8 @@ CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = (staticData.GetDistinctNBest() ||
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct ||
staticData.GetLatticeSamplesSize() ||
staticData.UseMBR() ||
staticData.GetOutputSearchGraph() ||

@@ -36,7 +36,7 @@ namespace Moses
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

@@ -36,7 +36,7 @@ namespace Moses
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

@@ -96,8 +96,8 @@ IOWrapper::IOWrapper()
const StaticData &staticData = StaticData::Instance();

// context buffering for context-sensitive decoding
m_look_ahead = staticData.GetContextParameters().look_ahead;
m_look_back = staticData.GetContextParameters().look_back;
m_look_ahead = staticData.options().context.look_ahead;
m_look_back = staticData.options().context.look_back;

m_inputType = staticData.GetInputType();

@@ -108,8 +108,8 @@ IOWrapper::IOWrapper()

m_inputFactorOrder = &staticData.GetInputFactorOrder();

size_t nBestSize = staticData.GetNBestSize();
string nBestFilePath = staticData.GetNBestFilePath();
size_t nBestSize = staticData.options().nbest.nbest_size;
string nBestFilePath = staticData.options().nbest.output_file_path;

staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
if (m_inputFilePath.empty()) {

@@ -208,7 +208,7 @@ Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
, n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
, n_best_(search::NBestConfig(StaticData::Instance().options().nbest.nbest_size))
{ }

Manager::~Manager()

@@ -223,12 +223,17 @@ namespace
const float log_10 = logf(10);
}

template <class Model, class Best> search::History Manager::PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
template <class Model, class Best>
search::History
Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
search::Config config(abstract.GetWeight() * log_10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);

size_t size = m_source.GetSize();

@@ -255,7 +260,7 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M

template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words)
{
std::size_t nbest = StaticData::Instance().GetNBestSize();
std::size_t nbest = StaticData::Instance().options().nbest.nbest_size;
if (nbest <= 1) {
search::History ret = PopulateBest(model, words, single_best_);
if (ret) {

@@ -71,7 +71,7 @@ Manager::Manager(ttasksptr const& ttask)
m_transOptColl = source->CreateTranslationOptionCollection(ttask);

const StaticData &staticData = StaticData::Instance();
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
SearchAlgorithm searchAlgorithm = staticData.options().search.algo;
m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
*m_transOptColl);

@@ -264,7 +264,7 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
}

// factor defines stopping point for distinct n-best list if too many candidates identical
size_t nBestFactor = StaticData::Instance().GetNBestFactor();
size_t nBestFactor = StaticData::Instance().options().nbest.factor;
if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited

// MAIN loop

@@ -288,7 +288,7 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co

if(onlyDistinct) {
const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
const size_t nBestFactor = StaticData::Instance().options().nbest.factor;
if (nBestFactor > 0)
contenders.Prune(count * nBestFactor);
} else {

@@ -1548,10 +1548,10 @@ void Manager::OutputBest(OutputCollector *collector) const

// lattice MBR
if (staticData.UseLatticeMBR()) {
if (staticData.IsNBestEnabled()) {
if (staticData.options().nbest.enabled) {
//lattice mbr nbest
vector<LatticeMBRSolution> solutions;
size_t n = min(nBestSize, staticData.GetNBestSize());
size_t n = min(nBestSize, staticData.options().nbest.nbest_size);
getLatticeMBRNBest(*this,nBestList,solutions,n);
OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
} else {

@@ -1609,14 +1609,16 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();

if (staticData.UseLatticeMBR()) {
if (staticData.IsNBestEnabled()) {
if (staticData.options().nbest.enabled) {
collector->Write(translationId, m_latticeNBestOut.str());
}
} else {
TrellisPathList nBestList;
ostringstream out;
CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(),
CalcNBest(staticData.options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct);
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(),
m_source.GetTranslationId(),
staticData.GetReportSegmentation());
collector->Write(m_source.GetTranslationId(), out.str());
}

@@ -1630,9 +1632,10 @@ void Manager::OutputNBest(std::ostream& out
, char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
bool reportAllFactors = staticData.GetReportAllFactorsNBest();
bool includeSegmentation = staticData.NBestIncludesSegmentation();
bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
NBestOptions const& nbo = staticData.options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
bool includeWordAlignment = nbo.include_alignment_info;

TrellisPathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {

@@ -44,7 +44,7 @@ RuleCube::RuleCube(const ChartTranslationOptions &transOpt,
{
RuleCubeItem *item = new RuleCubeItem(transOpt, allChartCells);
m_covered.insert(item);
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
item->EstimateScore();
} else {
item->CreateHypothesis(transOpt, manager);

@@ -92,7 +92,7 @@ void RuleCube::CreateNeighbor(const RuleCubeItem &item, int dimensionIndex,
if (!result.second) {
delete newItem; // already seen it
} else {
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
newItem->EstimateScore();
} else {
newItem->CreateHypothesis(m_transOpt, manager);

@@ -50,7 +50,7 @@ ChartHypothesis *RuleCubeQueue::Pop()
// pop the most promising item from the cube and get the corresponding
// hypothesis
RuleCubeItem *item = cube->Pop(m_manager);
if (StaticData::Instance().GetCubePruningLazyScoring()) {
if (StaticData::Instance().options().cube.lazy_scoring) {
item->CreateHypothesis(cube->GetTranslationOption(), m_manager);
}
ChartHypothesis *hypo = item->ReleaseHypothesis();

@@ -330,7 +330,7 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
, std::string &lastName ) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
bool labeledOutput = staticData.options().nbest.include_feature_labels;

// regular features (not sparse)
if (ff->HasTuneableComponents()) {

@@ -9,15 +9,20 @@ namespace Moses

Search::Search(Manager& manager)
: m_manager(manager)
,m_inputPath()
,m_initialTransOpt()
, m_inputPath()
, m_initialTransOpt()
, m_options(manager.options())
, interrupted_flag(0)
{
m_initialTransOpt.SetInputPath(m_inputPath);
}

Search *Search::CreateSearch(Manager& manager, const InputType &source,
SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
Search *
Search::
CreateSearch(Manager& manager, const InputType &source,
SearchAlgorithm searchAlgorithm,
const TranslationOptionCollection &transOptColl)
{
switch(searchAlgorithm) {
case Normal:

@@ -32,4 +37,18 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
}
}

bool
Search::
out_of_time()
{
int const& timelimit = m_options.search.timeout;
if (!timelimit) return false;
double elapsed_time = GetUserTime();
if (elapsed_time <= timelimit) return false;
VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
<< timelimit << ")" << std::endl);
interrupted_flag = 1;
return true;
}

}

@@ -43,6 +43,12 @@ protected:
Manager& m_manager;
InputPath m_inputPath; // for initial hypo
TranslationOption m_initialTransOpt; /**< used to seed 1st hypo */
AllOptions const& m_options;

/** flag indicating that decoder ran out of time (see switch -time-out) */
size_t interrupted_flag;

bool out_of_time();
};

}

@@ -48,8 +48,8 @@ SearchCubePruning::SearchCubePruning(Manager& manager, const InputType &source,
std::vector < HypothesisStackCubePruning >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
sourceHypoColl->SetMaxHypoStackSize(m_options.search.stack_size);
sourceHypoColl->SetBeamWidth(m_options.search.beam_width);

m_hypoStackColl[ind] = sourceHypoColl;
}

@@ -66,7 +66,8 @@ SearchCubePruning::~SearchCubePruning()
*/
void SearchCubePruning::Decode()
{
const StaticData &staticData = StaticData::Instance();
const StaticData &SD = StaticData::Instance();
AllOptions const& opts = SD.options();

// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTransOpt);

@@ -77,20 +78,22 @@ void SearchCubePruning::Decode()
firstStack.CleanupArcList();
CreateForwardTodos(firstStack);

const size_t PopLimit = StaticData::Instance().GetCubePruningPopLimit();
VERBOSE(3,"Cube Pruning pop limit is " << PopLimit << std::endl)
const size_t PopLimit = StaticData::Instance().options().cube.pop_limit;
VERBOSE(3,"Cube Pruning pop limit is " << PopLimit << std::endl);

const size_t Diversity = StaticData::Instance().GetCubePruningDiversity();
const size_t Diversity = StaticData::Instance().options().cube.diversity;
VERBOSE(3,"Cube Pruning diversity is " << Diversity << std::endl)

// go through each stack
size_t stackNo = 1;
int timelimit = m_options.search.timeout;
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() + 1 ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
if (timelimit && _elapsed_time > timelimit) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << ","
<< timelimit << ")" << std::endl);
return;
}
HypothesisStackCubePruning &sourceHypoColl = *static_cast<HypothesisStackCubePruning*>(*iterStack);

@@ -144,7 +147,7 @@ void SearchCubePruning::Decode()
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
sourceHypoColl.PruneToSize(m_options.search.stack_size);
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {

@@ -15,15 +15,21 @@ namespace Moses
* /param source input sentence
* /param transOptColl collection of translation options to be used for this sentence
*/
SearchNormal::SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
:Search(manager)
,m_source(source)
,m_hypoStackColl(source.GetSize() + 1)
,interrupted_flag(0)
,m_transOptColl(transOptColl)
SearchNormal::
SearchNormal(Manager& manager, const InputType &source,
const TranslationOptionCollection &transOptColl)
: Search(manager)
, m_source(source)
, m_hypoStackColl(source.GetSize() + 1)
, m_transOptColl(transOptColl)
{
VERBOSE(1, "Translating: " << m_source << endl);
const StaticData &staticData = StaticData::Instance();

// m_beam_width = manager.options().search.beam_width;
// m_stack_size = manager.options().search.stack_size;
// m_stack_diversity = manager.options().search.stack_diversity;
// m_timeout = manager.options().search.timeout;
// m_max_distortion = manager.options().reordering.max_distortion;

// only if constraint decoding (having to match a specified output)
// long sentenceID = source.GetTranslationId();

@@ -32,10 +38,9 @@ SearchNormal::SearchNormal(Manager& manager, const InputType &source, const Tran
std::vector < HypothesisStackNormal >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),
staticData.GetMinHypoStackDiversity());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());

sourceHypoColl->SetMaxHypoStackSize(this->m_options.search.stack_size,
this->m_options.search.stack_diversity);
sourceHypoColl->SetBeamWidth(this->m_options.search.beam_width);
m_hypoStackColl[ind] = sourceHypoColl;
}
}

@@ -45,59 +50,49 @@ SearchNormal::~SearchNormal()
RemoveAllInColl(m_hypoStackColl);
}

bool
SearchNormal::
ProcessOneStack(HypothesisStack* hstack)
{
if (this->out_of_time()) return false;
SentenceStats &stats = m_manager.GetSentenceStats();
HypothesisStackNormal &sourceHypoColl
= *static_cast<HypothesisStackNormal*>(hstack);

// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) stats.StartTimeStack();
sourceHypoColl.PruneToSize(m_options.search.stack_size);
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) stats.StopTimeStack();

// go through each hypothesis on the stack and try to expand it
BOOST_FOREACH(Hypothesis* h, sourceHypoColl)
ProcessOneHypothesis(*h);
return true;
}

/**
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
void SearchNormal::Decode()
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();

// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTransOpt);
Hypothesis *hypo = Hypothesis::Create(m_manager, m_source, m_initialTransOpt);
m_hypoStackColl[0]->AddPrune(hypo);

// go through each stack
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
interrupted_flag = 1;
return;
}
HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);

// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) {
stats.StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {
stats.StopTimeStack();
}

// go through each hypothesis on the stack and try to expand it
HypothesisStackNormal::const_iterator iterHypo;
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) {
Hypothesis &hypothesis = **iterHypo;
ProcessOneHypothesis(hypothesis); // expand the hypothesis
}
// some logging
IFVERBOSE(2) {
OutputHypoStackSize();
}

// this stack is fully expanded;
actual_hypoStack = &sourceHypoColl;

BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
if (!ProcessOneStack(hstack)) return;
IFVERBOSE(2) OutputHypoStackSize();
actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
}
//OutputHypoStack();
}

@@ -111,8 +106,8 @@ SearchNormal::
ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
// int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = m_source.GetType() == WordLatticeInput;

const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();

@@ -122,7 +117,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
ReoConstraint = m_source.GetReorderingConstraint();

// no limit of reordering: only check for overlap
if (maxDistortion < 0) {
if (m_options.reordering.max_distortion < 0) {

for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
TranslationOptionList const* tol;

@@ -152,7 +147,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
if(hypoBitmap.GetValue(startPos)) continue;

size_t maxSize = sourceSize - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
size_t maxSizePhrase = m_options.search.max_phrase_length;
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);

@@ -178,7 +173,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)

WordsRange currentStartRange(startPos, startPos);
if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
> maxDistortion)
> m_options.reordering.max_distortion)
continue;

TranslationOptionList const* tol;

@@ -227,7 +222,7 @@ ProcessOneHypothesis(const Hypothesis &hypothesis)
WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);

if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
> maxDistortion) continue;
> m_options.reordering.max_distortion) continue;

// everything is fine, we're good to go
ExpandAllHypotheses(hypothesis, startPos, endPos);

@@ -251,7 +246,7 @@ ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos
// early discarding: check if hypothesis is too bad to build
// this idea is explained in (Moore&Quirk, MT Summit 2007)
float expectedScore = 0.0f;
if (StaticData::Instance().UseEarlyDiscarding()) {
if (m_options.search.UseEarlyDiscarding()) {
// expected score is based on score of current hypothesis
expectedScore = hypothesis.GetScore();

@@ -286,7 +281,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
SentenceStats &stats = m_manager.GetSentenceStats();

Hypothesis *newHypo;
if (! staticData.UseEarlyDiscarding()) {
if (! m_options.search.UseEarlyDiscarding()) {
// simple build, no questions asked
IFVERBOSE(2) {
stats.StartTimeBuildHyp();

@@ -303,7 +298,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
// worst possible score may have changed -> recompute
size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize();
float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
if (staticData.GetMinHypoStackDiversity()) {
if (m_options.search.stack_diversity) {
WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
allowedScore = std::min( allowedScore, allowedScoreForBitmap );

@@ -14,23 +14,38 @@ class Manager;
class InputType;
class TranslationOptionCollection;

/** Functions and variables you need to decoder an input using the phrase-based decoder (NO cube-pruning)
/** Functions and variables you need to decoder an input using the
* phrase-based decoder (NO cube-pruning)
* Instantiated by the Manager class
*/
class SearchNormal: public Search
{
protected:
const InputType &m_source;
std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
//! stacks to store hypotheses (partial translations)
// no of elements = no of words in source + 1
size_t interrupted_flag; /**< flag indicating that decoder ran out of time (see switch -time-out) */
HypothesisStackNormal* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
std::vector < HypothesisStack* > m_hypoStackColl;

/** actual (full expanded) stack of hypotheses*/
HypothesisStackNormal* actual_hypoStack;

/** pre-computed list of translation options for the phrases in this sentence */
const TranslationOptionCollection &m_transOptColl;

// functions for creating hypotheses
void ProcessOneHypothesis(const Hypothesis &hypothesis);
void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
virtual void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);

virtual bool
ProcessOneStack(HypothesisStack* hstack);

virtual void
ProcessOneHypothesis(const Hypothesis &hypothesis);

virtual void
ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);

virtual void
ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt,
float expectedScore);

public:
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);

@@ -3,6 +3,7 @@
#include "Manager.h"
#include "Hypothesis.h"
#include "util/exception.hh"
#include <boost/foreach.hpp>

//#include <google/profiler.h>

@@ -14,7 +15,7 @@ SearchNormalBatch::SearchNormalBatch(Manager& manager, const InputType &source,
:SearchNormal(manager, source, transOptColl)
,m_batch_size(10000)
{
m_max_stack_size = StaticData::Instance().GetMaxHypoStackSize();
m_max_stack_size = m_options.search.stack_size;

// Split the feature functions into sets of stateless, stateful
// distributed lm, and stateful non-distributed.

@@ -50,47 +51,13 @@ void SearchNormalBatch::Decode()
m_hypoStackColl[0]->AddPrune(hypo);

// go through each stack
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// check if decoding ran out of time
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()) {
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
interrupted_flag = 1;
return;
}
HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);

// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
IFVERBOSE(2) {
stats.StartTimeStack();
}
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
IFVERBOSE(2) {
stats.StopTimeStack();
}

// go through each hypothesis on the stack and try to expand it
HypothesisStackNormal::const_iterator iterHypo;
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) {
Hypothesis &hypothesis = **iterHypo;
ProcessOneHypothesis(hypothesis); // expand the hypothesis
}
EvalAndMergePartialHypos();

// some logging
IFVERBOSE(2) {
OutputHypoStackSize();
}

// this stack is fully expanded;
actual_hypoStack = &sourceHypoColl;
BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
if (!ProcessOneStack(hstack)) return;
EvalAndMergePartialHypos(); // <= THAT is the difference to SearchNormal!
IFVERBOSE(2) OutputHypoStackSize();
actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
}

EvalAndMergePartialHypos();
EvalAndMergePartialHypos(); // <= THAT is the difference to SearchNormal!
}

/**

@@ -106,7 +73,8 @@ void SearchNormalBatch::Decode()
void
SearchNormalBatch::
ExpandHypothesis(const Hypothesis &hypothesis,
const TranslationOption &transOpt, float expectedScore)
const TranslationOption &transOpt,
float expectedScore)
{
// Check if the number of partial hypotheses exceeds the batch size.
if (m_partial_hypos.size() >= m_batch_size) {

@@ -181,7 +181,7 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff

// if sentences is specified as "<passthrough tag1=""/>"
if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) {
if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) {
string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru);
}

@@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
// vim:tabstop=2

@@ -63,8 +64,6 @@ StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")

@@ -80,16 +79,6 @@ StaticData::StaticData()
StaticData::~StaticData()
{
RemoveAllInColl(m_decodeGraphs);

/*
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
for(size_t i=0;i<producers.size();++i) {
FeatureFunction *ff = producers[i];
delete ff;
}
*/

// memory pools
Phrase::FinalizeMemPool();
}

@@ -199,39 +188,19 @@ StaticData
}

m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false );

//word-to-word alignment
// alignments
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );

// if (m_PrintAlignmentInfo) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }

m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);

// if (m_PrintAlignmentInfoNbest) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }

params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
// m_needAlignmentInfo = true; // => now in BookkeepingOptions::init()
}

m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
// m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false ); // => now in BookkeepingOptions::init()

// word graph
params = m_parameter->GetParam("output-word-graph");
if (params && params->size() == 2)
m_outputWordGraph = true;
else
m_outputWordGraph = false;
m_outputWordGraph = (params && params->size() == 2);

// search graph
params = m_parameter->GetParam("output-search-graph");
if (params && params->size()) {
if (params->size() != 1) {

@@ -240,6 +209,7 @@ StaticData
}
m_outputSearchGraph = true;
}

// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended") &&
m_parameter->GetParam("output-search-graph-extended")->size()) {

@@ -298,15 +268,14 @@ StaticData
m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false );

// additional output
m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath, "translation-details", "");
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath, "tree-translation-details", "");

//DIMw
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath, "translation-all-details", "");

m_parameter->SetParameter<string>(m_detailedTranslationReportingFilePath,
"translation-details", "");
m_parameter->SetParameter<string>(m_detailedTreeFragmentsTranslationReportingFilePath,
"tree-translation-details", "");
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath,
"translation-all-details", "");
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);

//lattice samples
params = m_parameter->GetParam("lattice-samples");
if (params) {

@@ -323,14 +292,6 @@ StaticData
return true;
}

bool
StaticData
::ini_nbest_options()
{
return m_nbest_options.init(*m_parameter);
}

void
StaticData
::ini_compact_table_options()

@@ -353,8 +314,8 @@ StaticData
::ini_performance_options()
{
const PARAM_VEC *params;
m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
// m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
// m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;

m_threadCount = 1;
params = m_parameter->GetParam("threads");

@@ -388,18 +349,6 @@ StaticData
return true;
}

void
StaticData
::ini_cube_pruning_options()
{
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT);
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
false);
}

void
StaticData
::ini_factor_maps()

@@ -453,45 +402,42 @@ void
StaticData
::ini_distortion_options()
{
// reordering constraints
m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);

m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );

// early distortion cost
m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );

// // reordering constraints
// m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);

// m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );

// // early distortion cost
// m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
}

bool
StaticData
::ini_stack_decoding_options()
{
const PARAM_VEC *params;
// settings for pruning
m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
// const PARAM_VEC *params;
// // settings for pruning
// m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);

m_minHypoStackDiversity = 0;
params = m_parameter->GetParam("stack-diversity");
if (params && params->size()) {
if (m_maxDistortion > 15) {
std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
return false;
}
if (m_inputType == WordLatticeInput) {
std::cerr << "stack diversity > 0 is not allowed for lattice input";
return false;
}
m_minHypoStackDiversity = Scan<size_t>(params->at(0));
}
// m_minHypoStackDiversity = 0;
// params = m_parameter->GetParam("stack-diversity");
// if (params && params->size()) {
// if (m_maxDistortion > 15) {
// std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
// return false;
// }
// if (m_inputType == WordLatticeInput) {
// std::cerr << "stack diversity > 0 is not allowed for lattice input";
// return false;
// }
// m_minHypoStackDiversity = Scan<size_t>(params->at(0));
// }

m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
m_beamWidth = TransformScore(m_beamWidth);
// m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
// m_beamWidth = TransformScore(m_beamWidth);

m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
// m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
// m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
return true;
}

@@ -499,12 +445,12 @@ void
StaticData
::ini_phrase_lookup_options()
{
m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
// m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
// m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);

m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
// m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
// m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
// m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);

}

@@ -583,10 +529,11 @@ bool StaticData::LoadData(Parameter *parameter)

const PARAM_VEC *params;

m_context_parameters.init(*parameter);
m_options.init(*parameter);
// m_context_parameters.init(*parameter);

// to cube or not to cube
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
// m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);

if (IsSyntax())
LoadChartDecodingParameters();

@@ -596,7 +543,7 @@ bool StaticData::LoadData(Parameter *parameter)
ini_factor_maps();
ini_input_options();
m_bookkeeping_options.init(*parameter);
m_nbest_options.init(*parameter); // if (!ini_nbest_options()) return false;
// m_nbest_options.init(*parameter);
if (!ini_output_options()) return false;

// threading etc.

@@ -609,7 +556,7 @@ bool StaticData::LoadData(Parameter *parameter)
ini_distortion_options();
if (!ini_stack_decoding_options()) return false;
ini_phrase_lookup_options();
ini_cube_pruning_options();
// ini_cube_pruning_options();

ini_oov_options();
ini_mbr_options();

@@ -625,7 +572,7 @@ bool StaticData::LoadData(Parameter *parameter)
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size()) {
m_nbest_options.enabled = true;
m_options.nbest.enabled = true;
}

// S2T decoder

@@ -1272,8 +1219,9 @@ StaticData

// FIXME Does this make sense for F2S? Perhaps it should be changed once
// FIXME the pipeline uses RuleTable consistently.
if (m_searchAlgorithm == SyntaxS2T || m_searchAlgorithm == SyntaxT2S ||
m_searchAlgorithm == SyntaxT2S_SCFG || m_searchAlgorithm == SyntaxF2S) {
SearchAlgorithm algo = m_options.search.algo;
if (algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxT2S_SCFG || algo == SyntaxF2S) {
// Automatically override PhraseDictionary{Memory,Scope3}. This will
// have to change if the FF parameters diverge too much in the future,
// but for now it makes switching between the old and new decoders much

@@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$

/***********************************************************************

@@ -44,8 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/Factory.h"
#include "moses/PP/Factory.h"

#include "moses/parameters/ContextParameters.h"
#include "moses/parameters/NBestOptions.h"
#include "moses/parameters/AllOptions.h"
#include "moses/parameters/BookkeepingOptions.h"

namespace Moses

@@ -70,11 +69,10 @@ class StaticData
friend class HyperParameterAsWeight;

private:
static StaticData s_instance;
static StaticData s_instance;
protected:
Parameter *m_parameter;

ContextParameters m_context_parameters;
AllOptions m_options;

std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
mutable ScoreComponentCollection m_allWeights;

@@ -84,34 +82,29 @@ protected:
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
float
m_beamWidth,
m_earlyDiscardingThreshold,
m_translationOptionThreshold,
// m_beamWidth,
// m_earlyDiscardingThreshold,
// m_translationOptionThreshold,
m_wordDeletionWeight;

// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
bool m_useEarlyDistortionCost;
size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
NBestOptions m_nbest_options;
// bool m_useEarlyDistortionCost;
// size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
// size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
BookkeepingOptions m_bookkeeping_options;
// size_t m_nBestSize;
// size_t m_nBestFactor;

size_t m_latticeSamplesSize;
size_t m_maxNoTransOptPerCoverage;
size_t m_maxNoPartTransOpt;
size_t m_maxPhraseLength;
// size_t m_maxNoTransOptPerCoverage;
// size_t m_maxNoPartTransOpt;
// size_t m_maxPhraseLength;

// std::string m_nBestFilePath;
std::string m_latticeSamplesFilePath;
// bool m_labeledNBestList,m_nBestIncludesSegmentation;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
std::string m_unknownWordPrefix;

@@ -127,7 +120,7 @@ protected:
bool m_outputHypoScore;
bool m_requireSortingAfterSourceContext;

SearchAlgorithm m_searchAlgorithm;
// SearchAlgorithm m_searchAlgorithm;
InputTypeEnum m_inputType;

mutable size_t m_verboseLevel;

@@ -135,21 +128,15 @@ protected:
bool m_reportSegmentation;
bool m_reportSegmentationEnriched;
bool m_reportAllFactors;
// bool m_reportAllFactorsNBest;
std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath;

//DIMw
std::string m_detailedAllTranslationReportingFilePath;

// bool m_onlyDistinctNBest;
bool m_PrintAlignmentInfo;
// bool m_needAlignmentInfo; // => BookkeepingOptions
// bool m_PrintAlignmentInfoNbest;

bool m_PrintID;
bool m_PrintPassthroughInformation;
// bool m_PrintPassthroughInformationInNBest;

std::string m_alignmentOutputFile;

@@ -174,8 +161,8 @@ protected:
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;

bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
// bool m_timeout; //! use timeout
// size_t m_timeout_threshold; //! seconds after which time out is activated

bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created

@@ -192,9 +179,6 @@ protected:
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
std::string m_outputUnknownsFile; //! output unknowns in this file

size_t m_cubePruningPopLimit;
size_t m_cubePruningDiversity;
bool m_cubePruningLazyScoring;
size_t m_ruleLimit;

// Whether to load compact phrase table and reordering table into memory

@@ -221,7 +205,6 @@ protected:
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
S2TParsingAlgorithm m_s2tParsingAlgorithm;
// bool m_printNBestTrees;

FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;

@@ -260,7 +243,6 @@ protected:
void ini_lmbr_options();
void ini_mbr_options();
void ini_mira_options();
bool ini_nbest_options();
void ini_oov_options();
bool ini_output_options();
bool ini_performance_options();

@@ -307,9 +289,14 @@ public:
return *m_parameter;
}

const ContextParameters&
GetContextParameters() const {
return m_context_parameters;
AllOptions const&
options() const {
return m_options;
}

AllOptions&
options() {
return m_options;
}

const std::vector<FactorType> &GetInputFactorOrder() const {

@@ -338,32 +325,24 @@ public:
return m_disableDiscarding;
}
inline size_t GetMaxNoTransOptPerCoverage() const {
return m_maxNoTransOptPerCoverage;
return m_options.search.max_trans_opt_per_cov;
}
inline size_t GetMaxNoPartTransOpt() const {
return m_maxNoPartTransOpt;
return m_options.search.max_partial_trans_opt;
}
inline size_t GetMaxPhraseLength() const {
return m_maxPhraseLength;
return m_options.search.max_phrase_length;
}
bool IsWordDeletionEnabled() const {
return m_wordDeletionEnabled;
}
size_t GetMaxHypoStackSize() const {
return m_maxHypoStackSize;
}
size_t GetMinHypoStackDiversity() const {
return m_minHypoStackDiversity;
}
size_t GetCubePruningPopLimit() const {
return m_cubePruningPopLimit;
}
size_t GetCubePruningDiversity() const {
return m_cubePruningDiversity;
}
bool GetCubePruningLazyScoring() const {
return m_cubePruningLazyScoring;
}
// size_t GetMaxHypoStackSize() const {
// return m_options.search.stack_size;
// }
// size_t GetMinHypoStackDiversity() const {
// return m_options.search.stack_diversity;
// }

size_t IsPathRecoveryEnabled() const {
return m_recoverPath;
}

@@ -373,30 +352,30 @@ public:
bool IsPassthroughEnabled() const {
return m_PrintPassthroughInformation;
}
bool IsPassthroughInNBestEnabled() const {
return m_nbest_options.include_passthrough;
// return m_PrintPassthroughInformationInNBest;
}

int GetMaxDistortion() const {
return m_maxDistortion;
return m_options.reordering.max_distortion;
}
bool UseReorderingConstraint() const {
return m_reorderingConstraint;
}
float GetBeamWidth() const {
return m_beamWidth;
return m_options.search.beam_width;
}
float GetEarlyDiscardingThreshold() const {
return m_earlyDiscardingThreshold;
return m_options.search.early_discarding_threshold;
}

bool UseEarlyDiscarding() const {
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
return m_options.search.early_discarding_threshold
!= -std::numeric_limits<float>::infinity();
}
bool UseEarlyDistortionCost() const {
return m_useEarlyDistortionCost;
return m_options.reordering.use_early_distortion_cost;
// return m_useEarlyDistortionCost;
}
float GetTranslationOptionThreshold() const {
return m_translationOptionThreshold;
return m_options.search.trans_opt_threshold;
}

size_t GetVerboseLevel() const {

@@ -420,13 +399,11 @@ public:
else
std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring";
}

bool GetReportAllFactors() const {
return m_reportAllFactors;
}
bool GetReportAllFactorsNBest() const {
return m_nbest_options.include_all_factors;
// return m_reportAllFactorsNBest;
}

bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
}

@@ -444,10 +421,10 @@ public:
const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
bool IsLabeledNBestList() const {
return m_nbest_options.include_feature_labels;
// return m_labeledNBestList;
}

// bool IsLabeledNBestList() const {
// return m_options.nbest.include_feature_labels;
// }

bool UseMinphrInMemory() const {
return m_minphrMemory;
}

@@ -458,26 +435,17 @@ public:
}

// for mert
size_t GetNBestSize() const {
|
||||
return m_nbest_options.nbest_size;
|
||||
// return m_nBestSize;
|
||||
}
|
||||
// size_t GetNBestSize() const {
|
||||
// return m_options.nbest.nbest_size;
|
||||
// }
|
||||
|
||||
const std::string &GetNBestFilePath() const {
|
||||
return m_nbest_options.output_file_path;
|
||||
// return m_nBestFilePath;
|
||||
}
|
||||
// const std::string &GetNBestFilePath() const {
|
||||
// return m_options.nbest.output_file_path;
|
||||
// }
|
||||
|
||||
bool IsNBestEnabled() const {
|
||||
return m_nbest_options.enabled;
|
||||
// return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
|
||||
// m_outputSearchGraph || m_outputSearchGraphSLF ||
|
||||
// m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
|
||||
// #ifdef HAVE_PROTOBUF
|
||||
// m_outputSearchGraphPB ||
|
||||
// #endif
|
||||
// !m_latticeSamplesFilePath.empty());
|
||||
}
|
||||
// bool IsNBestEnabled() const {
|
||||
// return m_options.nbest.enabled;
|
||||
// }
|
||||
|
||||
size_t GetLatticeSamplesSize() const {
|
||||
return m_latticeSamplesSize;
|
||||
@ -487,10 +455,9 @@ public:
|
||||
return m_latticeSamplesFilePath;
|
||||
}
|
||||
|
||||
size_t GetNBestFactor() const {
|
||||
return m_nbest_options.factor;
|
||||
// return m_nBestFactor;
|
||||
}
|
||||
// size_t GetNBestFactor() const {
|
||||
// return m_options.nbest.factor;
|
||||
// }
|
||||
bool GetOutputWordGraph() const {
|
||||
return m_outputWordGraph;
|
||||
}
|
||||
@ -499,22 +466,15 @@ public:
|
||||
InputTypeEnum GetInputType() const {
|
||||
return m_inputType;
|
||||
}
|
||||
SearchAlgorithm GetSearchAlgorithm() const {
|
||||
return m_searchAlgorithm;
|
||||
}
|
||||
|
||||
// bool IsSyntax() const {
|
||||
// return m_searchAlgorithm == CYKPlus ||
|
||||
// m_searchAlgorithm == ChartIncremental ||
|
||||
// m_searchAlgorithm == SyntaxS2T ||
|
||||
// m_searchAlgorithm == SyntaxT2S ||
|
||||
// m_searchAlgorithm == SyntaxT2S_SCFG ||
|
||||
// m_searchAlgorithm == SyntaxF2S;
|
||||
// SearchAlgorithm GetSearchAlgorithm() const {
|
||||
// return m_searchAlgorithm;
|
||||
// }
|
||||
|
||||
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
|
||||
if (algo == DefaultSearchAlgorithm)
|
||||
algo = m_searchAlgorithm;
|
||||
algo = m_options.search.algo;
|
||||
|
||||
return (algo == CYKPlus || algo == ChartIncremental ||
|
||||
algo == SyntaxS2T || algo == SyntaxT2S ||
|
||||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
|
||||
@ -546,10 +506,9 @@ public:
|
||||
//Weights for feature with fixed number of values
|
||||
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
|
||||
|
||||
bool GetDistinctNBest() const {
|
||||
return m_nbest_options.only_distinct;
|
||||
// return m_onlyDistinctNBest;
|
||||
}
|
||||
// bool GetDistinctNBest() const {
|
||||
// return m_options.nbest.only_distinct;
|
||||
// }
|
||||
const std::string& GetFactorDelimiter() const {
|
||||
return m_factorDelimiter;
|
||||
}
|
||||
@ -603,12 +562,12 @@ public:
|
||||
return m_lmbrMapWeight;
|
||||
}
|
||||
|
||||
bool UseTimeout() const {
|
||||
return m_timeout;
|
||||
}
|
||||
size_t GetTimeoutThreshold() const {
|
||||
return m_timeout_threshold;
|
||||
}
|
||||
// bool UseTimeout() const {
|
||||
// return m_timeout;
|
||||
// }
|
||||
// size_t GetTimeoutThreshold() const {
|
||||
// return m_timeout_threshold;
|
||||
// }
|
||||
|
||||
size_t GetLMCacheCleanupThreshold() const {
|
||||
return m_lmcache_cleanup_threshold;
|
||||
@ -722,19 +681,11 @@ public:
|
||||
bool PrintAlignmentInfo() const {
|
||||
return m_PrintAlignmentInfo;
|
||||
}
|
||||
bool PrintAlignmentInfoInNbest() const {
|
||||
return m_nbest_options.include_alignment_info;
|
||||
// return m_PrintAlignmentInfoNbest;
|
||||
}
|
||||
|
||||
WordAlignmentSort GetWordAlignmentSort() const {
|
||||
return m_wordAlignmentSort;
|
||||
}
|
||||
|
||||
bool NBestIncludesSegmentation() const {
|
||||
return m_nbest_options.include_segmentation;
|
||||
// return m_nBestIncludesSegmentation;
|
||||
}
|
||||
|
||||
bool GetHasAlternateWeightSettings() const {
|
||||
return m_weightSetting.size() > 0;
|
||||
}
|
||||
@ -872,11 +823,6 @@ public:
|
||||
return m_s2tParsingAlgorithm;
|
||||
}
|
||||
|
||||
bool PrintNBestTrees() const {
|
||||
return m_nbest_options.print_trees;
|
||||
// return m_printNBestTrees;
|
||||
}
|
||||
|
||||
bool RequireSortingAfterSourceContext() const {
|
||||
return m_requireSortingAfterSourceContext;
|
||||
}
|
||||
|
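For orientation on the hunks above: the StaticData getters now mostly forward into nested option groups instead of flat members. Below is a minimal standalone sketch of that forwarding pattern; all names are hypothetical stand-ins for illustration, not the Moses classes, and the default values are assumed.

#include <cstddef>

// Sketch: legacy getters kept as thin forwarders into an aggregated options object.
struct SketchSearchOptions {
  std::size_t stack_size = 200;     // assumed default, illustration only
  float beam_width = 0.00001f;      // assumed default, illustration only
};

struct SketchCubeOptions {
  std::size_t pop_limit = 1000;     // assumed default, illustration only
};

struct SketchAllOptions {
  SketchSearchOptions search;
  SketchCubeOptions cube;
};

class SketchStaticData {
  SketchAllOptions m_options;
public:
  SketchAllOptions const& options() const { return m_options; }
  // legacy-style accessors simply read from the aggregated object
  std::size_t GetMaxHypoStackSize() const { return m_options.search.stack_size; }
  std::size_t GetCubePruningPopLimit() const { return m_options.cube.pop_limit; }
};

int main() {
  SketchStaticData sd;
  return sd.GetCubePruningPopLimit() == sd.options().cube.pop_limit ? 0 : 1;
}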
@ -59,9 +59,9 @@ void Manager<RuleMatcher>::Decode()
const StaticData &staticData = StaticData::Instance();

// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = staticData.options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = staticData.options().search.stack_size;

// Initialize the stacks.
InitializeStacks();

@ -254,7 +254,7 @@ void Manager<RuleMatcher>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;

// Extract the derivations.

@ -52,8 +52,8 @@ void Manager::OutputNBest(OutputCollector *collector) const
long translationId = m_source.GetTranslationId();

KBestExtractor::KBestVec nBestList;
ExtractKBest(staticData.GetNBestSize(), nBestList,
staticData.GetDistinctNBest());
ExtractKBest(staticData.options().nbest.nbest_size, nBestList,
staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
}
}

@ -90,8 +90,8 @@ void Manager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}

bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
bool PrintNBestTrees = staticData.PrintNBestTrees();
bool includeWordAlignment = staticData.options().nbest.include_alignment_info;
bool PrintNBestTrees = staticData.options().nbest.print_trees; // PrintNBestTrees();

for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {

@ -31,14 +31,14 @@ void RuleTableFF::Load()
SetFeaturesToApply();

const StaticData &staticData = StaticData::Instance();
if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
staticData.GetSearchAlgorithm() == SyntaxT2S) {
if (staticData.options().search.algo == SyntaxF2S ||
staticData.options().search.algo == SyntaxT2S) {
F2S::HyperTree *trie = new F2S::HyperTree(this);
F2S::HyperTreeLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie,
m_sourceTerminalSet);
m_table = trie;
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
} else if (staticData.options().search.algo == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
if (algorithm == RecursiveCYKPlus) {
S2T::RuleTrieCYKPlus *trie = new S2T::RuleTrieCYKPlus(this);

@ -53,7 +53,7 @@ void RuleTableFF::Load()
} else {
UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
}
} else if (staticData.GetSearchAlgorithm() == SyntaxT2S_SCFG) {
} else if (staticData.options().search.algo == SyntaxT2S_SCFG) {
T2S::RuleTrie *trie = new T2S::RuleTrie(this);
T2S::RuleTrieLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie);

@ -162,9 +162,9 @@ void Manager<Parser>::Decode()
const StaticData &staticData = StaticData::Instance();

// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = staticData.options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = staticData.options().search.stack_size;

// Initialise the PChart and SChart.
InitializeCharts();

@ -302,7 +302,7 @@ void Manager<Parser>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;

// Extract the derivations.

@ -96,9 +96,9 @@ void Manager<RuleMatcher>::Decode()
const StaticData &staticData = StaticData::Instance();

// Get various pruning-related constants.
const std::size_t popLimit = staticData.GetCubePruningPopLimit();
const std::size_t popLimit = this->options().cube.pop_limit;
const std::size_t ruleLimit = staticData.GetRuleLimit();
const std::size_t stackLimit = staticData.GetMaxHypoStackSize();
const std::size_t stackLimit = this->options().search.stack_size;

// Initialize the stacks.
InitializeStacks();

@ -214,7 +214,7 @@ void Manager<RuleMatcher>::ExtractKBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;

// Extract the derivations.
@ -100,7 +100,9 @@ TranslationTask
::TranslationTask(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper)
: m_source(source) , m_ioWrapper(ioWrapper)
{ }
{
m_options = StaticData::Instance().options();
}

TranslationTask::~TranslationTask()
{ }

@ -112,7 +114,7 @@ TranslationTask
{
boost::shared_ptr<BaseManager> manager;
StaticData const& staticData = StaticData::Instance();
if (algo == DefaultSearchAlgorithm) algo = staticData.GetSearchAlgorithm();
if (algo == DefaultSearchAlgorithm) algo = staticData.options().search.algo;

if (!staticData.IsSyntax(algo))
manager.reset(new Manager(this->self())); // phrase-based

@ -154,6 +156,13 @@ TranslationTask
return manager;
}

AllOptions const&
TranslationTask::
options() const
{
return m_options;
}

void TranslationTask::Run()
{
UTIL_THROW_IF2(!m_source || !m_ioWrapper,

@ -43,8 +43,8 @@ class TranslationTask : public Moses::Task
operator=(TranslationTask const& other) {
return *this;
}

protected:
AllOptions m_options;
boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself
boost::shared_ptr<ContextScope> m_scope; // sores local info
// pointer to ContextScope, which stores context-specific information

@ -134,6 +134,7 @@ public:
void SetContextWeights(std::string const& context_weights);
void ReSetContextWeights(std::map<std::string, float> const& new_weights);

AllOptions const& options() const;

protected:
boost::shared_ptr<Moses::InputType> m_source;
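The hunks above give each TranslationTask its own copy of the options, taken from the global object at construction time and exposed through options(). A minimal standalone sketch of that copy-then-override pattern follows; the names are hypothetical stand-ins, not the Moses classes, and the field and its values are illustrative only.

#include <cassert>

// Sketch: a per-task snapshot of global options that can diverge per request.
struct OptionsSketch {
  int max_distortion = 6;   // illustrative value only
};

struct GlobalConfigSketch {            // plays the role of the global singleton
  OptionsSketch opts;
  OptionsSketch const& options() const { return opts; }
};

struct TaskSketch {                    // plays the role of one translation task
  OptionsSketch m_options;
  explicit TaskSketch(GlobalConfigSketch const& g) : m_options(g.options()) {}
  OptionsSketch const& options() const { return m_options; }
};

int main() {
  GlobalConfigSketch global;
  TaskSketch task(global);
  task.m_options.max_distortion = 12;            // per-task override
  assert(global.options().max_distortion == 6);  // the global copy is untouched
  assert(task.options().max_distortion == 12);
  return 0;
}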
moses/parameters/AllOptions.cpp (new file, 31 lines)
@ -0,0 +1,31 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "AllOptions.h"

namespace Moses
{
AllOptions::
AllOptions(Parameter const& param)
{
init(param);
}

bool
AllOptions::
init(Parameter const& param)
{
if (!search.init(param)) return false;
if (!cube.init(param)) return false;
if (!nbest.init(param)) return false;
if (!reordering.init(param)) return false;
if (!context.init(param)) return false;
if (!input.init(param)) return false;
return sanity_check();
}

bool
AllOptions::
sanity_check()
{
return true;
}
}

moses/parameters/AllOptions.h (new file, 31 lines)
@ -0,0 +1,31 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
#include "SearchOptions.h"
#include "CubePruningOptions.h"
#include "NBestOptions.h"
#include "ReorderingOptions.h"
#include "ContextParameters.h"
#include "InputOptions.h"

namespace Moses
{
struct
AllOptions
{
SearchOptions search;
CubePruningOptions cube;
NBestOptions nbest;
ReorderingOptions reordering;
ContextParameters context;
InputOptions input;
// StackOptions stack;
// BeamSearchOptions beam;
bool init(Parameter const& param);
bool sanity_check();
AllOptions() {}
AllOptions(Parameter const& param);
};

}
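AllOptions::init above delegates to each option group in turn and only succeeds if every group succeeds, finishing with a (currently trivial) sanity check. A standalone sketch of that aggregate-init pattern follows; the parameter map, defaults, and struct names are simplified stand-ins, not the Moses Parameter class.

#include <cstddef>
#include <map>
#include <string>

typedef std::map<std::string, std::string> ParamMapSketch;

struct SearchOptionsSketch {
  std::size_t stack_size;
  SearchOptionsSketch() : stack_size(200) {}       // assumed default
  bool init(ParamMapSketch const& p) {
    ParamMapSketch::const_iterator it = p.find("stack");
    if (it != p.end()) stack_size = std::stoul(it->second);
    return true;
  }
};

struct CubeOptionsSketch {
  std::size_t pop_limit;
  CubeOptionsSketch() : pop_limit(1000) {}         // assumed default
  bool init(ParamMapSketch const& p) {
    ParamMapSketch::const_iterator it = p.find("cube-pruning-pop-limit");
    if (it != p.end()) pop_limit = std::stoul(it->second);
    return true;
  }
};

// Aggregate init mirrors the structure above: every sub-group must succeed.
struct AllOptionsSketch {
  SearchOptionsSketch search;
  CubeOptionsSketch cube;
  bool init(ParamMapSketch const& p) {
    if (!search.init(p)) return false;
    if (!cube.init(p)) return false;
    return sanity_check();
  }
  bool sanity_check() const { return true; }       // placeholder, as in the new file
};

int main() {
  ParamMapSketch p;
  p["stack"] = "500";
  AllOptionsSketch opts;
  return (opts.init(p) && opts.search.stack_size == 500) ? 0 : 1;
}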
moses/parameters/BeamSearchOptions.h (new file, 15 lines)
@ -0,0 +1,15 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{

struct
BeamSearchOptions
{
bool init(Parameter const& param);
BeamSearchOptions(Parameter const& param);
};

}

@ -9,9 +9,9 @@ ContextParameters()
: look_ahead(0), look_back(0)
{ }

void
bool
ContextParameters::
init(Parameter& params)
init(Parameter const& params)
{
look_back = look_ahead = 0;
params.SetParameter(context_string, "context-string", std::string(""));

@ -19,12 +19,12 @@ init(Parameter& params)
params.SetParameter(context_window, "context-window", std::string(""));

if (context_window == "")
return;
return true;

if (context_window.substr(0,3) == "all")
{
look_back = look_ahead = std::numeric_limits<size_t>::max();
return;
return true;
}

size_t p = context_window.find_first_of("0123456789");

@ -47,5 +47,6 @@ init(Parameter& params)
else
UTIL_THROW2("Invalid specification of context window.");
}
return true;
}
}

@ -12,7 +12,7 @@ class ContextParameters
{
public:
ContextParameters();
void init(Parameter& params);
bool init(Parameter const& params);
size_t look_ahead; // # of words to look ahead for context-sensitive decoding
size_t look_back; // # of works to look back for context-sensitive decoding
std::string context_string; // fixed context string specified on command line

moses/parameters/CubePruningOptions.cpp (new file, 19 lines)
@ -0,0 +1,19 @@
// -*- mode: c++; cc-style: gnu -*-
#include "CubePruningOptions.h"

namespace Moses
{

bool
CubePruningOptions::
init(Parameter const& param)
{
param.SetParameter(pop_limit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT);
param.SetParameter(diversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
return true;
}

}

moses/parameters/CubePruningOptions.h (new file, 20 lines)
@ -0,0 +1,20 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{

struct
CubePruningOptions
{
size_t pop_limit;
size_t diversity;
bool lazy_scoring;

bool init(Parameter const& param);
CubePruningOptions(Parameter const& param);
CubePruningOptions() {};
};

}
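Each of these option classes reads its values with the SetParameter(value, key, default) idiom: take the configured value when the key is present, otherwise keep a compiled-in default. The following standalone sketch of that idiom uses a plain map as a stand-in for Parameter, and the default value shown is assumed for illustration only.

#include <cstddef>
#include <map>
#include <string>

// Sketch: configured value if present, otherwise the supplied default.
template <typename T>
void SetParameterSketch(T& value, std::map<std::string, T> const& params,
                        std::string const& key, T const& dflt) {
  typename std::map<std::string, T>::const_iterator it = params.find(key);
  value = (it == params.end()) ? dflt : it->second;
}

int main() {
  std::map<std::string, std::size_t> params;       // nothing configured
  std::size_t pop_limit = 0;
  SetParameterSketch(pop_limit, params, "cube-pruning-pop-limit",
                     std::size_t(1000));           // assumed default value
  return pop_limit == 1000 ? 0 : 1;
}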
moses/parameters/InputOptions.cpp (new file, 65 lines)
@ -0,0 +1,65 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "InputOptions.h"
#include <vector>
#include <iostream>
#include "moses/StaticData.h"

namespace Moses {

InputOptions::
InputOptions()
{
xml_brackets.first = "<";
xml_brackets.second = ">";
input_type = SentenceInput;
}

bool
InputOptions::
init(Parameter const& param)
{
param.SetParameter(input_type, "inputtype", SentenceInput);
if (input_type == SentenceInput)
{ VERBOSE(2, "input type is: text input"); }
else if (input_type == ConfusionNetworkInput)
{ VERBOSE(2, "input type is: confusion net"); }
else if (input_type == WordLatticeInput)
{ VERBOSE(2, "input type is: word lattice"); }
else if (input_type == TreeInputType)
{ VERBOSE(2, "input type is: tree"); }
else if (input_type == TabbedSentenceInput)
{ VERBOSE(2, "input type is: tabbed sentence"); }
else if (input_type == ForestInputType)
{ VERBOSE(2, "input type is: forest"); }

param.SetParameter(continue_partial_translation,
"continue-partial-translation", false);
param.SetParameter(default_non_term_only_for_empty_range,
"default-non-term-for-empty-range-only", false);

param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);

// specify XML tags opening and closing brackets for XML option
// Do we really want this to be configurable???? UG
const PARAM_VEC *pspec;
pspec = param.GetParam("xml-brackets");
if (pspec && pspec->size())
{
std::vector<std::string> brackets = Tokenize(pspec->at(0));
if(brackets.size()!=2)
{
std::cerr << "invalid xml-brackets value, "
<< "must specify exactly 2 blank-delimited strings "
<< "for XML tags opening and closing brackets" << std::endl;
exit(1);
}
xml_brackets.first= brackets[0];
xml_brackets.second=brackets[1];
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
<< xml_brackets.first << " and "
<< xml_brackets.second << std::endl);
}
return true;
}

}

moses/parameters/InputOptions.h (new file, 25 lines)
@ -0,0 +1,25 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
#include <string>
namespace Moses
{
struct
InputOptions
{
bool continue_partial_translation;
bool default_non_term_only_for_empty_range; // whatever that means
InputTypeEnum input_type;
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive

std::pair<std::string,std::string> xml_brackets;
// strings to use as XML tags' opening and closing brackets.
// Default are "<" and ">"

bool init(Parameter const& param);
InputOptions();
};

}

@ -1,4 +1,5 @@
// -*- mode: c++; cc-style: gnu -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>

namespace Moses

moses/parameters/ReorderingOptions.cpp (new file, 21 lines)
@ -0,0 +1,21 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "ReorderingOptions.h"

namespace Moses {

ReorderingOptions::
ReorderingOptions(Parameter const& param)
{
init(param);
}

bool
ReorderingOptions::
init(Parameter const& param)
{
param.SetParameter(max_distortion, "distortion-limit", -1);
param.SetParameter(monotone_at_punct, "monotone-at-punctuation", false);
param.SetParameter(use_early_distortion_cost, "early-distortion-cost", false);
return true;
}
}

moses/parameters/ReorderingOptions.h (new file, 20 lines)
@ -0,0 +1,20 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{

struct
ReorderingOptions
{
int max_distortion;
bool monotone_at_punct;
bool use_early_distortion_cost;
bool init(Parameter const& param);
ReorderingOptions(Parameter const& param);
ReorderingOptions() {}
};

}

moses/parameters/SearchOptions.cpp (new file, 50 lines)
@ -0,0 +1,50 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#include "SearchOptions.h"

namespace Moses
{
SearchOptions::
SearchOptions(Parameter const& param)
: stack_diversity(0)
{
init(param);
}

bool
SearchOptions::
init(Parameter const& param)
{
param.SetParameter(algo, "search-algorithm", Normal);
param.SetParameter(stack_size, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
param.SetParameter(stack_diversity, "stack-diversity", size_t(0));
param.SetParameter(beam_width, "beam-threshold", DEFAULT_BEAM_WIDTH);
param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
DEFAULT_EARLY_DISCARDING_THRESHOLD);
param.SetParameter(timeout, "time-out", 0);
param.SetParameter(max_phrase_length, "max-phrase-length",
DEFAULT_MAX_PHRASE_LENGTH);
param.SetParameter(trans_opt_threshold, "translation-option-threshold",
DEFAULT_TRANSLATION_OPTION_THRESHOLD);
param.SetParameter(max_trans_opt_per_cov, "max-trans-opt-per-coverage",
DEFAULT_MAX_TRANS_OPT_SIZE);
param.SetParameter(max_partial_trans_opt, "max-partial-trans-opt",
DEFAULT_MAX_PART_TRANS_OPT_SIZE);

// transformation to log of a few scores
beam_width = TransformScore(beam_width);
trans_opt_threshold = TransformScore(trans_opt_threshold);
early_discarding_threshold = TransformScore(early_discarding_threshold);
return true;
}

bool
is_syntax(SearchAlgorithm algo)
{
return (algo == CYKPlus || algo == ChartIncremental ||
algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
}

}

moses/parameters/SearchOptions.h (new file, 44 lines)
@ -0,0 +1,44 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
#include <string>
#include "moses/Parameter.h"
namespace Moses
{

bool is_syntax(SearchAlgorithm algo);

struct
SearchOptions
{
SearchAlgorithm algo;

// stack decoding
size_t stack_size; // maxHypoStackSize;
size_t stack_diversity; // minHypoStackDiversity;

size_t max_phrase_length;
size_t max_trans_opt_per_cov;
size_t max_partial_trans_opt;
// beam search
float beam_width;

int timeout;

// reordering options
// bool reorderingConstraint; //! use additional reordering constraints
// bool useEarlyDistortionCost;

float early_discarding_threshold;
float trans_opt_threshold;

bool init(Parameter const& param);
SearchOptions(Parameter const& param);
SearchOptions() {}

bool UseEarlyDiscarding() const {
return early_discarding_threshold != -std::numeric_limits<float>::infinity();
}

};

}
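SearchOptions bundles the algorithm choice with the stack and beam settings, adds a free is_syntax() predicate, and keeps the early-discarding check as a member. A standalone sketch of those two pieces follows; the enum values are abbreviated stand-ins and the list inside the predicate is deliberately shortened, so it is not the full set used above.

#include <limits>

// Sketch: algorithm predicate plus early-discarding check against -infinity.
enum SearchAlgorithmSketch { NormalSketch, CYKPlusSketch, SyntaxS2TSketch };

bool is_syntax_sketch(SearchAlgorithmSketch algo) {
  return algo == CYKPlusSketch || algo == SyntaxS2TSketch;  // abridged list
}

struct SearchOptionsSketch {
  SearchAlgorithmSketch algo;
  float early_discarding_threshold;
  SearchOptionsSketch()
    : algo(NormalSketch)
    , early_discarding_threshold(-std::numeric_limits<float>::infinity()) {}
  bool UseEarlyDiscarding() const {
    return early_discarding_threshold != -std::numeric_limits<float>::infinity();
  }
};

int main() {
  SearchOptionsSketch s;
  return (!is_syntax_sketch(s.algo) && !s.UseEarlyDiscarding()) ? 0 : 1;
}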
@ -57,7 +57,7 @@ die "Cannot locate input at $input" unless (-f $input);
my $local_moses_ini = MosesRegressionTesting::get_localized_moses_ini($conf, $data_dir, $results_dir);
my ($nbestfile,$nbestsize) = MosesRegressionTesting::get_nbestlist($conf);

if (defined($nbestsize) && $nbestsize > 0){
if (defined($nbestsize) && $nbestsize > 0) {
$NBEST=$nbestsize;
}