mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
Code cleanup and refactoring.
This commit is contained in:
parent
b899ab8175
commit
bb6e0157aa
@ -101,7 +101,7 @@ OutputSurface(std::ostream &out, Phrase const& phrase) const
|
||||
std::vector<FactorType> const& factor_order = options()->output.factor_order;
|
||||
|
||||
bool markUnknown = options()->unk.mark;
|
||||
std::string const& fd = options()->output.FactorDelimiter;
|
||||
std::string const& fd = options()->output.factor_delimiter;
|
||||
|
||||
size_t size = phrase.GetSize();
|
||||
for (size_t pos = 0 ; pos < size ; pos++) {
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "moses/OutputCollector.h"
|
||||
#include "moses/ChartKBestExtractor.h"
|
||||
#include "moses/HypergraphOutput.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -52,7 +53,7 @@ ChartManager::ChartManager(ttasksptr const& ttask)
|
||||
, m_start(clock())
|
||||
, m_hypothesisId(0)
|
||||
, m_parser(ttask, m_hypoStackColl)
|
||||
, m_translationOptionList(StaticData::Instance().GetRuleLimit(), m_source)
|
||||
, m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
|
||||
{ }
|
||||
|
||||
ChartManager::~ChartManager()
|
||||
|
@ -237,7 +237,10 @@ void ChartParser::CreateInputPaths(const InputType &input)
|
||||
m_inputPathMatrix.resize(size);
|
||||
|
||||
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
|
||||
"Input must be a sentence or a tree, not lattice or confusion networks");
|
||||
"Input must be a sentence or a tree, " <<
|
||||
"not lattice or confusion networks");
|
||||
|
||||
TranslationTask const* ttask = m_ttask.lock().get();
|
||||
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
||||
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
||||
size_t endPos = startPos + phaseSize -1;
|
||||
@ -249,11 +252,11 @@ void ChartParser::CreateInputPaths(const InputType &input)
|
||||
|
||||
InputPath *node;
|
||||
if (range.GetNumWordsCovered() == 1) {
|
||||
node = new InputPath(m_ttask, subphrase, labels, range, NULL, NULL);
|
||||
node = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
||||
vec.push_back(node);
|
||||
} else {
|
||||
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
|
||||
node = new InputPath(m_ttask, subphrase, labels, range, &prevNode, NULL);
|
||||
node = new InputPath(ttask, subphrase, labels, range, &prevNode, NULL);
|
||||
vec.push_back(node);
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
|
||||
{
|
||||
SetInputPath(&inputPath);
|
||||
// if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
|
||||
if (inputPath.ttask.lock()->options()->input.placeholder_factor != NOT_FOUND) {
|
||||
if (inputPath.ttask->options()->input.placeholder_factor != NOT_FOUND) {
|
||||
CreateSourceRuleFromInputPath();
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ protected:
|
||||
#endif
|
||||
SPTR<std::map<std::string,float> const> m_context_weights;
|
||||
public:
|
||||
|
||||
typedef boost::shared_ptr<ContextScope> ptr;
|
||||
template<typename T>
|
||||
boost::shared_ptr<void> const&
|
||||
set(void const* const key, boost::shared_ptr<T> const& val) {
|
||||
|
@ -55,7 +55,9 @@ void GlobalLexicalModel::Load(AllOptions::ptr const& opts)
|
||||
{
|
||||
m_options = opts;
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
const std::string& oFactorDelimiter = opts->output.factor_delimiter;
|
||||
const std::string& iFactorDelimiter = opts->input.factor_delimiter;
|
||||
|
||||
|
||||
VERBOSE(2, "Loading global lexical model from file " << m_filePath << endl);
|
||||
|
||||
@ -76,21 +78,23 @@ void GlobalLexicalModel::Load(AllOptions::ptr const& opts)
|
||||
|
||||
// create the output word
|
||||
Word *outWord = new Word();
|
||||
vector<string> factorString = Tokenize( token[0], factorDelimiter );
|
||||
vector<string> factorString = Tokenize( token[0], oFactorDelimiter );
|
||||
for (size_t i=0 ; i < m_outputFactorsVec.size() ; i++) {
|
||||
const FactorDirection& direction = Output;
|
||||
const FactorType& factorType = m_outputFactorsVec[i];
|
||||
const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||
const Factor* factor
|
||||
= factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||
outWord->SetFactor( factorType, factor );
|
||||
}
|
||||
|
||||
// create the input word
|
||||
Word *inWord = new Word();
|
||||
factorString = Tokenize( token[1], factorDelimiter );
|
||||
factorString = Tokenize( token[1], iFactorDelimiter );
|
||||
for (size_t i=0 ; i < m_inputFactorsVec.size() ; i++) {
|
||||
const FactorDirection& direction = Input;
|
||||
const FactorType& factorType = m_inputFactorsVec[i];
|
||||
const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||
const Factor* factor
|
||||
= factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||
inWord->SetFactor( factorType, factor );
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
InputPath::
|
||||
InputPath(ttaskwptr const theTask,
|
||||
InputPath(TranslationTask const* theTask,
|
||||
Phrase const& phrase,
|
||||
NonTerminalSet const& sourceNonTerms,
|
||||
Range const& range, InputPath const *prevNode,
|
||||
|
@ -40,7 +40,8 @@ public:
|
||||
TargetPhrases;
|
||||
|
||||
public:
|
||||
ttaskwptr const ttask;
|
||||
// ttaskwptr const ttask;
|
||||
TranslationTask const* ttask;
|
||||
protected:
|
||||
const InputPath *m_prevPath;
|
||||
Phrase m_phrase;
|
||||
@ -65,7 +66,7 @@ public:
|
||||
, m_nextNode(NOT_FOUND) {
|
||||
}
|
||||
|
||||
InputPath(ttaskwptr const ttask,
|
||||
InputPath(TranslationTask const* ttask, // ttaskwptr const ttask,
|
||||
Phrase const& phrase,
|
||||
NonTerminalSet const& sourceNonTerms,
|
||||
Range const& range,
|
||||
|
@ -123,7 +123,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
@ -150,7 +149,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 2 );
|
||||
@ -179,7 +177,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses for",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 3 );
|
||||
@ -206,7 +203,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses for most",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 4 );
|
||||
@ -252,7 +248,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
@ -280,7 +275,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"licenses",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
@ -308,7 +302,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"for",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
@ -336,7 +329,6 @@ public:
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"most",
|
||||
// StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
@ -1699,7 +1699,7 @@ OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) c
|
||||
}
|
||||
|
||||
bool markUnknown = options()->unk.mark;
|
||||
std::string const& fd = options()->output.FactorDelimiter;
|
||||
std::string const& fd = options()->output.factor_delimiter;
|
||||
|
||||
TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
|
||||
size_t size = phrase.GetSize();
|
||||
|
@ -116,6 +116,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||
|
||||
|
||||
// convert into TargetPhrases
|
||||
std::string fd = m_obj->options()->output.factor_delimiter;
|
||||
for(size_t i=0; i<cands.size(); ++i) {
|
||||
TargetPhrase targetPhrase(m_obj);
|
||||
|
||||
@ -134,7 +135,8 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||
}
|
||||
|
||||
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
|
||||
CreateTargetPhrase(targetPhrase,factorStrings, fd, scoreVector, Scores(0),
|
||||
&wacands[i], &src);
|
||||
|
||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
@ -375,6 +377,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
||||
TargetPhrase targetPhrase(m_obj);
|
||||
CreateTargetPhrase(targetPhrase
|
||||
, j ->first
|
||||
, m_obj->options()->output.factor_delimiter
|
||||
, scores.transScore
|
||||
, scores.inputScores
|
||||
, NULL
|
||||
@ -403,6 +406,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
||||
|
||||
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||
StringTgtCand::Tokens const& factorStrings,
|
||||
std::string const& factorDelimiter,
|
||||
Scores const& transVector,
|
||||
Scores const& inputVector,
|
||||
const std::string *alignmentString,
|
||||
@ -411,7 +415,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
for(size_t k=0; k<factorStrings.size(); ++k) {
|
||||
util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], StaticData::Instance().GetFactorDelimiter());
|
||||
util::TokenIter<util::MultiCharacter, false>
|
||||
word(*factorStrings[k], factorDelimiter);
|
||||
Word& w=targetPhrase.AddWord();
|
||||
for(size_t l=0; l<m_output.size(); ++l, ++word) {
|
||||
w[m_output[l]]= factorCollection.AddFactor(*word);
|
||||
|
@ -116,6 +116,7 @@ public:
|
||||
|
||||
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||
StringTgtCand::Tokens const& factorStrings,
|
||||
std::string const& factorDelimiter,
|
||||
Scores const& transVector,
|
||||
Scores const& inputVector,
|
||||
const std::string *alignmentString,
|
||||
|
@ -55,18 +55,6 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
// /// return shared pointer to ttask
|
||||
// // only TargetPhrases have non-NULL ttaskptrs!
|
||||
// virtual ttasksptr GetTtask() const {
|
||||
// return ttasksptr();
|
||||
// }
|
||||
|
||||
// /// check if this phrase belongs to a valid ttask
|
||||
// // only TargetPhrases have non-NULL ttaskptrs!
|
||||
// virtual bool HasTtaskSPtr() const {
|
||||
// return false;
|
||||
// }
|
||||
|
||||
virtual bool HasScope() const {
|
||||
return false;
|
||||
}
|
||||
|
@ -129,18 +129,10 @@ StaticData
|
||||
::ini_output_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// verbose level
|
||||
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
||||
|
||||
|
||||
|
||||
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
||||
"output-unknowns", "");
|
||||
|
||||
// m_parameter->SetParameter<long>(m_startTranslationId,
|
||||
// "start-translation-id", 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -208,12 +200,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// threading etc.
|
||||
if (!ini_performance_options()) return false;
|
||||
|
||||
// Compact phrase table and reordering model
|
||||
// m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
||||
// m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
|
||||
|
||||
// S2T decoder
|
||||
|
||||
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
||||
|
||||
// set class-specific default parameters
|
||||
@ -324,8 +310,6 @@ void StaticData::LoadChartDecodingParameters()
|
||||
// source label overlap
|
||||
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
|
||||
SourceLabelOverlapAdd);
|
||||
m_parameter->SetParameter(m_ruleLimit, "rule-limit",
|
||||
DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
|
||||
}
|
||||
|
||||
@ -621,7 +605,6 @@ void StaticData::LoadFeatureFunctions()
|
||||
m_requireSortingAfterSourceContext = true;
|
||||
}
|
||||
|
||||
// if (PhraseDictionary *ffCast = dynamic_cast<PhraseDictionary*>(ff)) {
|
||||
if (dynamic_cast<PhraseDictionary*>(ff)) {
|
||||
doLoad = false;
|
||||
}
|
||||
|
@ -104,12 +104,6 @@ protected:
|
||||
|
||||
std::string m_outputUnknownsFile; //! output unknowns in this file
|
||||
|
||||
size_t m_ruleLimit;
|
||||
|
||||
// Whether to load compact phrase table and reordering table into memory
|
||||
bool m_minphrMemory;
|
||||
bool m_minlexrMemory;
|
||||
|
||||
// Initial = 0 = can be used when creating poss trans
|
||||
// Other = 1 = used to calculate LM score once all steps have been processed
|
||||
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
|
||||
@ -146,7 +140,6 @@ protected:
|
||||
|
||||
void NoCache();
|
||||
|
||||
bool m_continuePartialTranslation;
|
||||
std::string m_binPath;
|
||||
|
||||
// soft NT lookup for chart models
|
||||
@ -252,17 +245,10 @@ public:
|
||||
return m_unknownLHS;
|
||||
}
|
||||
|
||||
size_t GetRuleLimit() const {
|
||||
return m_ruleLimit;
|
||||
}
|
||||
float GetRuleCountThreshold() const {
|
||||
return 999999; /* TODO wtf! */
|
||||
}
|
||||
|
||||
bool ContinuePartialTranslation() const {
|
||||
return m_continuePartialTranslation;
|
||||
}
|
||||
|
||||
void ReLoadBleuScoreFeatureParameter(float weight);
|
||||
|
||||
Parameter* GetParameter() {
|
||||
@ -273,10 +259,6 @@ public:
|
||||
return m_threadCount;
|
||||
}
|
||||
|
||||
// long GetStartTranslationId() const {
|
||||
// return m_startTranslationId;
|
||||
// }
|
||||
|
||||
void SetExecPath(const std::string &path);
|
||||
const std::string &GetBinDirectory() const;
|
||||
|
||||
|
@ -60,9 +60,9 @@ void Manager<RuleMatcher>::Decode()
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// Get various pruning-related constants.
|
||||
const std::size_t popLimit = staticData.options()->cube.pop_limit;
|
||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
||||
const std::size_t stackLimit = staticData.options()->search.stack_size;
|
||||
const std::size_t popLimit = options()->cube.pop_limit;
|
||||
const std::size_t ruleLimit = options()->syntax.rule_limit;
|
||||
const std::size_t stackLimit = options()->search.stack_size;
|
||||
|
||||
// Initialize the stacks.
|
||||
InitializeStacks();
|
||||
|
@ -163,9 +163,9 @@ void Manager<Parser>::Decode()
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// Get various pruning-related constants.
|
||||
const std::size_t popLimit = staticData.options()->cube.pop_limit;
|
||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
||||
const std::size_t stackLimit = staticData.options()->search.stack_size;
|
||||
const std::size_t popLimit = options()->cube.pop_limit;
|
||||
const std::size_t ruleLimit = options()->syntax.rule_limit;
|
||||
const std::size_t stackLimit = options()->search.stack_size;
|
||||
|
||||
// Initialise the PChart and SChart.
|
||||
InitializeCharts();
|
||||
|
@ -94,11 +94,11 @@ void Manager<RuleMatcher>::InitializeStacks()
|
||||
template<typename RuleMatcher>
|
||||
void Manager<RuleMatcher>::Decode()
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
// Get various pruning-related constants.
|
||||
const std::size_t popLimit = this->options()->cube.pop_limit;
|
||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
||||
const std::size_t ruleLimit = this->options()->syntax.rule_limit;
|
||||
const std::size_t stackLimit = this->options()->search.stack_size;
|
||||
|
||||
// Initialize the stacks.
|
||||
@ -215,8 +215,8 @@ void Manager<RuleMatcher>::ExtractKBest(
|
||||
// than k. The k-best factor determines how much bigger the limit should be,
|
||||
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
||||
// too many translations are identical.
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::size_t nBestFactor = staticData.options()->nbest.factor;
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
const std::size_t nBestFactor = this->options()->nbest.factor;
|
||||
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
|
||||
|
||||
// Extract the derivations.
|
||||
|
@ -44,9 +44,6 @@ bool RuleTrieLoader::Load(Moses::AllOptions const& opts,
|
||||
{
|
||||
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
|
||||
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
|
||||
|
||||
std::size_t count = 0;
|
||||
|
||||
std::ostream *progress = NULL;
|
||||
|
@ -53,7 +53,6 @@ TargetPhrase::TargetPhrase( std::string out_string, const PhraseDictionary *pt)
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// XXX should this really be InputFactorOrder???
|
||||
CreateFromString(Output, staticData.options()->input.factor_order, out_string,
|
||||
// staticData.GetFactorDelimiter(), // eliminated [UG]
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
@ -45,8 +45,8 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
|
||||
{
|
||||
|
||||
size_t sourceSize = parser.GetSize();
|
||||
|
||||
m_completedRules.resize(sourceSize);
|
||||
size_t ruleLimit = parser.options()->syntax.rule_limit;
|
||||
m_completedRules.resize(sourceSize, CompletedRuleCollection(ruleLimit));
|
||||
|
||||
m_isSoftMatching = !m_softMatchingMap.empty();
|
||||
}
|
||||
|
@ -45,8 +45,8 @@ ChartRuleLookupManagerMemoryPerSentence::ChartRuleLookupManagerMemoryPerSentence
|
||||
{
|
||||
|
||||
size_t sourceSize = parser.GetSize();
|
||||
|
||||
m_completedRules.resize(sourceSize);
|
||||
size_t ruleLimit = parser.options()->syntax.rule_limit;
|
||||
m_completedRules.resize(sourceSize, CompletedRuleCollection(ruleLimit));
|
||||
|
||||
m_isSoftMatching = !m_softMatchingMap.empty();
|
||||
}
|
||||
|
@ -27,7 +27,8 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
CompletedRuleCollection::CompletedRuleCollection() : m_ruleLimit(StaticData::Instance().GetRuleLimit())
|
||||
CompletedRuleCollection::CompletedRuleCollection(size_t rule_limit)
|
||||
: m_ruleLimit(rule_limit)
|
||||
{
|
||||
m_scoreThreshold = numeric_limits<float>::infinity();
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ public:
|
||||
struct CompletedRuleCollection {
|
||||
public:
|
||||
|
||||
CompletedRuleCollection();
|
||||
CompletedRuleCollection(size_t rule_limit);
|
||||
~CompletedRuleCollection();
|
||||
|
||||
CompletedRuleCollection(const CompletedRuleCollection &old)
|
||||
|
@ -35,16 +35,6 @@ namespace Moses
|
||||
{
|
||||
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
|
||||
|
||||
// CacheColl::~CacheColl()
|
||||
// {
|
||||
// // not needed any more since the switch to shared pointers
|
||||
// // for (iterator iter = begin(); iter != end(); ++iter) {
|
||||
// // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
|
||||
// // TargetPhraseCollection::shared_ptr tps = key.first;
|
||||
// // delete tps;
|
||||
// // }
|
||||
// }
|
||||
|
||||
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
|
||||
: DecodeFeature(line, registerNow)
|
||||
, m_tableLimit(20) // default
|
||||
@ -82,8 +72,7 @@ GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
ret.reset(new TargetPhraseCollection(*ret));
|
||||
}
|
||||
cache[hash] = entry(ret, clock());
|
||||
} else {
|
||||
// in cache. just use it
|
||||
} else { // in cache. just use it
|
||||
iter->second.second = clock();
|
||||
ret = iter->second.first;
|
||||
}
|
||||
@ -175,31 +164,6 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
}
|
||||
}
|
||||
|
||||
// persistent cache handling
|
||||
// saving presistent cache to disk
|
||||
//void PhraseDictionary::SaveCache() const
|
||||
//{
|
||||
// CacheColl &cache = GetCache();
|
||||
// for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
|
||||
// iter != cache.end(),
|
||||
// iter++ ) {
|
||||
//
|
||||
// }
|
||||
//}
|
||||
|
||||
// loading persistent cache from disk
|
||||
//void PhraseDictionary::LoadCache() const
|
||||
//{
|
||||
// CacheColl &cache = GetCache();
|
||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
|
||||
// iter = cache.begin();
|
||||
// while( iter != cache.end() ) {
|
||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
|
||||
// delete iterRemove->second.first;
|
||||
// cache.erase(iterRemove);
|
||||
// }
|
||||
//}
|
||||
|
||||
// reduce presistent cache by half of maximum size
|
||||
void PhraseDictionary::ReduceCache() const
|
||||
{
|
||||
@ -233,7 +197,9 @@ void PhraseDictionary::ReduceCache() const
|
||||
<< reduceCacheTime << " seconds." << std::endl);
|
||||
}
|
||||
|
||||
CacheColl &PhraseDictionary::GetCache() const
|
||||
CacheColl &
|
||||
PhraseDictionary::
|
||||
GetCache() const
|
||||
{
|
||||
CacheColl *cache;
|
||||
cache = m_cache.get();
|
||||
|
@ -44,6 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
#include "moses/InputPath.h"
|
||||
#include "moses/FF/DecodeFeature.h"
|
||||
#include "moses/ContextScope.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -58,15 +59,6 @@ class ChartParser;
|
||||
// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
|
||||
typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
|
||||
// class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
|
||||
// {
|
||||
// // 1st = hash of source phrase/ address of phrase-table node
|
||||
// // 2nd = all translations
|
||||
// // 3rd = time of last access
|
||||
|
||||
// public:
|
||||
// ~CacheColl();
|
||||
// };
|
||||
|
||||
/**
|
||||
* Abstract base class for phrase dictionaries (tables).
|
||||
|
@ -330,7 +330,6 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr
|
||||
{
|
||||
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
||||
Phrase sourcePhrase(0);
|
||||
Phrase targetPhrase(0);
|
||||
|
||||
@ -428,7 +427,6 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> ent
|
||||
{
|
||||
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
||||
Phrase sourcePhrase(0);
|
||||
|
||||
std::vector<std::string>::iterator it;
|
||||
@ -517,7 +515,6 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
|
||||
{
|
||||
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
||||
Phrase sourcePhrase(0);
|
||||
TargetPhrase targetPhrase(0);
|
||||
|
||||
|
@ -144,7 +144,10 @@ TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPh
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
|
||||
void
|
||||
PhraseDictionaryMultiModelCounts::
|
||||
CollectSufficientStats(const Phrase& src, vector<float> &fs,
|
||||
map<string,multiModelCountsStats*>* allStats) const
|
||||
//fill fs and allStats with statistics from models
|
||||
{
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
|
@ -77,7 +77,7 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
|
||||
InputPath &inputPath = **iter;
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
|
||||
if (sourcePhrase.GetSize() > StaticData::Instance().options()->search.max_phrase_length) {
|
||||
if (sourcePhrase.GetSize() > m_options->search.max_phrase_length) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -63,9 +63,10 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
||||
const ScorePair &scores = col[i].second;
|
||||
ScorePair *inputScore = new ScorePair(scores);
|
||||
|
||||
InputPath *path = new InputPath(ttask, subphrase, labels, range, NULL, inputScore);
|
||||
InputPath* path = new InputPath(ttask.get(), subphrase, labels,
|
||||
range, NULL, inputScore);
|
||||
list.push_back(path);
|
||||
|
||||
|
||||
m_inputPathQueue.push_back(path);
|
||||
}
|
||||
}
|
||||
@ -114,7 +115,8 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
||||
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
||||
inputScore->PlusEquals(scores);
|
||||
|
||||
InputPath *path = new InputPath(ttask, subphrase, labels, range, &prevPath, inputScore);
|
||||
InputPath *path = new InputPath(ttask.get(), subphrase, labels, range,
|
||||
&prevPath, inputScore);
|
||||
list.push_back(path);
|
||||
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
@ -65,7 +65,7 @@ TranslationOptionCollectionLattice
|
||||
ScorePair *inputScore = new ScorePair(scores);
|
||||
|
||||
InputPath *path
|
||||
= new InputPath(ttask, subphrase, labels, range, NULL, inputScore);
|
||||
= new InputPath(ttask.get(), subphrase, labels, range, NULL, inputScore);
|
||||
|
||||
path->SetNextNode(nextNode);
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
@ -56,11 +56,11 @@ TranslationOptionCollectionText(ttasksptr const& ttask, Sentence const &input)
|
||||
|
||||
InputPath *path;
|
||||
if (range.GetNumWordsCovered() == 1) {
|
||||
path = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
||||
path = new InputPath(ttask.get(), subphrase, labels, range, NULL, NULL);
|
||||
vec.push_back(path);
|
||||
} else {
|
||||
const InputPath &prevPath = GetInputPath(startPos, endPos - 1);
|
||||
path = new InputPath(ttask, subphrase, labels, range, &prevPath, NULL);
|
||||
path = new InputPath(ttask.get(), subphrase, labels, range, &prevPath, NULL);
|
||||
vec.push_back(path);
|
||||
}
|
||||
|
||||
|
@ -213,7 +213,8 @@ TO_STRING_BODY(Word);
|
||||
ostream& operator<<(ostream& out, const Word& word)
|
||||
{
|
||||
util::StringStream strme;
|
||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
||||
const std::string& factorDelimiter
|
||||
= StaticData::Instance().options()->output.factor_delimiter;
|
||||
bool firstPass = true;
|
||||
unsigned int stop = max_fax();
|
||||
for (unsigned int currFactor = 0 ; currFactor < stop; currFactor++) {
|
||||
|
@ -17,6 +17,7 @@ namespace Moses {
|
||||
xml_brackets.first = "<";
|
||||
xml_brackets.second = ">";
|
||||
factor_order.assign(1,0);
|
||||
factor_delimiter = "|";
|
||||
}
|
||||
|
||||
bool
|
||||
@ -76,6 +77,7 @@ namespace Moses {
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
|
||||
|
||||
param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
|
||||
param.SetParameter<std::string>(input_file_path,"input-file","");
|
||||
|
||||
return true;
|
||||
|
@ -14,10 +14,9 @@ namespace Moses
|
||||
InputTypeEnum input_type;
|
||||
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
||||
std::vector<FactorType> factor_order; // input factor order
|
||||
|
||||
std::string factor_delimiter;
|
||||
FactorType placeholder_factor; // where to store original text for placeholders
|
||||
std::string input_file_path;
|
||||
|
||||
std::pair<std::string,std::string> xml_brackets;
|
||||
// strings to use as XML tags' opening and closing brackets.
|
||||
// Default are "<" and ">"
|
||||
|
@ -24,6 +24,7 @@ namespace Moses {
|
||||
, lattice_sample_size(0)
|
||||
{
|
||||
factor_order.assign(1,0);
|
||||
factor_delimiter = "|";
|
||||
}
|
||||
|
||||
bool
|
||||
@ -94,14 +95,14 @@ namespace Moses {
|
||||
params= param.GetParam("output-factors");
|
||||
if (params) factor_order = Scan<FactorType>(*params);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
|
||||
|
||||
if (ReportAllFactors) {
|
||||
for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
|
||||
factor_order.push_back(i);
|
||||
}
|
||||
|
||||
param.SetParameter(FactorDelimiter, "factor-delimiter", std::string("|"));
|
||||
param.SetParameter(FactorDelimiter, "output-factor-delimiter", FactorDelimiter);
|
||||
param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
|
||||
param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -134,12 +135,12 @@ namespace Moses {
|
||||
|
||||
m = param.find("factor-delimiter");
|
||||
if (m != param.end()) {
|
||||
FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
}
|
||||
|
||||
m = param.find("output-factor-delimiter");
|
||||
if (m != param.end()) {
|
||||
FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -13,6 +13,7 @@ namespace Moses
|
||||
long start_translation_id;
|
||||
|
||||
std::vector<FactorType> factor_order;
|
||||
std::string factor_delimiter;
|
||||
|
||||
bool ReportAllFactors; // m_reportAllFactors;
|
||||
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
||||
@ -23,7 +24,6 @@ namespace Moses
|
||||
|
||||
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
|
||||
std::string AlignmentOutputFile;
|
||||
std::string FactorDelimiter;
|
||||
|
||||
bool WordGraph;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user