Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2015-11-26 20:52:02 +00:00
commit 533aff9d53
56 changed files with 244 additions and 216 deletions

View File

@ -25,6 +25,7 @@
#include <iterator>
#include <cassert>
#include "moses/InputFileStream.h"
#include "moses/Timer.h"
#include "moses/Util.h"
#include "OnDiskWrapper.h"
#include "SourcePhrase.h"

View File

@ -0,0 +1,10 @@
#!/bin/bash
# Build Moses with bjam; if the build fails, step HEAD back one commit and
# retry, until some commit builds successfully.
git submodule init
git submodule update regtest
# `while true` rather than `[ true ]`: the test command `[ true ]` succeeds for
# ANY non-empty string (even "false"), so it only loops by accident.
while true ; do
  # Fixed: $(pwc) was a typo for $(pwd); it expanded to nothing and left
  # --with-regtest pointing at the bare path "/regtest".
  # "$@" (quoted) preserves the caller's argument word boundaries.
  ./bjam -j$(nproc) --with-irstlm=$(pwd)/opt --with-boost=$(pwd)/opt --with-cmph=$(pwd)/opt --with-xmlrpc-c=$(pwd)/opt --with-regtest=$(pwd)/regtest -a -q "$@" && break
  # Parent of current HEAD: second "commit ..." line of `git log`, hash only.
  commit=$(git log | grep ^commit | head -n2 | tail -n1 | sed 's/commit //')
  echo "REVERTING TO COMMIT $commit"
  git checkout $commit
done

View File

@ -19,6 +19,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Decoder.h"
#include "moses/Manager.h"
#include "moses/Timer.h"
#include "moses/ChartManager.h"
#include "moses/Sentence.h"
#include "moses/InputType.h"
@ -74,6 +75,7 @@ MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vect
cerr << "Loading static data failed, exit." << endl;
exit(1);
}
ResetUserTime();
StaticData::LoadDataStatic(params, "mira");
for (int i = 0; i < BASE_ARGC; ++i) {
delete[] mosesargv[i];

View File

@ -49,6 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "IOWrapper.h"
#include "LatticeMBR.h"
#include "Manager.h"
#include "Timer.h"
#include "StaticData.h"
#include "util/exception.hh"
@ -152,6 +153,7 @@ int main(int argc, char* argv[])
params->Explain();
exit(1);
}
ResetUserTime();
if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}

View File

@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "IOWrapper.h"
#include "StaticData.h"
#include "Util.h"
#include "Timer.h"
#include "ThreadPool.h"
#include "TranslationAnalysis.h"
#include "OutputCollector.h"
@ -191,6 +192,7 @@ int main(int argc, char** argv)
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
ResetUserTime();
if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}

View File

@ -38,6 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputPath.h"
#include "moses/Parameter.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/Timer.h"
#include "moses/StaticData.h"
#include "util/file_piece.hh"
@ -150,6 +151,7 @@ int main(int argc, char const** argv)
exit(1);
}
ResetUserTime();
if (!StaticData::LoadDataStatic(params.get(),argv[0])) {
exit(1);
}

View File

@ -49,6 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/IOWrapper.h"
#include "moses/LatticeMBR.h"
#include "moses/Manager.h"
#include "moses/Timer.h"
#include "moses/StaticData.h"
#include "util/exception.hh"
@ -155,6 +156,8 @@ int main(int argc, char const* argv[])
params->Explain();
exit(1);
}
ResetUserTime();
if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}

View File

@ -103,6 +103,7 @@ int main(int argc, char const** argv)
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
ResetUserTime();
if (!StaticData::LoadDataStatic(&params, argv[0])) {
exit(1);
}

View File

@ -224,7 +224,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
hypothesis.GetManager().GetSentenceStats().StartTimeBuildHyp();
}
const Bitmap &bitmap = m_parent.GetWordsBitmap();
Hypothesis *newHypo = new Hypothesis(hypothesis, transOpt, bitmap);
Hypothesis *newHypo = new Hypothesis(hypothesis, transOpt, bitmap, hypothesis.GetManager().GetNextHypoId());
IFVERBOSE(2) {
hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
}

View File

@ -86,6 +86,7 @@ SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni):
cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
exit(1);
}
ResetUserTime();
if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
exit(1);
@ -321,6 +322,7 @@ int decoder_main(int argc, char const** argv)
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
ResetUserTime();
if (!StaticData::LoadDataStatic(&params, argv[0]))
exit(1);

View File

@ -153,6 +153,8 @@ void
FeatureFactory
::DefaultSetup(F *feature)
{
FeatureFunction::Register(feature);
StaticData &static_data = StaticData::InstanceNonConst();
const std::string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);

View File

@ -54,12 +54,11 @@ FeatureFunction(const std::string& line, bool registerNow)
{
m_numTuneableComponents = m_numScoreComponents;
ParseLine(line);
if (registerNow) Register();
// if (registerNow) Register(); // now done in FeatureFactory::DefaultSetup()
// TO DO: eliminate the registerNow parameter
}
FeatureFunction::
FeatureFunction(size_t numScoreComponents,
const std::string& line)
FeatureFunction::FeatureFunction(size_t numScoreComponents, const std::string& line, bool registerNow)
: m_tuneable(true)
, m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max())
@ -68,15 +67,16 @@ FeatureFunction(size_t numScoreComponents,
{
m_numTuneableComponents = m_numScoreComponents;
ParseLine(line);
Register();
// if (registerNow) Register(); // now done in FeatureFactory::DefaultSetup()
// TO DO: eliminate the registerNow parameter
}
void
FeatureFunction::
Register()
Register(FeatureFunction* ff)
{
ScoreComponentCollection::RegisterScoreProducer(this);
s_staticColl.push_back(this);
ScoreComponentCollection::RegisterScoreProducer(ff);
s_staticColl.push_back(ff);
}
FeatureFunction::~FeatureFunction() {}

View File

@ -49,7 +49,8 @@ protected:
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
void Register();
public:
static void Register(FeatureFunction* ff);
private:
// void Initialize(const std::string &line);
void ParseLine(const std::string &line);
@ -62,8 +63,8 @@ public:
static FeatureFunction &FindFeatureFunction(const std::string& name);
static void Destroy();
FeatureFunction(const std::string &line, bool initializeNow);
FeatureFunction(size_t numScoreComponents, const std::string &line);
FeatureFunction(const std::string &line, bool registerNow);
FeatureFunction(size_t numScoreComponents, const std::string &line, bool registerNow = true);
virtual bool IsStateless() const = 0;
virtual ~FeatureFunction();

View File

@ -75,7 +75,7 @@ LexicalReordering(const std::string &line)
<< m_configuration->GetNumScoreComponents() << ")");
m_configuration->ConfigureSparse(sparseArgs, this);
this->Register();
// this->Register();
}
LexicalReordering::

View File

@ -4,6 +4,7 @@
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include "StatelessFeatureFunction.h"
#include "moses/TargetPhrase.h"
#include "moses/Factor.h"
namespace Moses

View File

@ -3,12 +3,16 @@
#include "FeatureFunction.h"
#include "moses/Syntax/SHyperedge.h"
namespace Moses
{
class FFState;
namespace Syntax
{
class SHyperedge;
}
/** base class for all stateful feature functions.
* eg. LM, distortion penalty
*/

View File

@ -2,11 +2,15 @@
#include "FeatureFunction.h"
#include "moses/Syntax/SHyperedge.h"
namespace Moses
{
namespace Syntax
{
class SHyperedge;
}
/** base class for all stateless feature functions.
* eg. phrase table, word penalty, phrase penalty
*/

View File

@ -46,7 +46,7 @@ namespace Moses
//size_t g_numHypos = 0;
Hypothesis::
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt, const Bitmap &bitmap)
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt, const Bitmap &bitmap, int id)
: m_prevHypo(NULL)
, m_sourceCompleted(bitmap)
, m_sourceInput(source)
@ -61,7 +61,7 @@ Hypothesis(Manager& manager, InputType const& source, const TranslationOption &i
, m_arcList(NULL)
, m_transOpt(initialTransOpt)
, m_manager(manager)
, m_id(m_manager.GetNextHypoId())
, m_id(id)
{
// ++g_numHypos;
// used for initial seeding of trans process
@ -71,14 +71,13 @@ Hypothesis(Manager& manager, InputType const& source, const TranslationOption &i
const vector<const StatefulFeatureFunction*>& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i)
m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
m_manager.GetSentenceStats().AddCreated();
}
/***
* continue prevHypo by appending the phrases in transOpt
*/
Hypothesis::
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Bitmap &bitmap)
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Bitmap &bitmap, int id)
: m_prevHypo(&prevHypo)
, m_sourceCompleted(bitmap)
, m_sourceInput(prevHypo.m_sourceInput)
@ -93,13 +92,12 @@ Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const
, m_arcList(NULL)
, m_transOpt(transOpt)
, m_manager(prevHypo.GetManager())
, m_id(m_manager.GetNextHypoId())
, m_id(id)
{
// ++g_numHypos;
m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
m_wordDeleted = transOpt.IsDeletionOption();
m_manager.GetSentenceStats().AddCreated();
}
Hypothesis::
@ -146,30 +144,6 @@ AddArc(Hypothesis *loserHypo)
m_arcList->push_back(loserHypo);
}
void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff, int state_idx)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( sfff )) {
// Manager& manager = this->GetManager(); //Get the manager and the ttask
// ttasksptr const& ttask = manager.GetTtask();
FFState const* prev = m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL;
m_ffStates[state_idx]
= sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
}
}
void
Hypothesis::
EvaluateWhenApplied(const StatelessFeatureFunction& slff)
{
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored( slff )) {
slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
}
}
/***
* calculate the logarithm of our total translation score (sum up components)
*/
@ -177,9 +151,8 @@ void
Hypothesis::
EvaluateWhenApplied(float estimatedScore)
{
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
const StaticData &staticData = StaticData::Instance();
// some stateless score producers cache their values in the translation
// option: add these here
// language model scores for n-grams completely contained within a target
@ -191,35 +164,27 @@ EvaluateWhenApplied(float estimatedScore)
StatelessFeatureFunction::GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
const StatelessFeatureFunction &ff = *sfs[i];
EvaluateWhenApplied(ff);
if(!staticData.IsFeatureFunctionIgnored(ff)) {
ff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
}
}
const vector<const StatefulFeatureFunction*>& ffs =
StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
if (! staticData.IsFeatureFunctionIgnored(ff)) {
if(!staticData.IsFeatureFunctionIgnored(ff)) {
FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL;
m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown);
}
}
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
m_manager.GetSentenceStats().StartTimeEstimateScore();
}
// FUTURE COST
m_estimatedScore = estimatedScore;
// TOTAL
m_futureScore = m_currScoreBreakdown.GetWeightedScore() + m_estimatedScore;
if (m_prevHypo) m_futureScore += m_prevHypo->GetScore();
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeEstimateScore();
}
}
const Hypothesis* Hypothesis::GetPrevHypo()const
@ -269,7 +234,7 @@ PrintHypothesis() const
void
Hypothesis::
CleanupArcList()
CleanupArcList(size_t nBestSize, bool distinctNBest)
{
// point this hypo's main hypo to itself
SetWinningHypo(this);
@ -281,11 +246,6 @@ CleanupArcList()
* so we'll keep all of the arc list if we need a distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
AllOptions const& opts = m_manager.options();
size_t nBestSize = opts.nbest.nbest_size;
bool distinctNBest = opts.NBestDistinct();
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
@ -379,7 +339,7 @@ GetTargetPhraseStringRep() const
void
Hypothesis::
OutputAlignment(std::ostream &out) const
OutputAlignment(std::ostream &out, WordAlignmentSort sortOrder) const
{
std::vector<const Hypothesis *> edges;
const Hypothesis *currentHypo = this;
@ -388,7 +348,7 @@ OutputAlignment(std::ostream &out) const
currentHypo = currentHypo->GetPrevHypo();
}
OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder);
OutputAlignment(out, edges, sortOrder);
}
@ -454,13 +414,13 @@ OutputInput(std::ostream& os) const
void
Hypothesis::
OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
const ReportingOptions &options) const
{
if (m_prevHypo) {
// recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
m_prevHypo->OutputBestSurface(out, outputFactorOrder, options);
}
OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, *this, outputFactorOrder, options);
}
//////////////////////////////////////////////////////////////////////////
@ -471,14 +431,15 @@ void
Hypothesis::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
const ReportingOptions &options) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
// TODO: slay the rest of StaticData here and move stuff into ReportingOptions
bool markUnknown = GetManager().options().unk.mark;
// = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
bool featureLabels = StaticData::Instance().options().nbest.include_feature_labels;
if (options.ReportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor
@ -526,23 +487,21 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
}
// trace ("report segmentation") option "-t" / "-tt"
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
if (options.ReportSegmentation > 0 && phrase.GetSize() > 0) {
const Range &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
const int sourceEnd = sourceRange.GetEndPos();
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
if (reportSegmentation == 2) {
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
if (options.ReportSegmentation == 2) {
out << ",wa=";
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
Hypothesis::OutputAlignment(out, ai, 0, 0, waso);
Hypothesis::OutputAlignment(out, ai, 0, 0, options.WA_SortOrder);
out << ",total=";
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
bool with_labels = m_manager.options().nbest.include_feature_labels;
scoreBreakdown.OutputAllFeatureScores(out, with_labels);
scoreBreakdown.OutputAllFeatureScores(out, featureLabels);
}
out << "| ";
}

View File

@ -50,6 +50,7 @@ class FFState;
class StatelessFeatureFunction;
class StatefulFeatureFunction;
class Manager;
class ReportingOptions;
typedef std::vector<Hypothesis*> ArcList;
@ -86,9 +87,9 @@ protected:
public:
/*! used by initial seeding of the translation process */
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt, const Bitmap &bitmap);
Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt, const Bitmap &bitmap, int id);
/*! used when creating a new hypothesis using a translation option (phrase translation) */
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Bitmap &bitmap);
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Bitmap &bitmap, int id);
~Hypothesis();
void PrintHypothesis() const;
@ -201,7 +202,7 @@ public:
}
void AddArc(Hypothesis *loserHypo);
void CleanupArcList();
void CleanupArcList(size_t nBestSize, bool distinctNBest);
//! returns a list alternative previous hypotheses (or NULL if n-best support is disabled)
inline const ArcList* GetArcList() const {
@ -230,13 +231,6 @@ public:
m_ffStates[idx] = state;
}
// Added by oliver.wilson@ed.ac.uk for async lm stuff.
void EvaluateWhenApplied(const StatefulFeatureFunction &sfff, int state_idx);
void EvaluateWhenApplied(const StatelessFeatureFunction &slff);
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
size_t GetNextStartPos(const TranslationOption &transOpt) const;
std::vector<std::vector<unsigned int> > *GetLMStats() const {
return NULL;
}
@ -246,7 +240,7 @@ public:
}
void
OutputAlignment(std::ostream &out) const;
OutputAlignment(std::ostream &out, WordAlignmentSort sortOrder) const;
static void
OutputAlignment(std::ostream &out,
@ -261,9 +255,9 @@ public:
void OutputInput(std::ostream& os) const;
static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
void OutputBestSurface(std::ostream &out, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) const;
void OutputBestSurface(std::ostream &out, const std::vector<Moses::FactorType> &outputFactorOrder, const ReportingOptions &options) const;
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const;
const ReportingOptions &options) const;
// creates a map of TARGET positions which should be replaced by word using placeholder
std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const;

View File

@ -244,7 +244,7 @@ void HypothesisStackCubePruning::CleanupArcList()
iterator iter;
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
Hypothesis *mainHypo = *iter;
mainHypo->CleanupArcList();
mainHypo->CleanupArcList(this->m_manager.options().nbest.nbest_size, this->m_manager.options().NBestDistinct());
}
}

View File

@ -266,7 +266,7 @@ void HypothesisStackNormal::CleanupArcList()
iterator iter;
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
Hypothesis *mainHypo = *iter;
mainHypo->CleanupArcList();
mainHypo->CleanupArcList(this->m_manager.options().nbest.nbest_size, this->m_manager.options().NBestDistinct());
}
}

View File

@ -30,8 +30,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#include <iostream>
#include <stack>
#include <boost/algorithm/string.hpp>
@ -39,33 +37,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
#include "moses/Syntax/SVertex.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Hypothesis.h"
#include "moses/Range.h"
#include "moses/TrellisPathList.h"
#include "moses/StaticData.h"
#include "moses/FeatureVector.h"
#include "moses/InputFileStream.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/TreeInput.h"
#include "moses/ForestInput.h"
#include "moses/ConfusionNet.h"
#include "moses/WordLattice.h"
#include "moses/Incremental.h"
#include "moses/ChartManager.h"
#include "util/exception.hh"
#include "IOWrapper.h"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/bzip2.hpp>

View File

@ -229,11 +229,12 @@ Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
const float lm_weight = data.GetWeights(&abstract)[0];
const float oov_weight = abstract.OOVFeatureEnabled() ? data.GetWeights(&abstract)[1] : 0.0;
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Config config(lm_weight * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);
size_t size = m_source.GetSize();

View File

@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/ChartManager.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/StaticData.h"
#include "util/exception.hh"
using namespace std;
@ -35,30 +34,19 @@ namespace Moses
{
LanguageModel::LanguageModel(const std::string &line) :
StatefulFeatureFunction(StaticData::Instance().GetLMEnableOOVFeature() ? 2 : 1, line )
StatefulFeatureFunction(line, /* registerNow = */ false),
m_enableOOVFeature(false)
{
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
// load m_enableOOVFeature via SetParameter() first
// ReadParameters();
this->m_numScoreComponents = this->m_numTuneableComponents = m_enableOOVFeature ? 2 : 1;
// register with the correct m_numScoreComponents
// Register();
}
LanguageModel::~LanguageModel() {}
float LanguageModel::GetWeight() const
{
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[0];
return StaticData::Instance().GetWeights(this)[0];
}
float LanguageModel::GetOOVWeight() const
{
if (m_enableOOVFeature) {
//return StaticData::Instance().GetAllWeights().GetScoresForProducer(this)[1];
return StaticData::Instance().GetWeights(this)[1];
} else {
return 0;
}
}
void LanguageModel::IncrementalCallback(Incremental::Manager &manager) const
{
UTIL_THROW(util::Exception, "Incremental search is only supported by KenLM.");
@ -83,7 +71,7 @@ EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase,
float estimateScore = fullScore - nGramScore;
if (StaticData::Instance().GetLMEnableOOVFeature()) {
if (m_enableOOVFeature) {
vector<float> scores(2), estimateScores(2);
scores[0] = nGramScore;
scores[1] = oovCount;
@ -121,4 +109,14 @@ const LanguageModel &LanguageModel::GetFirstLM()
throw std::logic_error("Incremental search only supports one language model.");
}
void LanguageModel::SetParameter(const std::string& key, const std::string& value)
{
if(key == "oov-feature") {
m_enableOOVFeature = Scan<bool>(value);
this->m_numScoreComponents = this->m_numTuneableComponents = m_enableOOVFeature ? 2 : 1;
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
} // namespace Moses

View File

@ -46,8 +46,6 @@ class LanguageModel : public StatefulFeatureFunction
protected:
LanguageModel(const std::string &line);
// This can't be in the constructor for virtual function dispatch reasons
bool m_enableOOVFeature;
public:
@ -59,9 +57,7 @@ public:
return m_enableOOVFeature;
}
float GetWeight() const;
float GetOOVWeight() const;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;

View File

@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/StaticData.h"
#include "moses/ChartHypothesis.h"
#include "moses/Incremental.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/Syntax/SVertex.h"
using namespace std;
@ -154,10 +155,9 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
ReadParameters();
lm::ngram::Config config;
IFVERBOSE(1) {
if(this->m_verbosity >= 1) {
config.messages = &std::cerr;
}
else {
} else {
config.messages = NULL;
}
FactorCollection &collection = FactorCollection::Instance();

View File

@ -98,6 +98,8 @@ private:
std::vector<lm::WordIndex> m_lmIdLookup;
protected:
//bool m_oovFeatureEnabled; /// originally from LanguageModel, copied here to separate the interfaces. Called m_enableOOVFeature there
};
} // namespace Moses

View File

@ -47,7 +47,7 @@ protected:
public:
virtual ~LanguageModelSingleFactor();
bool IsUseable(const FactorMask &mask) const;
void SetParameter(const std::string& key, const std::string& value);
virtual void SetParameter(const std::string& key, const std::string& value);
const Factor *GetSentenceStart() const {
return m_sentenceStart;

View File

@ -1480,6 +1480,7 @@ const Hypothesis *Manager::GetBestHypothesis() const
int Manager::GetNextHypoId()
{
GetSentenceStats().AddCreated(); // count created hypotheses
return m_hypoId++;
}
@ -1546,11 +1547,10 @@ void Manager::OutputBest(OutputCollector *collector) const
bestHypo->OutputBestSurface(
out,
staticData.GetOutputFactorOrder(),
options().output.ReportSegmentation,
options().output.ReportAllFactors);
options().output);
if (options().output.PrintAlignmentInfo) {
out << "||| ";
bestHypo->OutputAlignment(out);
bestHypo->OutputAlignment(out, options().output.WA_SortOrder);
}
IFVERBOSE(1) {

View File

@ -51,7 +51,7 @@ MockHypothesisGuard
const Bitmap &initBitmap = bitmaps.GetInitialBitmap();
m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt,
initBitmap);
initBitmap, m_manager->GetNextHypoId());
//create the chain
vector<Alignment>::const_iterator ai = alignments.begin();
@ -67,7 +67,7 @@ MockHypothesisGuard
m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL);
m_toptions.push_back(new TranslationOption
(range,m_targetPhrases.back()));
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap);
m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap, m_manager->GetNextHypoId());
}

View File

@ -981,6 +981,9 @@ ConvertWeightArgsLM()
featureLine += "lazyken=0 ";
}
if(oovWeights.size() > lmIndex)
featureLine += "oov-feature=1 ";
featureLine += "path=" + modelToks[3]; // file
AddFeature(featureLine);

View File

@ -161,11 +161,10 @@ void Phrase::PrependWord(const Word &newWord)
m_words[0] = newWord;
}
void Phrase::CreateFromString(FactorDirection direction
,const std::vector<FactorType> &factorOrder
,const StringPiece &phraseString
// ,const StringPiece &factorDelimiter // eliminated [UG]
,Word **lhs)
void Phrase::CreateFromString(FactorDirection direction,
const std::vector<FactorType> &factorOrder,
const StringPiece &phraseString,
Word **lhs)
{
// parse
vector<StringPiece> annotatedWordVector;

View File

@ -97,16 +97,17 @@ public:
/** destructor */
virtual ~Phrase();
/** Fills phrase with words from format string, typically from phrase table or sentence input
* \param factorOrder factor types of each element in 2D string vector
* \param phraseString formatted input string to parse
* \param factorDelimiter delimiter between factors.
/**
* Fills phrase with words from format string, typically from phrase table or sentence input
*
* \param factorOrder factor types of each element in 2D string vector
* \param phraseString formatted input string to parse
* \param lhs returns the non-terminal Word for the left-hand side of an SCFG rule, may be NULL for phrase-based
*/
void CreateFromString(FactorDirection direction
, const std::vector<FactorType> &factorOrder
, const StringPiece &phraseString
// , const StringPiece &factorDelimiter // never used [UG]
, Word **lhs);
void CreateFromString(FactorDirection direction,
const std::vector<FactorType> &factorOrder,
const StringPiece &phraseString,
Word **lhs);
/** copy factors from the other phrase to this phrase.
IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten

View File

@ -89,7 +89,11 @@ public:
struct MockProducers {
MockProducers() {}
MockProducers() {
FeatureFunction::Register(&single);
FeatureFunction::Register(&multi);
FeatureFunction::Register(&sparse);
}
MockSingleFeature single;
MockMultiFeature multi;

View File

@ -79,7 +79,7 @@ void SearchCubePruning::Decode()
{
// initial seed hypothesis: nothing translated, no words produced
const Bitmap &initBitmap = m_bitmaps.GetInitialBitmap();
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap);
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap, m_manager.GetNextHypoId());
HypothesisStackCubePruning &firstStack
= *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.front());
@ -117,7 +117,13 @@ void SearchCubePruning::Decode()
for(bmIter = accessor.begin(); bmIter != accessor.end(); ++bmIter) {
// build the first hypotheses
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
bmIter->second->InitializeEdges();
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
}
m_manager.GetSentenceStats().StartTimeManageCubes();
BCQueue.push(bmIter->second);
m_manager.GetSentenceStats().StopTimeManageCubes();
@ -137,7 +143,13 @@ void SearchCubePruning::Decode()
m_manager.GetSentenceStats().AddPopped();
}
// push on stack and create successors
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
bc->ProcessBestHypothesis();
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
}
// if there are any hypothesis left in this specific container, add back to queue
m_manager.GetSentenceStats().StartTimeManageCubes();
if (!bc->Empty())
@ -148,9 +160,15 @@ void SearchCubePruning::Decode()
// ensure diversity, a minimum number of inserted hyps for each bitmap container;
// NOTE: diversity doesn't ensure they aren't pruned at some later point
if (Diversity > 0) {
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
for(bmIter = accessor.begin(); bmIter != accessor.end(); ++bmIter) {
bmIter->second->EnsureMinStackHyps(Diversity);
}
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
}
}
// the stack is pruned before processing (lazy pruning):

View File

@ -86,7 +86,7 @@ void SearchNormal::Decode()
// initial seed hypothesis: nothing translated, no words produced
const Bitmap &initBitmap = m_bitmaps.GetInitialBitmap();
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap);
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap, m_manager.GetNextHypoId());
m_hypoStackColl[0]->AddPrune(hypo);
@ -306,12 +306,29 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
IFVERBOSE(2) {
stats.StartTimeBuildHyp();
}
newHypo = new Hypothesis(hypothesis, transOpt, bitmap);
newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
IFVERBOSE(2) {
stats.StopTimeBuildHyp();
}
if (newHypo==NULL) return;
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
}
newHypo->EvaluateWhenApplied(estimatedScore);
IFVERBOSE(2) {
m_manager.GetSentenceStats().StopTimeOtherScore();
// TODO: these have been meaningless for a while.
// At least since commit 67fb5c
// should now be measured in SearchNormal.cpp:254 instead, around CalcFutureScore2()
// CalcFutureScore2() also called in BackwardsEdge::Initialize().
//
// however, CalcFutureScore2() should be quick
// since it uses dynamic programming results in SquareMatrix
m_manager.GetSentenceStats().StartTimeEstimateScore();
m_manager.GetSentenceStats().StopTimeEstimateScore();
}
} else
// early discarding: check if hypothesis is too bad to build
{
@ -340,7 +357,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
IFVERBOSE(2) {
stats.StartTimeBuildHyp();
}
newHypo = new Hypothesis(hypothesis, transOpt, bitmap);
newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
if (newHypo==NULL) return;
IFVERBOSE(2) {
stats.StopTimeBuildHyp();

View File

@ -61,7 +61,6 @@ StaticData StaticData::s_instance;
StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
, m_treeStructure(NULL)
@ -282,8 +281,6 @@ ini_oov_options()
// m_parameter->SetParameter<string>(m_unknownWordPrefix, "unknown-word-prefix", "UNK" );
// m_parameter->SetParameter<string>(m_unknownWordSuffix, "unknown-word-suffix", "" );
m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
//source word deletion
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
@ -301,7 +298,6 @@ ini_zombie_options()
bool StaticData::LoadData(Parameter *parameter)
{
ResetUserTime();
m_parameter = parameter;
const PARAM_VEC *params;

View File

@ -116,7 +116,6 @@ protected:
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)
bool m_lmEnableOOVFeature;
bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created
@ -366,10 +365,6 @@ public:
return m_lmcache_cleanup_threshold;
}
bool GetLMEnableOOVFeature() const {
return m_lmEnableOOVFeature;
}
const std::string& GetOutputUnknownsFile() const {
return m_outputUnknownsFile;
}

View File

@ -13,6 +13,7 @@
#include "moses/FactorCollection.h"
#include "moses/Word.h"
#include "moses/Util.h"
#include "moses/Timer.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/Range.h"

View File

@ -3,6 +3,7 @@
#include <vector>
#include "moses/ScoreComponentCollection.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/TargetPhrase.h"
namespace Moses

View File

@ -13,6 +13,7 @@
#include "moses/FactorCollection.h"
#include "moses/Word.h"
#include "moses/Util.h"
#include "moses/Timer.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/Range.h"

View File

@ -9,6 +9,26 @@
namespace Moses
{
//global variable
Timer g_timer;
void ResetUserTime()
{
g_timer.start();
};
void PrintUserTime(const std::string &message)
{
g_timer.check(message.c_str());
}
double GetUserTime()
{
return g_timer.get_elapsed_time();
}
/***
* Return the total wall time that the timer has been in the "running"
* state since it was first "started".

View File

@ -9,6 +9,21 @@
namespace Moses
{
// A couple of utilities to measure decoding time
/** Start global timer. */
void ResetUserTime();
/** Print out an optional message followed by the current global timer timing. */
void PrintUserTime(const std::string &message);
/**
* Total wall time that the global timer has been in the "running"
* state since it was first "started".
*/
double GetUserTime();
/** Wrapper around time_t to time how long things have been running
* according to walltime. We avoid CPU time since it is less reliable
* in a multi-threaded environment and can spuriously include clock cycles

View File

@ -22,6 +22,7 @@
#include "moses/AlignmentInfoCollection.h"
#include "moses/InputFileStream.h"
#include "moses/Util.h"
#include "moses/Timer.h"
#include "moses/Word.h"
#include "Trie.h"

View File

@ -64,7 +64,7 @@ int main(int argc, char* argv[])
if (size_t(m.ca()) == num_occurrences) continue;
num_occurrences = m.ca();
SPTR<SamplingBias const> zilch;
BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
BitextSampler<Token> s(B, m, zilch, 1000, 1000,
sapt::random_sampling);
s();
if (s.stats()->trg.size() == 0) continue;

View File

@ -102,7 +102,7 @@ namespace sapt
template<typename Token> class BitextSampler;
template<typename TKN>
class Bitext : public Moses::reference_counter
class Bitext // : public Moses::reference_counter
{
public:
template<typename Token> friend class BitextSampler;
@ -167,7 +167,7 @@ namespace sapt
prep2(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
#endif
public:
protected:
Bitext(size_t const max_sample = 1000, size_t const xnum_workers = 16);
Bitext(Ttrack<Token>* const t1, Ttrack<Token>* const t2,
@ -176,7 +176,7 @@ namespace sapt
TSA<Token>* const i1, TSA<Token>* const i2,
size_t const max_sample=1000,
size_t const xnum_workers=16);
public:
virtual void
open(std::string const base, std::string const L1, std::string const L2) = 0;

View File

@ -81,7 +81,7 @@ BitextSampler : public Moses::reference_counter
public:
BitextSampler(BitextSampler const& other);
BitextSampler const& operator=(BitextSampler const& other);
BitextSampler(bitext const* const bitext,
BitextSampler(SPTR<bitext const> const& bitext,
typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias,
size_t const min_samples,
@ -182,7 +182,7 @@ flip_coin(id_type & sid, ushort & offset)
template<typename Token>
BitextSampler<Token>::
BitextSampler(Bitext<Token> const* const bitext,
BitextSampler(SPTR<Bitext<Token> const> const& bitext,
typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias, size_t const min_samples, size_t const max_samples,
sampling_method const method)

View File

@ -82,7 +82,7 @@ namespace Moses
// * Don't use features that depend on generation steps that won't be run
// yet at extract time
SetFeaturesToApply();
Register();
// Register();
}
void
@ -721,7 +721,7 @@ namespace Moses
if (foo) { sfix = *foo; sfix->wait(); }
else
{
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
BitextSampler<Token> s(btfix, mfix, context->bias,
m_min_sample_size,
m_default_sample_size,
m_sampling_method);
@ -910,8 +910,9 @@ namespace Moses
uint64_t pid = mfix.getPid();
if (!context->cache1->get(pid))
{
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
m_min_sample_size, m_default_sample_size, m_sampling_method);
BitextSampler<Token> s(btfix, mfix, context->bias,
m_min_sample_size, m_default_sample_size,
m_sampling_method);
if (*context->cache1->get(pid, s.stats()) == s.stats())
m_thread_pool->add(s);
}

View File

@ -100,7 +100,7 @@ show(Bitext<Token> const& B, iter const& m, pstats& stats)
void
process(Bitext<Token> const* bitext, TSA<Token>::tree_iterator& m)
process(SPTR<Bitext<Token> const> const& bitext, TSA<Token>::tree_iterator& m)
{
static boost::shared_ptr<SamplingBias> nil(new SamplingBiasAlways(bitext->sid2did()));
static Moses::bitext::sampling_method random = Moses::bitext::random_sampling;
@ -126,7 +126,7 @@ process(Bitext<Token> const* bitext, TSA<Token>::tree_iterator& m)
int main(int argc, char* argv[])
{
interpret_args(argc, argv);
iptr<mmbitext> B(new mmbitext);
SPTR<mmbitext> B(new mmbitext);
B->open(bname, L1, L2);
TSA<Token>::tree_iterator m(B->I1.get());
// m.extend((*B.V1)["job"]);

View File

@ -5,6 +5,7 @@
#include "moses/TranslationAnalysis.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Timer.h"
#include "moses/InputType.h"
#include "moses/OutputCollector.h"
#include "moses/Incremental.h"

View File

@ -35,7 +35,7 @@
#include <boost/algorithm/string/predicate.hpp>
#include "TypeDef.h"
#include "Util.h"
#include "Timer.h"
//#include "Timer.h"
#include "util/exception.hh"
#include "util/file.hh"
#include "moses/FF/StatelessFeatureFunction.h"
@ -48,8 +48,6 @@ using namespace boost::algorithm;
namespace Moses
{
//global variable
Timer g_timer;
string GetTempFolder()
{
@ -90,21 +88,6 @@ bool FileExists(const std::string& filePath)
return !ifs.fail();
}
void ResetUserTime()
{
g_timer.start();
};
void PrintUserTime(const std::string &message)
{
g_timer.check(message.c_str());
}
double GetUserTime()
{
return g_timer.get_elapsed_time();
}
std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line)
{
std::vector< std::map<std::string, std::string> > meta;

View File

@ -466,11 +466,6 @@ inline void ShrinkToFit(T& v)
bool FileExists(const std::string& filePath);
// A couple of utilities to measure decoding time
void ResetUserTime();
void PrintUserTime(const std::string &message);
double GetUserTime();
// dump SGML parser for <dlt> tags
std::vector< std::map<std::string, std::string> > ProcessAndStripDLT(std::string &line);

View File

@ -15,8 +15,8 @@ namespace Moses {
vector<pair<size_t,size_t> const* > a
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
typedef pair<size_t,size_t> item;
map<string, xmlrpc_c::value> M;
BOOST_FOREACH(item const* p, a) {
map<string, xmlrpc_c::value> M;
M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first);
M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second);
dest.push_back(xmlrpc_c::value_struct(M));

View File

@ -397,7 +397,7 @@ pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const&
if (m_withWordAlignInfo) {
// word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_FOREACH(Hypothesis const* e, edges)
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
e->OutputLocalWordAlignment(w_aln);
dest["word-align"] = xmlrpc_c::value_array(w_aln);
}

@ -1 +1 @@
Subproject commit 37a595fd7bf41226933c0fdb6fb792bdc877c3fd
Subproject commit f69e79f5fc92d993354fa775de197b029d321175

View File

@ -2296,19 +2296,22 @@ sub create_ini {
}
my $lm_oov_prob = 0.1;
my $lm_extra_options = "";
if ($_POST_DECODING_TRANSLIT || $_TRANSLITERATION_PHRASE_TABLE){
$lm_oov_prob = -100.0;
$_LMODEL_OOV_FEATURE = "yes";
}
if ($_LMODEL_OOV_FEATURE) {
# enable language model OOV feature
$lm_extra_options = " oov-feature=1";
}
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o\n";
$feature_spec .= "$type name=LM$i factor=$f path=$fn order=$o$lm_extra_options\n";
$weight_spec .= "LM$i= 0.5".($_LMODEL_OOV_FEATURE?" $lm_oov_prob":"")."\n";
$i++;
}
if ($_LMODEL_OOV_FEATURE) {
print INI "\n# language model OOV feature enabled\n[lmodel-oov-feature]\n1\n\n";
}
# hierarchical model settings
print INI "\n";