2015-10-23 20:33:12 +03:00
|
|
|
/*
|
2015-10-26 00:20:55 +03:00
|
|
|
* System.cpp
|
2015-10-23 20:33:12 +03:00
|
|
|
*
|
|
|
|
* Created on: 23 Oct 2015
|
|
|
|
* Author: hieu
|
|
|
|
*/
|
2015-10-27 18:46:37 +03:00
|
|
|
#include <string>
|
|
|
|
#include <iostream>
|
|
|
|
#include <boost/foreach.hpp>
|
2015-11-10 15:46:26 +03:00
|
|
|
#include <boost/thread.hpp>
|
|
|
|
#include <boost/thread/mutex.hpp>
|
2015-10-26 00:20:55 +03:00
|
|
|
#include "System.h"
|
2015-11-03 16:24:39 +03:00
|
|
|
#include "FF/FeatureFunction.h"
|
2015-11-04 16:09:53 +03:00
|
|
|
#include "TranslationModel/UnknownWordPenalty.h"
|
2015-11-11 19:23:49 +03:00
|
|
|
#include "legacy/Util2.h"
|
2015-10-27 18:46:37 +03:00
|
|
|
#include "util/exception.hh"
|
|
|
|
|
|
|
|
using namespace std;
|
2015-10-23 20:33:12 +03:00
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
|
2016-03-31 23:00:16 +03:00
|
|
|
/**
 * Construct the System from parsed parameters.
 *
 * Work is done in this order:
 *   1. initialise `options` from the parameters and set `isPb` via IsPb();
 *   2. create the output collectors (best, and optionally n-best and
 *      detailed-translation collectors);
 *   3. read the CPU-affinity settings;
 *   4. create the feature functions and load their weights;
 *   5. unless "show-weights" was requested, load the feature functions, the
 *      [mapping] section and the decoding-graph backoff values;
 *   6. for non-phrase-based decoding, read "max-chart-span".
 *
 * @param paramsArg parsed parameters; used to initialise the `params` member
 *        and `options`.
 *
 * Raises through UTIL_THROW_IF2 when the XML input policy is XmlConstraint;
 * LoadMappings() also raises when the [mapping] section is missing.
 */
System::System(const Parameter &paramsArg) :
  params(paramsArg), featureFunctions(*this)
{
  options.init(paramsArg);
  IsPb();

  bestCollector.reset(new OutputCollector());

  // Third argument is presumably the fallback used when the parameter is not
  // given -- TODO confirm against Parameter::SetParameter.
  params.SetParameter(cpuAffinityOffset, "cpu-affinity-offset", -1);
  params.SetParameter(cpuAffinityOffsetIncr, "cpu-affinity-increment", 1);

  // output collectors
  if (options.nbest.nbest_size) {
    nbestCollector.reset(new OutputCollector(options.nbest.output_file_path));
  }

  if (!options.output.detailed_transrep_filepath.empty()) {
    detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath));
  }

  featureFunctions.Create();
  LoadWeights();

  if (params.GetParam("show-weights")) {
    cerr << "Showing weights then exit" << endl;
    featureFunctions.ShowWeights(weights);
    // Only construction is cut short here; nothing below (feature loading,
    // mappings, backoff) runs. Terminating the process is left to the caller.
    return;
  }

  cerr << "START featureFunctions.Load()" << endl;
  featureFunctions.Load();
  cerr << "START LoadMappings()" << endl;
  LoadMappings();
  cerr << "END LoadMappings()" << endl;
  LoadDecodeGraphBackoff();
  cerr << "END LoadDecodeGraphBackoff()" << endl;

  UTIL_THROW_IF2(options.input.xml_policy == XmlConstraint, "XmlConstraint not supported");

  // max spans for scfg decoding
  if (!isPb) {
    const PARAM_VEC *section = params.GetParam("max-chart-span");
    if (section && section->size()) {
      maxChartSpans = Scan<size_t>(*section);
      // Make the vector exactly one entry per mapping, filling any missing
      // entries with the default span.
      maxChartSpans.resize(mappings.size(), DEFAULT_MAX_CHART_SPAN);

      /*
      cerr << "maxChartSpans=" << maxChartSpans.size();
      for (size_t i = 0; i < maxChartSpans.size(); ++i) {
        cerr << " " << mappings[i]->GetName() << "=" << maxChartSpans[i];
      }
      cerr << endl;
      */
    }
    // NOTE(review): when "max-chart-span" is absent, maxChartSpans is left
    // untouched here -- confirm that its consumers cope with that.
  }
}
|
|
|
|
|
2016-03-20 14:40:39 +03:00
|
|
|
// Destructor: intentionally empty, no explicit cleanup is performed here.
System::~System() {}
|
|
|
|
|
2015-10-27 19:54:15 +03:00
|
|
|
void System::LoadWeights()
|
2015-10-24 14:39:15 +03:00
|
|
|
{
|
2015-11-05 14:19:37 +03:00
|
|
|
weights.Init(featureFunctions);
|
2016-08-19 00:23:21 +03:00
|
|
|
|
|
|
|
//cerr << "Weights:" << endl;
|
|
|
|
typedef std::map<std::string, std::vector<float> > WeightMap;
|
|
|
|
const WeightMap &allWeights = params.GetAllWeights();
|
|
|
|
BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) {
|
|
|
|
const string &ffName = valPair.first;
|
|
|
|
const std::vector<float> &ffWeights = valPair.second;
|
|
|
|
/*
|
|
|
|
cerr << ffName << "=";
|
|
|
|
for (size_t i = 0; i < ffWeights.size(); ++i) {
|
|
|
|
cerr << ffWeights[i] << " ";
|
|
|
|
}
|
|
|
|
cerr << endl;
|
|
|
|
*/
|
|
|
|
weights.SetWeights(featureFunctions, ffName, ffWeights);
|
|
|
|
}
|
2015-10-27 19:54:15 +03:00
|
|
|
}
|
2015-10-27 18:46:37 +03:00
|
|
|
|
2015-10-31 05:45:01 +03:00
|
|
|
void System::LoadMappings()
|
|
|
|
{
|
2015-11-13 13:40:55 +03:00
|
|
|
const PARAM_VEC *vec = params.GetParam("mapping");
|
2015-11-05 14:19:37 +03:00
|
|
|
UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section");
|
2015-10-31 05:45:01 +03:00
|
|
|
|
2016-03-31 23:00:16 +03:00
|
|
|
BOOST_FOREACH(const std::string &line, *vec){
|
|
|
|
vector<string> toks = Tokenize(line);
|
|
|
|
assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") );
|
|
|
|
|
|
|
|
size_t ptInd;
|
|
|
|
if (toks.size() == 2) {
|
|
|
|
ptInd = Scan<size_t>(toks[1]);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ptInd = Scan<size_t>(toks[2]);
|
2015-10-31 05:45:01 +03:00
|
|
|
}
|
2016-06-20 21:01:50 +03:00
|
|
|
const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd);
|
2016-03-31 23:00:16 +03:00
|
|
|
mappings.push_back(pt);
|
|
|
|
}
|
2015-10-27 18:46:37 +03:00
|
|
|
|
2016-03-31 23:00:16 +03:00
|
|
|
// unk pt
|
2016-06-20 21:01:50 +03:00
|
|
|
const UnknownWordPenalty *unkWP = featureFunctions.GetUnknownWordPenalty();
|
2015-12-07 21:16:51 +03:00
|
|
|
if (unkWP) {
|
2016-03-31 23:00:16 +03:00
|
|
|
mappings.push_back(unkWP);
|
2015-12-07 21:16:51 +03:00
|
|
|
}
|
2015-10-31 05:45:01 +03:00
|
|
|
}
|
2015-10-24 15:19:42 +03:00
|
|
|
|
2016-06-21 13:20:38 +03:00
|
|
|
void System::LoadDecodeGraphBackoff()
|
|
|
|
{
|
|
|
|
const PARAM_VEC *vec = params.GetParam("decoding-graph-backoff");
|
|
|
|
|
2016-06-21 18:57:16 +03:00
|
|
|
for (size_t i = 0; i < mappings.size(); ++i) {
|
|
|
|
PhraseTable *pt = const_cast<PhraseTable*>(mappings[i]);
|
2016-06-21 13:20:38 +03:00
|
|
|
|
2016-06-21 18:57:16 +03:00
|
|
|
if (vec && vec->size() < i) {
|
|
|
|
pt->decodeGraphBackoff = Scan<int>((*vec)[i]);
|
|
|
|
}
|
|
|
|
else if (pt == featureFunctions.GetUnknownWordPenalty()) {
|
|
|
|
pt->decodeGraphBackoff = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
pt->decodeGraphBackoff = 0;
|
2016-06-21 13:20:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-03 00:44:44 +03:00
|
|
|
// Returns the MemPool obtained from GetThreadSpecificObj(m_systemPool).
MemPool &System::GetSystemPool() const
{
  MemPool &systemPool = GetThreadSpecificObj(m_systemPool);
  return systemPool;
}
|
|
|
|
|
2015-11-10 14:28:17 +03:00
|
|
|
// Returns the MemPool obtained from GetThreadSpecificObj(m_managerPool).
MemPool &System::GetManagerPool() const
{
  MemPool &managerPool = GetThreadSpecificObj(m_managerPool);
  return managerPool;
}
|
|
|
|
|
2015-11-18 18:33:42 +03:00
|
|
|
// Returns a mutable reference to the m_vocab member from a const method.
FactorCollection &System::GetVocab() const
{
  FactorCollection &vocab = m_vocab;
  return vocab;
}
|
2015-11-20 03:49:48 +03:00
|
|
|
|
2016-02-29 13:55:24 +03:00
|
|
|
// Returns the hypothesis recycler obtained from
// GetThreadSpecificObj(m_hypoRecycler).
Recycler<HypothesisBase*> &System::GetHypoRecycler() const
{
  Recycler<HypothesisBase*> &recycler = GetThreadSpecificObj(m_hypoRecycler);
  return recycler;
}
|
|
|
|
|
2016-05-10 17:33:07 +03:00
|
|
|
/**
 * Return the Batch held by m_batch, creating it on first use.
 *
 * @param pool passed to the Batch constructor; only used when m_batch does
 *        not hold a Batch yet.
 * @return reference to the Batch held by m_batch.
 */
Batch &System::GetBatch(MemPool &pool) const
{
  Batch *batch = m_batch.get();
  if (!batch) {
    // Nothing stored yet: build one and hand it to m_batch.
    // NOTE(review): m_batch presumably takes ownership via reset() -- confirm.
    batch = new Batch(pool);
    m_batch.reset(batch);
  }

  assert(batch);
  return *batch;
}
|
|
|
|
|
2016-04-27 02:35:20 +03:00
|
|
|
void System::IsPb()
|
|
|
|
{
|
|
|
|
switch (options.search.algo) {
|
|
|
|
case Normal:
|
2016-05-10 01:55:59 +03:00
|
|
|
case NormalBatch:
|
2016-04-27 02:35:20 +03:00
|
|
|
case CubePruning:
|
|
|
|
case CubePruningPerMiniStack:
|
|
|
|
case CubePruningPerBitmap:
|
|
|
|
case CubePruningCardinalStack:
|
|
|
|
case CubePruningBitmapStack:
|
|
|
|
case CubePruningMiniStack:
|
|
|
|
isPb = true;
|
|
|
|
break;
|
|
|
|
case CYKPlus:
|
|
|
|
isPb = false;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-11 14:41:19 +03:00
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
}
|
|
|
|
|