2015-10-23 20:33:12 +03:00
|
|
|
/*
 * System.cpp
 *
 *  Created on: 23 Oct 2015
 *      Author: hieu
 */
|
2015-10-27 18:46:37 +03:00
|
|
|
#include <string>
|
|
|
|
#include <iostream>
|
|
|
|
#include <boost/foreach.hpp>
|
2015-11-10 15:46:26 +03:00
|
|
|
#include <boost/thread.hpp>
|
|
|
|
#include <boost/thread/mutex.hpp>
|
2015-10-26 00:20:55 +03:00
|
|
|
#include "System.h"
|
2015-11-03 16:24:39 +03:00
|
|
|
#include "FF/FeatureFunction.h"
|
2015-11-04 16:09:53 +03:00
|
|
|
#include "TranslationModel/UnknownWordPenalty.h"
|
2015-11-11 19:23:49 +03:00
|
|
|
#include "legacy/Util2.h"
|
2015-10-27 18:46:37 +03:00
|
|
|
#include "util/exception.hh"
|
|
|
|
|
|
|
|
using namespace std;
|
2015-10-23 20:33:12 +03:00
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
|
2015-11-11 20:31:05 +03:00
|
|
|
/**
 * Builds the System from the parsed parameters.
 *
 * Order of work: thread options, search/output options read through
 * params.SetParameter(), the optional n-best-list switch, then creation of
 * the feature functions, their weights, their data and the [mapping] section.
 *
 * @param paramsArg parsed parameters, used to initialise the `params` member.
 * @throws const char* when the n-best-list switch has fewer than two values.
 *         LoadWeights() and LoadMappings() additionally raise through
 *         UTIL_THROW_IF2 when their section is missing.
 */
System::System(const Parameter &paramsArg)
  :params(paramsArg)
  ,featureFunctions(*this)
{
  ini_performance_options();

  // search options
  params.SetParameter(stackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
  params.SetParameter(maxDistortion, "distortion-limit", -1);
  params.SetParameter(maxPhraseLength, "max-phrase-length",
                      DEFAULT_MAX_PHRASE_LENGTH);
  params.SetParameter(searchAlgorithm, "search-algorithm", Normal);
  params.SetParameter(popLimit, "cube-pruning-pop-limit",
                      DEFAULT_CUBE_PRUNING_POP_LIMIT);
  params.SetParameter(cubePruningDiversity, "cube-pruning-diversity",
                      static_cast<size_t>(0));
  params.SetParameter(cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);

  params.SetParameter(cpuAffinityOffset, "cpu-affinity-offset",
                      0);

  // output options: the enriched switch wins over the plain one
  reportSegmentation = (params.GetParam("report-segmentation-enriched")
                        ? 2 : (params.GetParam("report-segmentation")
                               ? 1 : 0));

  params.SetParameter(outputHypoScore, "output-hypo-score",
                      false);

  // n-best-list: <file> <size> [distinct]
  const PARAM_VEC *section = params.GetParam("n-best-list");
  if (section) {
    if (section->size() >= 2) {
      outputFilePath = section->at(0);
      nbestSize = Scan<size_t>( section->at(1) );
      onlyDistinct = (section->size()>2 && section->at(2)=="distinct");
    } else {
      // Exception type kept as a string literal so existing handlers still match.
      throw "wrong format for switch -n-best-list file size [distinct]";
    }
  } else {
    nbestSize = 0;
    // Do not leave the flag indeterminate when no n-best list is requested.
    // NOTE(review): assumes onlyDistinct has no initialiser in System.h - confirm.
    onlyDistinct = false;
  }

  // Weights are loaded between Create() and Load(); keep this order.
  featureFunctions.Create();
  LoadWeights();
  featureFunctions.Load();
  LoadMappings();
}
|
|
|
|
|
|
|
|
/** Destructor; the body performs no explicit cleanup. */
System::~System()
{
}
|
|
|
|
|
2015-10-27 19:54:15 +03:00
|
|
|
void System::LoadWeights()
|
2015-10-24 14:39:15 +03:00
|
|
|
{
|
2015-11-11 20:31:05 +03:00
|
|
|
const PARAM_VEC *vec = params.GetParam("weight");
|
2015-11-05 14:19:37 +03:00
|
|
|
UTIL_THROW_IF2(vec == NULL, "Must have [weight] section");
|
2015-10-27 18:46:37 +03:00
|
|
|
|
2015-11-05 14:19:37 +03:00
|
|
|
weights.Init(featureFunctions);
|
|
|
|
BOOST_FOREACH(const std::string &line, *vec) {
|
|
|
|
weights.CreateFromString(featureFunctions, line);
|
2015-10-27 18:46:37 +03:00
|
|
|
}
|
2015-10-27 19:54:15 +03:00
|
|
|
}
|
2015-10-27 18:46:37 +03:00
|
|
|
|
2015-10-31 05:45:01 +03:00
|
|
|
void System::LoadMappings()
|
|
|
|
{
|
2015-11-13 13:40:55 +03:00
|
|
|
const PARAM_VEC *vec = params.GetParam("mapping");
|
2015-11-05 14:19:37 +03:00
|
|
|
UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section");
|
2015-10-31 05:45:01 +03:00
|
|
|
|
2015-11-05 14:19:37 +03:00
|
|
|
BOOST_FOREACH(const std::string &line, *vec) {
|
2015-11-12 02:28:18 +03:00
|
|
|
vector<string> toks = Tokenize(line);
|
2015-11-06 19:09:44 +03:00
|
|
|
assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") );
|
|
|
|
|
|
|
|
size_t ptInd;
|
|
|
|
if (toks.size() == 2) {
|
2015-11-12 02:29:58 +03:00
|
|
|
ptInd = Scan<size_t>(toks[1]);
|
2015-11-06 19:09:44 +03:00
|
|
|
}
|
|
|
|
else {
|
2015-11-12 02:29:58 +03:00
|
|
|
ptInd = Scan<size_t>(toks[2]);
|
2015-11-06 19:09:44 +03:00
|
|
|
}
|
2015-11-05 14:19:37 +03:00
|
|
|
const PhraseTable *pt = featureFunctions.GetPhraseTablesExcludeUnknownWordPenalty(ptInd);
|
|
|
|
mappings.push_back(pt);
|
2015-10-31 05:45:01 +03:00
|
|
|
}
|
2015-10-27 18:46:37 +03:00
|
|
|
|
2015-10-31 05:45:01 +03:00
|
|
|
// unk pt
|
2015-12-07 21:16:51 +03:00
|
|
|
const UnknownWordPenalty *unkWP = dynamic_cast<const UnknownWordPenalty*>(featureFunctions.FindFeatureFunction("UnknownWordPenalty0"));
|
|
|
|
if (unkWP) {
|
|
|
|
mappings.push_back(unkWP);
|
|
|
|
}
|
2015-10-31 05:45:01 +03:00
|
|
|
}
|
2015-10-24 15:19:42 +03:00
|
|
|
|
2016-01-03 00:44:44 +03:00
|
|
|
/**
 * Returns the MemPool that GetThreadSpecificObj() yields for m_systemPool.
 * NOTE(review): presumably one pool per calling thread - confirm against
 * GetThreadSpecificObj().
 */
MemPool &System::GetSystemPool() const
{
  return GetThreadSpecificObj(m_systemPool);
}
|
|
|
|
|
2015-11-10 14:28:17 +03:00
|
|
|
/**
 * Returns the MemPool that GetThreadSpecificObj() yields for m_managerPool.
 * NOTE(review): presumably one pool per calling thread - confirm against
 * GetThreadSpecificObj().
 */
MemPool &System::GetManagerPool() const
{
  return GetThreadSpecificObj(m_managerPool);
}
|
|
|
|
|
2015-11-10 15:46:26 +03:00
|
|
|
void
|
|
|
|
System
|
|
|
|
::ini_performance_options()
|
|
|
|
{
|
2015-11-13 13:40:55 +03:00
|
|
|
const PARAM_VEC *paramsVec;
|
2015-11-10 15:46:26 +03:00
|
|
|
// m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
|
|
|
|
// m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
|
|
|
|
|
|
|
|
numThreads = 1;
|
|
|
|
paramsVec = params.GetParam("threads");
|
|
|
|
if (paramsVec && paramsVec->size()) {
|
|
|
|
if (paramsVec->at(0) == "all") {
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
numThreads = boost::thread::hardware_concurrency();
|
|
|
|
if (!numThreads) {
|
|
|
|
std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
std::cerr << "-threads all specified but moses not built with thread support";
|
|
|
|
return false;
|
|
|
|
#endif
|
|
|
|
} else {
|
2015-11-12 02:29:58 +03:00
|
|
|
numThreads = Scan<int>(paramsVec->at(0));
|
2015-11-10 15:46:26 +03:00
|
|
|
if (numThreads < 1) {
|
|
|
|
std::cerr << "Specify at least one thread.";
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
#ifndef WITH_THREADS
|
|
|
|
if (numThreads > 1) {
|
|
|
|
std::cerr << "Error: Thread count of " << params->at(0)
|
|
|
|
<< " but moses not built with thread support";
|
|
|
|
throw
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2015-11-18 18:33:42 +03:00
|
|
|
|
|
|
|
/** Gives mutable access to the m_vocab member, even through a const System. */
FactorCollection &System::GetVocab() const
{
  FactorCollection &vocab = m_vocab;
  return vocab;
}
|
2015-11-20 03:49:48 +03:00
|
|
|
|
2016-01-03 02:03:08 +03:00
|
|
|
/**
 * Returns the hypothesis recycler that GetThreadSpecificObj() yields for
 * m_hypoRecycler.
 * NOTE(review): presumably one recycler per calling thread - confirm against
 * GetThreadSpecificObj().
 */
Recycler<Hypothesis*> &System::GetHypoRecycler() const
{
  Recycler<Hypothesis*> &recycler = GetThreadSpecificObj(m_hypoRecycler);
  return recycler;
}
|
|
|
|
|
2015-11-24 14:24:09 +03:00
|
|
|
/**
 * Returns the hypothesis batch that GetThreadSpecificObj() yields for
 * m_batchForEval.
 * NOTE(review): presumably one batch per calling thread - confirm against
 * GetThreadSpecificObj().
 */
ObjectPoolContiguous<Hypothesis*> &System::GetBatchForEval() const
{
  ObjectPoolContiguous<Hypothesis*> &batch = GetThreadSpecificObj(m_batchForEval);
  return batch;
}
|
2015-12-09 18:31:41 +03:00
|
|
|
|
2015-12-30 02:46:38 +03:00
|
|
|
|
2015-12-29 21:44:42 +03:00
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
}
|
|
|
|
|