Merge ../mosesdecoder into perf_ff

This commit is contained in:
Ubuntu 2015-10-13 12:59:24 +00:00
commit 9e2024aa3c
38 changed files with 1892 additions and 181 deletions

View File

@ -54,7 +54,7 @@
# --static forces static linking (the default will fall
# back to shared)
#
# debug-symbols=on|off include (default) or exclude debugging
# debug-symbols=on|off include or exclude (default) debugging
# information also known as -g
# --notrace compiles without TRACE macros
#
@ -298,6 +298,8 @@ contrib/server//mosesserver
mm
rephraser
contrib/c++tokenizer//tokenizer
contrib/expected-bleu-training//train-expected-bleu
contrib/expected-bleu-training//prepare-expected-bleu-training
;

View File

@ -0,0 +1,223 @@
/*
Moses - statistical machine translation system
Copyright (C) 2005-2015 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "ExpectedBleuOptimizer.h"
namespace ExpectedBleuTraining
{
void ExpectedBleuOptimizer::AddTrainingInstance(const size_t nBestSizeCount,
const std::vector<float>& sBleu,
const std::vector<double>& overallScoreUntransformed,
const std::vector< boost::unordered_map<size_t, float> > &sparseScore,
bool maintainUpdateSet)
{
// compute xBLEU
double sumUntransformedScores = 0.0;
for (std::vector<double>::const_iterator overallScoreUntransformedIt=overallScoreUntransformed.begin();
overallScoreUntransformedIt!=overallScoreUntransformed.end(); ++overallScoreUntransformedIt)
{
sumUntransformedScores += *overallScoreUntransformedIt;
}
double xBleu = 0.0;
assert(nBestSizeCount == overallScoreUntransformed.size());
std::vector<double> p;
for (size_t i=0; i<nBestSizeCount; ++i)
{
if (sumUntransformedScores != 0) {
p.push_back( overallScoreUntransformed[i] / sumUntransformedScores );
} else {
p.push_back( 0 );
}
xBleu += p.back() * sBleu[ i ];
}
for (size_t i=0; i<nBestSizeCount; ++i)
{
double D = sBleu[ i ] - xBleu;
for (boost::unordered_map<size_t, float>::const_iterator sparseScoreIt=sparseScore[i].begin();
sparseScoreIt!=sparseScore[i].end(); ++sparseScoreIt)
{
const size_t name = sparseScoreIt->first;
float N = sparseScoreIt->second;
if ( std::fpclassify( p[i] * N * D ) == FP_SUBNORMAL )
{
m_err << "Error: encountered subnormal value: p[i] * N * D= " << p[i] * N * D
<< " with p[i]= " << p[i] << " N= " << N << " D= " << D << '\n';
m_err.flush();
exit(1);
} else {
m_gradient[name] += p[i] * N * D;
if ( maintainUpdateSet )
{
m_updateSet.insert(name);
}
}
}
}
m_xBleu += xBleu;
}
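For reference, a restatement of what AddTrainingInstance accumulates (assuming the untransformed scores are exponentiated model scores, as in TrainExpectedBleu.cpp below): with

p_i = s_i / \sum_j s_j , \qquad \mathrm{xBLEU} = \sum_i p_i \, \mathrm{sBLEU}_i ,

the gradient contribution of sparse feature k with value N_{ik} in hypothesis i is

\partial \, \mathrm{xBLEU} / \partial \lambda_k = \sum_i p_i \, N_{ik} \, ( \mathrm{sBLEU}_i - \mathrm{xBLEU} ) ,

which is exactly the p[i] * N * D term added to m_gradient[name] above.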
void ExpectedBleuOptimizer::InitSGD(const std::vector<float>& sparseScalingFactor)
{
const size_t nFeatures = sparseScalingFactor.size();
m_previousSparseScalingFactor.resize(nFeatures);
memcpy(&m_previousSparseScalingFactor.at(0), &sparseScalingFactor.at(0), nFeatures * sizeof(float));
m_gradient.resize(nFeatures);
}
float ExpectedBleuOptimizer::UpdateSGD(std::vector<float>& sparseScalingFactor,
size_t batchSize,
bool useUpdateSet)
{
float xBleu = m_xBleu / batchSize;
// update sparse scaling factors
if (useUpdateSet) {
for (std::set<size_t>::const_iterator it = m_updateSet.begin(); it != m_updateSet.end(); ++it)
{
size_t name = *it;
UpdateSingleScalingFactorSGD(name, sparseScalingFactor, batchSize);
}
m_updateSet.clear();
} else {
for (size_t name=0; name<sparseScalingFactor.size(); ++name)
{
UpdateSingleScalingFactorSGD(name, sparseScalingFactor, batchSize);
}
}
m_xBleu = 0;
m_gradient.clear();
return xBleu;
}
void ExpectedBleuOptimizer::UpdateSingleScalingFactorSGD(size_t name,
std::vector<float>& sparseScalingFactor,
size_t batchSize)
{
// regularization
if ( m_regularizationParameter != 0 )
{
m_gradient[name] = m_gradient[name] / m_xBleu - m_regularizationParameter * 2 * sparseScalingFactor[name];
} else {
// need to normalize by dividing by batchSize
m_gradient[name] /= batchSize;
}
// the actual update
sparseScalingFactor[name] += m_learningRate * m_gradient[name];
// discard scaling factors below a threshold
if ( fabs(sparseScalingFactor[name]) < m_floorAbsScalingFactor )
{
sparseScalingFactor[name] = 0;
}
}
void ExpectedBleuOptimizer::InitRPROP(const std::vector<float>& sparseScalingFactor)
{
const size_t nFeatures = sparseScalingFactor.size();
m_previousSparseScalingFactor.resize(nFeatures);
memcpy(&m_previousSparseScalingFactor.at(0), &sparseScalingFactor.at(0), nFeatures * sizeof(float));
m_previousGradient.resize(nFeatures);
m_gradient.resize(nFeatures);
m_stepSize.resize(nFeatures, m_initialStepSize);
}
float ExpectedBleuOptimizer::UpdateRPROP(std::vector<float>& sparseScalingFactor,
const size_t batchSize)
{
float xBleu = m_xBleu / batchSize;
// update sparse scaling factors
for (size_t name=0; name<sparseScalingFactor.size(); ++name)
{
// Sum of gradients. All we need is the sign. Don't need to normalize by dividing by batchSize.
// regularization
if ( m_regularizationParameter != 0 )
{
m_gradient[name] = m_gradient[name] / m_xBleu - m_regularizationParameter * 2 * sparseScalingFactor[name];
}
// step size
int sign = Sign(m_gradient[name]) * Sign(m_previousGradient[name]);
if (sign > 0) {
m_stepSize[name] *= m_increaseRate;
} else if (sign < 0) {
m_stepSize[name] *= m_decreaseRate;
}
if (m_stepSize[name] < m_minStepSize) {
m_stepSize[name] = m_minStepSize;
}
if (m_stepSize[name] > m_maxStepSize) {
m_stepSize[name] = m_maxStepSize;
}
// the actual update
m_previousGradient[name] = m_gradient[name];
if (sign >= 0) {
if (m_gradient[name] > 0) {
m_previousSparseScalingFactor[name] = sparseScalingFactor[name];
sparseScalingFactor[name] += m_stepSize[name];
} else if (m_gradient[name] < 0) {
m_previousSparseScalingFactor[name] = sparseScalingFactor[name];
sparseScalingFactor[name] -= m_stepSize[name];
}
} else {
sparseScalingFactor[name] = m_previousSparseScalingFactor[name];
// m_previousGradient[name] = 0;
}
// discard scaling factors below a threshold
if ( fabs(sparseScalingFactor[name]) < m_floorAbsScalingFactor )
{
sparseScalingFactor[name] = 0;
}
}
m_xBleu = 0;
m_gradient.clear();
return xBleu;
}
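The step-size schedule above is standard RPROP, restated compactly (with \eta^{+} = m_increaseRate, \eta^{-} = m_decreaseRate):

\Delta_k \leftarrow \min( \Delta_k \cdot \eta^{+}, \Delta_{\max} ) \quad \text{if } g_k \cdot g_k^{\mathrm{prev}} > 0
\Delta_k \leftarrow \max( \Delta_k \cdot \eta^{-}, \Delta_{\min} ) \quad \text{if } g_k \cdot g_k^{\mathrm{prev}} < 0

and the scaling factor moves by \pm \Delta_k in the direction of sign(g_k); on a sign change the previous value is restored instead (backtracking).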
}

View File

@ -0,0 +1,117 @@
/*
Moses - statistical machine translation system
Copyright (C) 2005-2015 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma once
#include <vector>
#include <set>
#include <boost/unordered_map.hpp>
#include "util/file_stream.hh"
namespace ExpectedBleuTraining
{
class ExpectedBleuOptimizer
{
public:
ExpectedBleuOptimizer(util::FileStream& err,
float learningRate=1,
float initialStepSize=0.001,
float decreaseRate=0.5,
float increaseRate=1.2,
float minStepSize=1e-7,
float maxStepSize=1,
float floorAbsScalingFactor=0,
float regularizationParameter=0)
: m_err(err)
, m_learningRate(learningRate)
, m_initialStepSize(initialStepSize)
, m_decreaseRate(decreaseRate)
, m_increaseRate(increaseRate)
, m_minStepSize(minStepSize)
, m_maxStepSize(maxStepSize)
, m_floorAbsScalingFactor(floorAbsScalingFactor)
, m_regularizationParameter(regularizationParameter)
, m_xBleu(0)
{ }
void AddTrainingInstance(const size_t nBestSizeCount,
const std::vector<float>& sBleu,
const std::vector<double>& overallScoreUntransformed,
const std::vector< boost::unordered_map<size_t, float> > &sparseScore,
bool maintainUpdateSet = false);
void InitSGD(const std::vector<float>& sparseScalingFactor);
float UpdateSGD(std::vector<float>& sparseScalingFactor,
size_t batchSize,
bool useUpdateSet = false);
void InitRPROP(const std::vector<float>& sparseScalingFactor);
float UpdateRPROP(std::vector<float>& sparseScalingFactor,
const size_t batchSize);
protected:
util::FileStream& m_err;
// for SGD
const float m_learningRate;
// for RPROP
const float m_initialStepSize;
const float m_decreaseRate;
const float m_increaseRate;
const float m_minStepSize;
const float m_maxStepSize;
std::vector<float> m_previousSparseScalingFactor;
std::vector<float> m_previousGradient;
std::vector<float> m_gradient;
std::vector<float> m_stepSize;
// other
const float m_floorAbsScalingFactor;
const float m_regularizationParameter;
double m_xBleu;
std::set<size_t> m_updateSet;
void UpdateSingleScalingFactorSGD(size_t name,
std::vector<float>& sparseScalingFactor,
size_t batchSize);
inline int Sign(double x)
{
if (x > 0) return 1;
if (x < 0) return -1;
return 0;
}
};
}

View File

@ -0,0 +1,2 @@
exe prepare-expected-bleu-training : PrepareExpectedBleuTraining.cpp ../../util//kenutil ;
exe train-expected-bleu : TrainExpectedBleu.cpp ExpectedBleuOptimizer.cpp ../../util//kenutil ;
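Assuming the usual Moses bjam build (the invocation itself is an assumption; the target paths are the ones registered in the Jamroot above), the two tools can then be built with:

./bjam contrib/expected-bleu-training//prepare-expected-bleu-training contrib/expected-bleu-training//train-expected-bleu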

View File

@ -0,0 +1,222 @@
/*
Moses - statistical machine translation system
Copyright (C) 2005-2015 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <vector>
#include <string>
#include <sstream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <boost/program_options.hpp>
#include "util/file_stream.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
namespace po = boost::program_options;
int main(int argc, char **argv)
{
util::FileStream err(2);
std::string filenameNBestListIn, filenameFeatureNamesOut, filenameIgnoreFeatureNames;
size_t maxNBestSize;
try {
po::options_description descr("Usage");
descr.add_options()
("help,h", "produce help message")
("n-best-list,n", po::value<std::string>(&filenameNBestListIn)->required(),
"input n-best list file")
("write-feature-names-file,f", po::value<std::string>(&filenameFeatureNamesOut)->required(),
"output file for mapping between feature names and indices")
("ignore-features-file,i", po::value<std::string>(&filenameIgnoreFeatureNames)->required(),
"input file containing list of feature names to be ignored")
("n-best-size-limit,l", po::value<size_t>(&maxNBestSize)->default_value(100),
"limit of n-best list entries to be considered")
;
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, descr), vm);
if (vm.count("help")) {
std::ostringstream os;
os << descr;
std::cout << os.str() << '\n';
exit(0);
}
po::notify(vm);
} catch(std::exception& e) {
err << "Error: " << e.what() << '\n';
err.flush();
exit(1);
}
util::FilePiece ifsNBest(filenameNBestListIn.c_str());
util::FilePiece ifsIgnoreFeatureNames(filenameIgnoreFeatureNames.c_str());
util::scoped_fd fdFeatureNames(util::CreateOrThrow(filenameFeatureNamesOut.c_str()));
util::FileStream ofsFeatureNames(fdFeatureNames.get());
util::FileStream ofsNBest(1);
boost::unordered_set<std::string> ignoreFeatureNames;
StringPiece line;
while ( ifsIgnoreFeatureNames.ReadLineOrEOF(line) )
{
if ( !line.empty() ) {
util::TokenIter<util::AnyCharacter> item(line, " \t=");
if ( item != item.end() )
{
ignoreFeatureNames.insert(item->as_string());
err << "ignoring " << *item << '\n';
}
}
}
size_t maxFeatureNamesIdx = 0;
boost::unordered_map<std::string, size_t> featureNames;
size_t sentenceIndex = 0;
size_t nBestSizeCount = 0;
size_t globalIndex = 0;
while ( ifsNBest.ReadLineOrEOF(line) )
{
util::TokenIter<util::MultiCharacter> item(line, " ||| ");
if ( item == item.end() )
{
err << "Error: flawed content in " << filenameNBestListIn << '\n';
exit(1);
}
size_t sentenceIndexCurrent = atol( item->as_string().c_str() );
if ( sentenceIndex != sentenceIndexCurrent )
{
nBestSizeCount = 0;
sentenceIndex = sentenceIndexCurrent;
}
if ( nBestSizeCount < maxNBestSize )
{
// process n-best list entry
StringPiece scores;
StringPiece decoderScore;
for (size_t nItem=1; nItem<=3; ++nItem)
{
if ( ++item == item.end() ) {
err << "Error: flawed content in " << filenameNBestListIn << '\n';
exit(1);
}
if (nItem == 2) {
scores = *item;
}
if (nItem == 3) {
decoderScore = *item;
}
}
ofsNBest << sentenceIndex << ' '
<< decoderScore;
util::TokenIter<util::SingleCharacter> token(scores, ' ');
std::string featureNameCurrent("ERROR");
std::string featureNameCurrentBase("ERROR");
bool ignore = false;
int scoreComponentIndex = 0;
while ( token != token.end() )
{
if ( token->ends_with("=") )
{
scoreComponentIndex = 0;
featureNameCurrent = token->substr(0,token->size()-1).as_string();
size_t idx = featureNameCurrent.find_first_of('_');
if ( idx == std::string::npos ) {
featureNameCurrentBase = featureNameCurrent;
} else {
featureNameCurrentBase = featureNameCurrent.substr(0,idx+1);
}
ignore = false;
if ( ignoreFeatureNames.find(featureNameCurrentBase) != ignoreFeatureNames.end() )
{
ignore = true;
} else {
if ( (featureNameCurrent.compare(featureNameCurrentBase)) &&
(ignoreFeatureNames.find(featureNameCurrent) != ignoreFeatureNames.end()) )
{
ignore = true;
}
}
}
else
{
if ( !ignore )
{
float featureValueCurrent = atof( token->as_string().c_str() );
// components of multi-valued dense features are distinguished by a "+index" suffix
std::string featureNameWithIndex = featureNameCurrent;
if ( scoreComponentIndex > 0 )
{
std::ostringstream oss;
oss << '+' << scoreComponentIndex;
featureNameWithIndex.append(oss.str());
}
if ( featureValueCurrent != 0 )
{
boost::unordered_map<std::string, size_t>::iterator featureName = featureNames.find(featureNameWithIndex);
if ( featureName == featureNames.end() )
{
std::pair< boost::unordered_map<std::string, size_t>::iterator, bool> inserted =
featureNames.insert( std::make_pair(featureNameWithIndex, maxFeatureNamesIdx) );
++maxFeatureNamesIdx;
featureName = inserted.first;
}
ofsNBest << ' ' << featureName->second // feature name index
<< ' ' << *token; // feature value
}
++scoreComponentIndex;
}
}
++token;
}
ofsNBest << '\n';
++nBestSizeCount;
}
++globalIndex;
}
ofsFeatureNames << maxFeatureNamesIdx << '\n';
for (boost::unordered_map<std::string, size_t>::const_iterator featureNamesIt=featureNames.begin();
featureNamesIt!=featureNames.end(); ++featureNamesIt)
{
ofsFeatureNames << featureNamesIt->second << ' ' << featureNamesIt->first << '\n';
}
}
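To illustrate the transformation (hypothetical data; the feature names and values are made up): given a Moses n-best entry such as

0 ||| das haus ||| LM0= -12.3 TM0= -4.1 -2.7 ||| -15.2

the tool writes one line per hypothesis containing the sentence index, the decoder score, and index/value pairs for every non-zero, non-ignored feature:

0 -15.2 0 -12.3 1 -4.1 2 -2.7

Here LM0 was assigned index 0, the first TM0 component index 1, and the second component (named TM0+1) index 2; the index-to-name mapping goes to the feature names file, whose first line holds the total feature count.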

View File

@ -0,0 +1,379 @@
/*
Moses - statistical machine translation system
Copyright (C) 2005-2015 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "ExpectedBleuOptimizer.h"
#include "util/file_stream.hh"
#include "util/file_piece.hh"
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
#include <sstream>
#include <boost/program_options.hpp>
using namespace ExpectedBleuTraining;
namespace po = boost::program_options;
int main(int argc, char **argv) {
util::FileStream out(1);
util::FileStream err(2);
size_t maxNBestSize;
size_t iterationLimit;
std::string filenameSBleu, filenameNBestList, filenameFeatureNames, filenameInitialWeights;
bool ignoreDecoderScore;
float learningRate;
float initialStepSize;
float decreaseRate;
float increaseRate;
float minStepSize;
float maxStepSize;
float floorAbsScalingFactor;
float regularizationParameter;
bool printZeroWeights;
bool miniBatches;
std::string optimizerTypeStr;
size_t optimizerType = 0;
#define EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP 1
#define EXPECTED_BLEU_OPTIMIZER_TYPE_SGD 2
try {
po::options_description descr("Usage");
descr.add_options()
("help,h", "produce help message")
("n-best-size-limit,l", po::value<size_t>(&maxNBestSize)->default_value(100),
"limit of n-best list entries to be considered for training")
("iterations,i", po::value<size_t>(&iterationLimit)->default_value(50),
"number of training iterations")
("sbleu-file,b", po::value<std::string>(&filenameSBleu)->required(),
"file containing sentence-level BLEU scores for all n-best list entries")
("prepared-n-best-list,n", po::value<std::string>(&filenameNBestList)->required(),
"input n-best list file, in prepared format for expected BLEU training")
("feature-name-file,f", po::value<std::string>(&filenameFeatureNames)->required(),
"file containing mapping between feature names and indices")
("initial-weights-file,w", po::value<std::string>(&filenameInitialWeights)->default_value(""),
"file containing start values for scaling factors (optional)")
("ignore-decoder-score", boost::program_options::value<bool>(&ignoreDecoderScore)->default_value(0),
"exclude decoder score from computation of posterior probability")
("regularization", boost::program_options::value<float>(&regularizationParameter)->default_value(0), // e.g. 1e-5
"regularization parameter; suggested value range: [1e-8,1e-5]")
("learning-rate", boost::program_options::value<float>(&learningRate)->default_value(1),
"learning rate for the SGD optimizer")
("floor", boost::program_options::value<float>(&floorAbsScalingFactor)->default_value(0), // e.g. 1e-7
"set scaling factor to 0 if below this absolute value after update")
("initial-step-size", boost::program_options::value<float>(&initialStepSize)->default_value(0.001), // TODO: try 0.01 and 0.1
"initial step size for the RPROP optimizer")
("decrease-rate", boost::program_options::value<float>(&decreaseRate)->default_value(0.5),
"decrease rate for the RPROP optimizer")
("increase-rate", boost::program_options::value<float>(&increaseRate)->default_value(1.2),
"increase rate for the RPROP optimizer")
("min-step-size", boost::program_options::value<float>(&minStepSize)->default_value(1e-7),
"minimum step size for the RPROP optimizer")
("max-step-size", boost::program_options::value<float>(&maxStepSize)->default_value(1),
"maximum step size for the RPROP optimizer")
("print-zero-weights", boost::program_options::value<bool>(&printZeroWeights)->default_value(0),
"output scaling factors even if they are trained to 0")
("optimizer", po::value<std::string>(&optimizerTypeStr)->default_value("RPROP"),
"optimizer type used for training (known algorithms: RPROP, SGD)")
("mini-batches", boost::program_options::value<bool>(&miniBatches)->default_value(0),
"update after every single sentence (SGD only)")
;
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, descr), vm);
if (vm.count("help")) {
std::ostringstream os;
os << descr;
out << os.str() << '\n';
out.flush();
exit(0);
}
po::notify(vm);
} catch(std::exception& e) {
err << "Error: " << e.what() << '\n';
err.flush();
exit(1);
}
if ( !optimizerTypeStr.compare("rprop") || !optimizerTypeStr.compare("RPROP") ) {
optimizerType = EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP;
} else if ( !optimizerTypeStr.compare("sgd") || !optimizerTypeStr.compare("SGD") ) {
optimizerType = EXPECTED_BLEU_OPTIMIZER_TYPE_SGD;
} else {
err << "Error: unknown optimizer type: \"" << optimizerTypeStr << "\" (known optimizers: rprop, sgd) " << '\n';
err.flush();
exit(1);
}
util::FilePiece ifsFeatureNames(filenameFeatureNames.c_str());
StringPiece lineFeatureName;
if ( !ifsFeatureNames.ReadLineOrEOF(lineFeatureName) )
{
err << "Error: flawed content in " << filenameFeatureNames << '\n';
err.flush();
exit(1);
}
size_t maxFeatureNamesIdx = atol( lineFeatureName.as_string().c_str() );
std::vector<std::string> featureNames(maxFeatureNamesIdx);
boost::unordered_map<std::string, size_t> featureIndexes;
for (size_t i=0; i<maxFeatureNamesIdx; ++i)
{
if ( !ifsFeatureNames.ReadLineOrEOF(lineFeatureName) ) {
err << "Error: flawed content in " << filenameFeatureNames << '\n';
err.flush();
exit(1);
}
util::TokenIter<util::SingleCharacter> token(lineFeatureName, ' ');
size_t featureIndexCurrent = atol( token->as_string().c_str() );
token++;
featureNames[featureIndexCurrent] = token->as_string();
featureIndexes[token->as_string()] = featureIndexCurrent;
}
std::vector<float> sparseScalingFactor(maxFeatureNamesIdx);
std::vector< boost::unordered_map<size_t, float> > sparseScore(maxNBestSize);
// read initial weights, if any given
if ( filenameInitialWeights.length() != 0 )
{
util::FilePiece ifsInitialWeights(filenameInitialWeights.c_str());
StringPiece lineInitialWeight;
if ( !ifsInitialWeights.ReadLineOrEOF(lineInitialWeight) ) {
err << "Error: flawed content in " << filenameInitialWeights << '\n';
err.flush();
exit(1);
}
do {
util::TokenIter<util::SingleCharacter> token(lineInitialWeight, ' ');
boost::unordered_map<std::string, size_t>::const_iterator found = featureIndexes.find(token->as_string());
if ( found == featureIndexes.end() ) {
err << "Error: flawed content in " << filenameInitialWeights << " (unkown feature name \"" << token->as_string() << "\")" << '\n';
err.flush();
exit(1);
}
token++;
sparseScalingFactor[found->second] = atof( token->as_string().c_str() );
} while ( ifsInitialWeights.ReadLineOrEOF(lineInitialWeight) );
}
// train
ExpectedBleuOptimizer optimizer(err,
learningRate,
initialStepSize,
decreaseRate,
increaseRate,
minStepSize,
maxStepSize,
floorAbsScalingFactor,
regularizationParameter);
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP )
{
optimizer.InitRPROP(sparseScalingFactor);
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) {
optimizer.InitSGD(sparseScalingFactor);
} else {
err << "Error: unknown optimizer type" << '\n';
err.flush();
exit(1);
}
for (size_t nIteration=1; nIteration<=iterationLimit; ++nIteration)
{
util::FilePiece ifsSBleu(filenameSBleu.c_str());
util::FilePiece ifsNBest(filenameNBestList.c_str());
out << "### ITERATION " << nIteration << '\n' << '\n';
size_t sentenceIndex = 0;
size_t batchSize = 0;
size_t nBestSizeCount = 0;
size_t globalIndex = 0;
StringPiece lineNBest;
std::vector<double> overallScoreUntransformed;
std::vector<float> sBleu;
float xBleu = 0;
// double expPrecisionCorrection = 0.0;
while ( ifsNBest.ReadLineOrEOF(lineNBest) )
{
util::TokenIter<util::SingleCharacter> token(lineNBest, ' ');
if ( token == token.end() )
{
err << "Error: flawed content in " << filenameNBestList << '\n';
err.flush();
exit(1);
}
size_t sentenceIndexCurrent = atol( token->as_string().c_str() );
token++;
if ( sentenceIndex != sentenceIndexCurrent )
{
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP )
{
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore );
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) {
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore, miniBatches );
if ( miniBatches ) {
xBleu += optimizer.UpdateSGD( sparseScalingFactor, 1 );
// out << "ITERATION " << nIteration << " SENTENCE " << sentenceIndex << " XBLEUSUM= " << xBleu << '\n';
// for (size_t i=0; i<sparseScalingFactor.size(); ++i)
// {
// if ( (sparseScalingFactor[i] != 0) || printZeroWeights )
// {
// out << "ITERATION " << nIteration << " WEIGHT " << featureNames[i] << " " << sparseScalingFactor[i] << '\n';
// }
// }
// out << '\n';
// out.flush();
}
} else {
err << "Error: unknown optimizer type" << '\n';
err.flush();
exit(1);
}
for (size_t i=0; i<nBestSizeCount; ++i) {
sparseScore[i].clear();
}
nBestSizeCount = 0;
overallScoreUntransformed.clear();
sBleu.clear();
sentenceIndex = sentenceIndexCurrent;
++batchSize;
}
StringPiece lineSBleu;
if ( !ifsSBleu.ReadLineOrEOF(lineSBleu) )
{
err << "Error: insufficient number of lines in " << filenameSBleu << '\n';
err.flush();
exit(1);
}
if ( nBestSizeCount < maxNBestSize )
{
// retrieve sBLEU
float sBleuCurrent = atof( lineSBleu.as_string().c_str() );
sBleu.push_back(sBleuCurrent);
// process n-best list entry
if ( token == token.end() )
{
err << "Error: flawed content in " << filenameNBestList << '\n';
err.flush();
exit(1);
}
double scoreCurrent = 0;
if ( !ignoreDecoderScore )
{
scoreCurrent = atof( token->as_string().c_str() ); // decoder score
}
token++;
// if ( nBestSizeCount == 0 ) // best translation (first n-best list entry for the current sentence / a new mini-batch)
// {
// expPrecisionCorrection = std::floor ( scoreCurrent ); // decoder score of first-best
// }
while (token != token.end())
{
size_t featureNameCurrent = atol( token->as_string().c_str() );
token++;
float featureValueCurrent = atof( token->as_string().c_str() );
sparseScore[nBestSizeCount].insert(std::make_pair(featureNameCurrent, featureValueCurrent));
scoreCurrent += sparseScalingFactor[featureNameCurrent] * featureValueCurrent;
token++;
}
// overallScoreUntransformed.push_back( std::exp(scoreCurrent - expPrecisionCorrection) );
overallScoreUntransformed.push_back( std::exp(scoreCurrent) );
++nBestSizeCount;
}
++globalIndex;
}
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP )
{
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore ); // last sentence in the corpus
xBleu = optimizer.UpdateRPROP( sparseScalingFactor, batchSize );
out << "xBLEU= " << xBleu << '\n';
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) {
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore, miniBatches ); // last sentence in the corpus
if ( miniBatches ) {
xBleu += optimizer.UpdateSGD( sparseScalingFactor, 1 );
xBleu /= batchSize;
} else {
xBleu = optimizer.UpdateSGD( sparseScalingFactor, batchSize );
}
out << "xBLEU= " << xBleu << '\n';
} else {
err << "Error: unknown optimizer type" << '\n';
err.flush();
exit(1);
}
for (size_t i=0; i<nBestSizeCount; ++i) {
sparseScore[i].clear();
}
nBestSizeCount = 0;
overallScoreUntransformed.clear();
sBleu.clear();
out << '\n';
for (size_t i=0; i<sparseScalingFactor.size(); ++i)
{
if ( (sparseScalingFactor[i] != 0) || printZeroWeights )
{
out << "ITERATION " << nIteration << " WEIGHT " << featureNames[i] << " " << sparseScalingFactor[i] << '\n';
}
}
out << '\n';
out.flush();
}
}
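A usage sketch under the options defined above (all file names are placeholders; writing the weight log to stdout matches the FileStream out(1) above):

./train-expected-bleu -n prepared.nbest -b sbleu.scores -f feature-names.txt --optimizer RPROP -i 50 > weights.log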

View File

@ -1,5 +1,5 @@
SALMDIR=/Users/hieuhoang/workspace/salm
FLAVOR?=o64
FLAVOR?=o32
INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch
OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR)

View File

@ -110,13 +110,13 @@ public:
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore,
, const SquareMatrix &futureScores,
const InputType& itype)
: m_initialized(false)
, m_prevBitmapContainer(prevBitmapContainer)
, m_parent(parent)
, m_translations(translations)
, m_futurescore(futureScore)
, m_futureScores(futureScores)
, m_seenPosition()
{
@ -195,6 +195,10 @@ BackwardsEdge::Initialize()
return;
}
const WordsBitmap &bm = m_hypotheses[0]->GetWordsBitmap();
const WordsRange &newRange = m_translations.Get(0)->GetSourceWordsRange();
m_futureScore = m_futureScores.CalcFutureScore2(bm, newRange.GetStartPos(), newRange.GetEndPos());
Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
m_parent.Enqueue(0, 0, expanded, this);
SetSeenPosition(0, 0);
@ -211,7 +215,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const
IFVERBOSE(2) {
hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
}
newHypo->EvaluateWhenApplied(m_futurescore);
newHypo->EvaluateWhenApplied(m_futureScore);
return newHypo;
}

View File

@ -161,7 +161,8 @@ private:
const BitmapContainer &m_prevBitmapContainer;
BitmapContainer &m_parent;
const TranslationOptionList &m_translations;
const SquareMatrix &m_futurescore;
const SquareMatrix &m_futureScores;
float m_futureScore;
std::vector< const Hypothesis* > m_hypotheses;
boost::unordered_set< int > m_seenPosition;
@ -180,7 +181,7 @@ public:
BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore,
, const SquareMatrix &futureScores,
const InputType& source);
~BackwardsEdge();

View File

@ -73,6 +73,7 @@
#include "moses/FF/VW/VWFeatureSourceBigrams.h"
#include "moses/FF/VW/VWFeatureSourceIndicator.h"
#include "moses/FF/VW/VWFeatureSourcePhraseInternal.h"
#include "moses/FF/VW/VWFeatureSourceSenseWindow.h"
#include "moses/FF/VW/VWFeatureSourceWindow.h"
#include "moses/FF/VW/VWFeatureTargetBigrams.h"
#include "moses/FF/VW/VWFeatureTargetIndicator.h"
@ -279,6 +280,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(VWFeatureSourceBigrams);
MOSES_FNAME(VWFeatureSourceIndicator);
MOSES_FNAME(VWFeatureSourcePhraseInternal);
MOSES_FNAME(VWFeatureSourceSenseWindow);
MOSES_FNAME(VWFeatureSourceWindow);
MOSES_FNAME(VWFeatureTargetBigrams);
MOSES_FNAME(VWFeatureTargetPhraseInternal);

View File

@ -0,0 +1,141 @@
#pragma once
#include <string>
#include <algorithm>
#include <boost/foreach.hpp>
#include "ThreadLocalByFeatureStorage.h"
#include "VWFeatureSource.h"
#include "moses/Util.h"
/*
* Produces features from factors in the following format:
* wordsense1:0.25^wordsense2:0.7^wordsense3:0.05
*
* This is useful e.g. for including different possible word senses as features weighted
* by their probability.
*
* By default, features are extracted from a small context window around the current
* phrase and from within the phrase.
*/
namespace Moses
{
class VWFeatureSourceSenseWindow : public VWFeatureSource
{
public:
VWFeatureSourceSenseWindow(const std::string &line)
: VWFeatureSource(line), m_tlsSenses(this), m_tlsForms(this), m_lexicalized(true), m_size(DEFAULT_WINDOW_SIZE) {
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
// precompute feature strings for each input sentence
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& input = *(ttask->GetSource().get());
std::vector<WordSenses>& senses = *m_tlsSenses.GetStored();
std::vector<std::string>& forms = *m_tlsForms.GetStored();
senses.clear();
forms.clear();
senses.resize(input.GetSize());
forms.resize(input.GetSize());
for (size_t i = 0; i < input.GetSize(); i++) {
senses[i] = GetSenses(input, i);
forms[i] = m_lexicalized ? GetWordForm(input, i) + "^" : "";
}
}
void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
, Discriminative::Classifier &classifier) const {
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
const std::vector<WordSenses>& senses = *m_tlsSenses.GetStored();
const std::vector<std::string>& forms = *m_tlsForms.GetStored();
// before current phrase
for (int i = std::max(0, begin - m_size); i < begin; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob);
}
}
// within current phrase
for (int i = begin; i < end; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob);
}
}
// after current phrase
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob);
}
}
}
virtual void SetParameter(const std::string& key, const std::string& value) {
if (key == "size") {
m_size = Scan<size_t>(value);
} else if (key == "lexicalized") {
m_lexicalized = Scan<bool>(value);
} else {
VWFeatureSource::SetParameter(key, value);
}
}
private:
static const int DEFAULT_WINDOW_SIZE = 3;
struct Sense {
std::string m_label;
float m_prob;
};
typedef std::vector<Sense> WordSenses;
typedef ThreadLocalByFeatureStorage<std::vector<WordSenses> > TLSSenses;
typedef ThreadLocalByFeatureStorage<std::vector<std::string> > TLSWordForms;
TLSSenses m_tlsSenses; // for each input sentence, contains extracted senses and probs for each word
TLSWordForms m_tlsForms; // word forms for each input sentence
std::vector<Sense> GetSenses(const InputType &input, size_t pos) const {
std::string w = GetWord(input, pos);
std::vector<std::string> senseTokens = Tokenize(w, "^");
std::vector<Sense> out(senseTokens.size());
for (size_t i = 0; i < senseTokens.size(); i++) {
std::vector<std::string> senseColumns = Tokenize(senseTokens[i], ":");
if (senseColumns.size() != 2) {
UTIL_THROW2("VW :: bad format of sense distribution: " << senseTokens[i]);
}
out[i].m_label = senseColumns[0];
out[i].m_prob = Scan<float>(senseColumns[1]);
}
return out;
}
// assuming that word surface form is always factor 0, output the word form
inline std::string GetWordForm(const InputType &input, size_t pos) const {
return input.GetWord(pos).GetString(0).as_string();
}
bool m_lexicalized;
int m_size;
};
}
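Based on the SetParameter keys above, a moses.ini feature line would look roughly like this (the additional VW arguments the feature may require are omitted and are an assumption):

VWFeatureSourceSenseWindow size=3 lexicalized=true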

View File

@ -226,7 +226,7 @@ EvaluateWhenApplied(const StatelessFeatureFunction& slff)
*/
void
Hypothesis::
EvaluateWhenApplied(const SquareMatrix &futureScore)
EvaluateWhenApplied(float futureScore)
{
IFVERBOSE(2) {
m_manager.GetSentenceStats().StartTimeOtherScore();
@ -263,7 +263,7 @@ EvaluateWhenApplied(const SquareMatrix &futureScore)
}
// FUTURE COST
m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
m_futureScore = futureScore;
// TOTAL
m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;

View File

@ -146,7 +146,7 @@ public:
return m_currTargetWordsRange.GetNumWordsCovered();
}
void EvaluateWhenApplied(const SquareMatrix &futureScore);
void EvaluateWhenApplied(float futureScore);
int GetId()const {
return m_id;

View File

@ -248,14 +248,16 @@ ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos
// early discarding: check if hypothesis is too bad to build
// this idea is explained in (Moore&Quirk, MT Summit 2007)
float expectedScore = 0.0f;
const WordsBitmap &sourceCompleted = hypothesis.GetWordsBitmap();
float futureScore = m_transOptColl.GetFutureScore().CalcFutureScore2( sourceCompleted, startPos, endPos );
if (m_options.search.UseEarlyDiscarding()) {
// expected score is based on score of current hypothesis
expectedScore = hypothesis.GetScore();
// add new future score estimate
expectedScore +=
m_transOptColl.GetFutureScore()
.CalcFutureScore(hypothesis.GetWordsBitmap(), startPos, endPos);
expectedScore += futureScore;
}
// loop through all translation options
@ -264,7 +266,7 @@ ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos
if (!tol) return;
TranslationOptionList::const_iterator iter;
for (iter = tol->begin() ; iter != tol->end() ; ++iter) {
ExpandHypothesis(hypothesis, **iter, expectedScore);
ExpandHypothesis(hypothesis, **iter, expectedScore, futureScore);
}
}
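Restating the early-discarding criterion this refactoring serves (Moore & Quirk, MT Summit 2007): a candidate expansion of hypothesis h over span [startPos, endPos] is only built if

\mathrm{expectedScore} = \mathrm{score}(h) + \hat{F}( \mathrm{coverage}(h) \cup [\mathrm{startPos}, \mathrm{endPos}] )

clears the beam threshold; computing \hat{F} once per span (CalcFutureScore2) instead of once per translation option is what the new futureScore parameter passes down.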
@ -277,7 +279,10 @@ ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos
* \param expectedScore base score for early discarding
* (base hypothesis score plus future score estimation)
*/
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
const TranslationOption &transOpt,
float expectedScore,
float futureScore)
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();
@ -293,7 +298,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const Translat
stats.StopTimeBuildHyp();
}
if (newHypo==NULL) return;
newHypo->EvaluateWhenApplied(m_transOptColl.GetFutureScore());
newHypo->EvaluateWhenApplied(futureScore);
} else
// early discarding: check if hypothesis is too bad to build
{

View File

@ -44,8 +44,10 @@ protected:
ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
virtual void
ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt,
float expectedScore);
ExpandHypothesis(const Hypothesis &hypothesis,
const TranslationOption &transOpt,
float expectedScore,
float futureScore);
public:
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);

View File

@ -76,7 +76,7 @@ float SquareMatrix::CalcFutureScore( WordsBitmap const &bitmap ) const
* /param endPos end of the span that is added to the coverage
*/
float SquareMatrix::CalcFutureScore( WordsBitmap const &bitmap, size_t startPos, size_t endPos ) const
float SquareMatrix::CalcFutureScore2( WordsBitmap const &bitmap, size_t startPos, size_t endPos ) const
{
const size_t notInGap= numeric_limits<size_t>::max();
float futureScore = 0.0f;

View File

@ -62,7 +62,7 @@ public:
m_array[startPos * m_size + endPos] = value;
}
float CalcFutureScore( WordsBitmap const& ) const;
float CalcFutureScore( WordsBitmap const&, size_t startPos, size_t endPos ) const;
float CalcFutureScore2( WordsBitmap const&, size_t startPos, size_t endPos ) const;
TO_STRING();
};

View File

@ -98,11 +98,11 @@ size_t BlockHashIndex::GetFprint(const char* key) const
size_t BlockHashIndex::GetHash(size_t i, const char* key)
{
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(m_hashes[i] == 0)
LoadRange(i);
//#ifdef WITH_THREADS
// boost::mutex::scoped_lock lock(m_mutex);
//#endif
//if(m_hashes[i] == 0)
//LoadRange(i);
#ifdef HAVE_CMPH
size_t idx = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key));
#else
@ -322,6 +322,7 @@ size_t BlockHashIndex::GetSize() const
void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
{
/*
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
@ -338,7 +339,7 @@ void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance));
it != lastLoaded.rend(); it++)
DropRange(it->second);
}
}*/
}
void BlockHashIndex::CalcHash(size_t current, void* source_void)

View File

@ -155,10 +155,12 @@ LexicalReorderingTableCompact::
Load(std::string filePath)
{
std::FILE* pFile = std::fopen(filePath.c_str(), "r");
if(m_inMemory)
UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
//if(m_inMemory)
m_hash.Load(pFile);
else
m_hash.LoadIndex(pFile);
//else
//m_hash.LoadIndex(pFile);
size_t read = 0;
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <algorithm>
#include <sys/stat.h>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/thread/tss.hpp>
#include "PhraseDictionaryCompact.h"
#include "moses/FactorCollection.h"
@ -43,6 +44,8 @@ using namespace boost::algorithm;
namespace Moses
{
PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
:PhraseDictionary(line, true)
,m_inMemory(true)
@ -75,12 +78,12 @@ void PhraseDictionaryCompact::Load()
std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
size_t indexSize;
if(m_inMemory)
//if(m_inMemory)
// Load source phrase index into memory
indexSize = m_hash.Load(pFile);
else
// else
// Keep source phrase index on disk
indexSize = m_hash.LoadIndex(pFile);
//indexSize = m_hash.LoadIndex(pFile);
size_t coderSize = m_phraseDecoder->Load(pFile);
@ -162,13 +165,9 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
{
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
ref.push_back(tpc);
if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache());
m_sentenceCache->push_back(tpc);
}
void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
@ -176,23 +175,16 @@ void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &source)
{
if(!m_inMemory)
m_hash.KeepNLastRanges(0.01, 0.2);
if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache());
m_phraseDecoder->PruneCache();
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_sentenceMutex);
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
PhraseCache &ref = m_sentenceCache;
#endif
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++)
for(PhraseCache::iterator it = m_sentenceCache->begin();
it != m_sentenceCache->end(); it++)
delete *it;
PhraseCache temp;
temp.swap(ref);
temp.swap(*m_sentenceCache);
ReduceCache();
}

View File

@ -52,13 +52,8 @@ protected:
bool m_useAlignmentInfo;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
#ifdef WITH_THREADS
boost::mutex m_sentenceMutex;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
#else
typedef PhraseCache SentenceCache;
#endif
SentenceCache m_sentenceCache;
typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
static SentenceCache m_sentenceCache;
BlockHashIndex m_hash;
PhraseDecoder* m_phraseDecoder;

View File

@ -0,0 +1,32 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "TargetPhraseCollectionCache.h"
namespace Moses
{
boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap>
TargetPhraseCollectionCache::m_phraseCache;
}

View File

@ -26,12 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set>
#include <vector>
#ifdef WITH_THREADS
#ifdef BOOST_HAS_PTHREADS
#include <boost/thread/mutex.hpp>
#endif
#endif
#include <boost/thread/tss.hpp>
#include <boost/shared_ptr.hpp>
#include "moses/Phrase.h"
@ -63,12 +58,7 @@ private:
};
typedef std::map<Phrase, LastUsed> CacheMap;
CacheMap m_phraseCache;
#ifdef WITH_THREADS
boost::mutex m_mutex;
#endif
static boost::thread_specific_ptr<CacheMap> m_phraseCache;
public:
@ -80,31 +70,37 @@ public:
}
iterator Begin() {
return m_phraseCache.begin();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->begin();
}
const_iterator Begin() const {
return m_phraseCache.begin();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->begin();
}
iterator End() {
return m_phraseCache.end();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->end();
}
const_iterator End() const {
return m_phraseCache.end();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
return m_phraseCache->end();
}
/** retrieve translations for source phrase from persistent cache **/
void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv,
size_t bitsLeft = 0, size_t maxRank = 0) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
// check if source phrase is already in cache
iterator it = m_phraseCache.find(sourcePhrase);
if(it != m_phraseCache.end())
iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end())
// if found, just update clock
it->second.m_clock = clock();
else {
@ -113,19 +109,17 @@ public:
TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
tpv_temp->resize(maxRank);
std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
(*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
} else
m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
(*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
}
}
std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase &sourcePhrase) {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
iterator it = m_phraseCache.find(sourcePhrase);
if(it != m_phraseCache.end()) {
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
iterator it = m_phraseCache->find(sourcePhrase);
if(it != m_phraseCache->end()) {
LastUsed &lu = it->second;
lu.m_clock = clock();
return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
@ -135,34 +129,31 @@ public:
// if cache full, reduce
void Prune() {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
if(m_phraseCache.size() > m_max * (1 + m_tolerance)) {
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
if(m_phraseCache->size() > m_max * (1 + m_tolerance)) {
typedef std::set<std::pair<clock_t, Phrase> > Cands;
Cands cands;
for(CacheMap::iterator it = m_phraseCache.begin();
it != m_phraseCache.end(); it++) {
for(CacheMap::iterator it = m_phraseCache->begin();
it != m_phraseCache->end(); it++) {
LastUsed &lu = it->second;
cands.insert(std::make_pair(lu.m_clock, it->first));
}
for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
const Phrase& p = it->second;
m_phraseCache.erase(p);
m_phraseCache->erase(p);
if(m_phraseCache.size() < (m_max * (1 - m_tolerance)))
if(m_phraseCache->size() < (m_max * (1 - m_tolerance)))
break;
}
}
}
void CleanUp() {
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
m_phraseCache.clear();
if(!m_phraseCache.get())
m_phraseCache.reset(new CacheMap());
m_phraseCache->clear();
}
};
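The two caches above now share one pattern: replace a mutex-guarded map keyed by thread id with a lazily initialized boost::thread_specific_ptr. A minimal, self-contained sketch of that pattern (names are illustrative, not from the commit):

#include <boost/thread/tss.hpp>
#include <vector>

class PerThreadCache
{
public:
  typedef std::vector<int> Cache;
  // Each thread gets its own Cache, created on first access;
  // no mutex is needed because threads never share an instance.
  static Cache& Get() {
    if (!s_cache.get())
      s_cache.reset(new Cache());
    return *s_cache;
  }
private:
  static boost::thread_specific_ptr<Cache> s_cache;
};
boost::thread_specific_ptr<PerThreadCache::Cache> PerThreadCache::s_cache;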

View File

@ -14,7 +14,7 @@ print STDERR "Training OSM - Start\n".`date`;
my $ORDER = 5;
my $OUT_DIR = "/tmp/osm.$$";
my $___FACTOR_DELIMITER = "|";
my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$LMPLZ);
my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$LMPLZ,$DOMAIN,$TUNE,$INP_EXT,$OP_EXT);
my $cmd;
@ -29,6 +29,10 @@ die("ERROR: wrong syntax when invoking OSM-Train.perl")
'alignment=s' => \$ALIGNMENT,
'order=i' => \$ORDER,
'factor=s' => \$FACTOR,
'input-extension=s' => \$INP_EXT,
'output-extension=s' => \$OP_EXT,
'tune=s' => \$TUNE,
'domain=s' => \$DOMAIN,
'srilm-dir=s' => \$SRILM_DIR,
'lmplz=s' => \$LMPLZ,
'out-dir=s' => \$OUT_DIR);
@ -74,19 +78,172 @@ if (defined($FACTOR)) {
`ln -s $corpus_stem_f.$factor_val.$ext_f $OUT_DIR/$factor_val/f`;
`ln -s $corpus_stem_e.$factor_val.$ext_e $OUT_DIR/$factor_val/e`;
create_model($factor_val);
if (defined($TUNE) && defined($DOMAIN) && $factor_val eq "0-0")
{
die("ERROR: For Interpolated OSM model, you need SRILM")
unless -e $SRILM_DIR;
`mkdir $OUT_DIR/TUNE`;
`$MOSES_SRC_DIR/scripts/training/reduce-factors.perl --corpus $TUNE.$INP_EXT --reduced $OUT_DIR/TUNE/tune.$INP_EXT --factor 0`;
`$MOSES_SRC_DIR/scripts/training/reduce-factors.perl --corpus $TUNE.$OP_EXT --reduced $OUT_DIR/TUNE/tune.$OP_EXT --factor 0`;
create_interpolated_model($factor_val);
}
else
{
create_model($factor_val);
}
}
}
else {
`ln -s $CORPUS_F $OUT_DIR/f`;
`ln -s $CORPUS_E $OUT_DIR/e`;
create_model("");
if (defined($TUNE) && defined($DOMAIN))
{
die("ERROR: For Interpolated OSM model, you need SRILM")
unless -e $SRILM_DIR;
`mkdir $OUT_DIR/TUNE`;
`cp $TUNE.$INP_EXT $OUT_DIR/TUNE/tune.$INP_EXT`;
`cp $TUNE.$OP_EXT $OUT_DIR/TUNE/tune.$OP_EXT`;
create_interpolated_model("");
}
else
{
create_model("");
}
}
# create model
print "Training OSM - End".`date`;
sub read_domain_file{
open(my $fh, '<:encoding(UTF-8)', $DOMAIN)
or die "Could not open file '$DOMAIN' $!";
my @corpora;
while (my $row = <$fh>) {
chomp $row;
my ($num,$dom) = split(/\ /,$row);
push @corpora, $dom;
push @corpora, $num;
#print "$dom $num\n";
}
return @corpora;
}
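Given how read_domain_file parses it, the domain file is assumed to contain one sub-corpus per line as "last-line-number domain-name", with cumulative line counts (a hypothetical example):

500 news
1700 web

so the head/tail pipeline below can slice e, f and align into per-domain chunks.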
sub create_interpolated_model{
my ($factor_val) = @_;
my $fNum = 0;
my $dName;
my @corpora = read_domain_file();
my $i = 0;
while($i < scalar(@corpora))
{
$dName = "$OUT_DIR/$factor_val/$corpora[$i]";
$cmd = "mkdir $dName";
`$cmd`;
my $cal = $corpora[$i+1] - $fNum;
$cmd = "head -$corpora[$i+1] $OUT_DIR/$factor_val/e | tail -$cal > $dName/e";
`$cmd`;
$cmd = "head -$corpora[$i+1] $OUT_DIR/$factor_val/f | tail -$cal > $dName/f";
`$cmd`;
$cmd = "head -$corpora[$i+1] $OUT_DIR/align | tail -$cal > $dName/align";
`$cmd`;
#print STDERR "Flip Alignment\n";
#`$MOSES_SRC_DIR/scripts/OSM/flipAlignment.perl $dName/alignment > $dName/align`;
print STDERR "Extracting Singletons\n";
$cmd = "$MOSES_SRC_DIR/scripts/OSM/extract-singletons.perl $dName/e $dName/f $dName/align > $dName/Singletons";
print STDERR "Executing: $cmd\n";
`$cmd`;
print STDERR "Converting Bilingual Sentence Pair into Operation Corpus\n";
$cmd = "$MOSES_SRC_DIR/bin/generateSequences $dName/e $dName/f $dName/align $dName/Singletons > $dName/opCorpus";
print STDERR "Executing: $cmd\n";
`$cmd`;
print STDERR "Learning Operation Sequence Translation Model\n";
if (defined($SRILM_DIR)) {
$cmd = "$SRILM_DIR/ngram-count -kndiscount -order $ORDER -unk -text $dName/opCorpus -lm $dName/operationLM 2>> /dev/stderr";
print STDERR "Executing: $cmd\n";
`$cmd`;
}
else {
$cmd = "$LMPLZ -T $OUT_DIR --order $ORDER --text $dName/opCorpus --arpa $dName/operationLM --prune 0 0 1 2>> /dev/stderr";
print STDERR "Executing: $cmd\n";
`$cmd`;
}
print "$cmd\n";
$fNum = $corpora[$i+1];
$i = $i+2;
}
`$MOSES_SRC_DIR/scripts/OSM/flipAlignment.perl $TUNE.align > $OUT_DIR/TUNE/tune.align`;
print STDERR "Extracting Singletons\n";
$cmd = "$MOSES_SRC_DIR/scripts/OSM/extract-singletons.perl $OUT_DIR/TUNE/tune.$OP_EXT $OUT_DIR/TUNE/tune.$INP_EXT $OUT_DIR/TUNE/tune.align > $OUT_DIR/TUNE/Singletons";
print STDERR "Executing: $cmd\n";
`$cmd`;
print STDERR "Converting Bilingual Sentence Pair into Operation Corpus\n";
$cmd = "$MOSES_SRC_DIR/bin/generateSequences $OUT_DIR/TUNE/tune.$OP_EXT $OUT_DIR/TUNE/tune.$INP_EXT $OUT_DIR/TUNE/tune.align $OUT_DIR/TUNE/Singletons > $OUT_DIR/TUNE/tune.opCorpus";
print STDERR "Executing: $cmd\n";
`$cmd`;
print STDERR "Interpolating OSM Models\n";
$cmd = "$MOSES_SRC_DIR/scripts/ems/support/interpolate-lm.perl --tuning $OUT_DIR/TUNE/tune.opCorpus --name $OUT_DIR/$factor_val/operationLM --srilm $SRILM_DIR --lm ";
$i = 0;
$dName = "$OUT_DIR/$factor_val/$corpora[$i]/operationLM";
$cmd = $cmd . $dName;
$i = $i+2;
while($i < scalar(@corpora))
{
$cmd = $cmd . ",";
$dName = "$OUT_DIR/$factor_val/$corpora[$i]/operationLM";
$cmd = $cmd . $dName;
$i = $i+2;
}
print STDERR "Executing: $cmd\n";
`$cmd`;
print STDERR "Binarizing\n";
$cmd = "$MOSES_SRC_DIR/bin/build_binary $OUT_DIR/$factor_val/operationLM $OUT_DIR/$factor_val/operationLM.bin";
print STDERR "Executing: $cmd\n";
system($cmd) == 0 or die("system $cmd failed: $?");
}
sub create_model{
my ($factor_val) = @_;

View File

@ -391,6 +391,28 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
#
# OR if you want to use it with SRILM
#
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64"
## Class-based Operation Sequence Model (OSM)
# If OSM is to be enabled with factors, add the factors as below.
# Durrani, Koehn, Schmid, Fraser (COLING, 2014).
# Investigating the Usefulness of Generalized Word Representations in SMT
#
#operation-sequence-model-settings = "--factor 0-0+1-1"
## Interpolated Operation Sequence Model (OSM)
# To enable the interpolated OSM model, specify the tune set as below.
# Durrani, Sajjad, Joty, Abdelali and Vogel (MT Summit, 2015).
# Using Joint Models for Domain Adaptation in Statistical Machine Translation
#
#interpolated-operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64 --tune /path-to-tune-folder/tune_file"
# Interpolated OSM can only be used with SRILM because of the interpolation script.
# If OSM training should be skipped, point to an existing OSM model:
#osm-model =

View File

@ -411,9 +411,30 @@ alignment-symmetrization-method = grow-diag-final-and
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
#
# OR if you want to use it with SRILM
#
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64"
## Class-based Operation Sequence Model (OSM)
# If OSM is to be enabled with factors, add the factors as below.
# Durrani, Koehn, Schmid, Fraser (COLING, 2014).
# Investigating the Usefulness of Generalized Word Representations in SMT
#
#operation-sequence-model-settings = "--factor 0-0+1-1"
## Interpolated Operation Sequence Model (OSM)
# To enable the interpolated OSM model, specify the tune set as below.
# Durrani, Sajjad, Joty, Abdelali and Vogel (MT Summit, 2015).
# Using Joint Models for Domain Adaptation in Statistical Machine Translation
#
#interpolated-operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64 --tune /path-to-tune-folder/tune_file"
# Interpolated OSM can only be used with SRILM because of the interpolation script.
# If OSM training should be skipped, point to an existing OSM model:
#osm-model =
### unsupervised transliteration module
# Durrani, Sajjad, Hoang and Koehn (EACL, 2014).
# "Integrating an Unsupervised Transliteration Model

View File

@ -373,8 +373,30 @@ alignment-symmetrization-method = grow-diag-final-and
#
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = ""
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
#
# OR if you want to use it with SRILM
#
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64"
## Class-based Operation Sequence Model (OSM)
# If OSM is to be enabled with factors, add the factors as below.
# Durrani, Koehn, Schmid, Fraser (COLING, 2014).
# Investigating the Usefulness of Generalized Word Representations in SMT
#
#operation-sequence-model-settings = "--factor 0-0+1-1"
## Interpolated Operation Sequence Model (OSM)
# To enable the interpolated OSM model, specify the tune set as below.
# Durrani, Sajjad, Joty, Abdelali and Vogel (MT Summit, 2015).
# Using Joint Models for Domain Adaptation in Statistical Machine Translation
#
#interpolated-operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64 --tune /path-to-tune-folder/tune_file"
# Interpolated OSM can only be used with SRILM because of the interpolation script.
# If OSM training should be skipped, point to an existing OSM model:
#osm-model =

View File

@ -389,8 +389,30 @@ alignment-symmetrization-method = grow-diag-final-and
#
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = ""
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
#
# OR if you want to use with SRILM
#
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64"
## Class-based Operation Sequence Model (OSM)
# If OSM is to be enabled with factors, add the factors as below.
# Durrani, Koehn, Schmid and Fraser (COLING, 2014).
# "Investigating the Usefulness of Generalized Word Representations in SMT"
#
#operation-sequence-model-settings = "--factor 0-0+1-1"
## Interpolated Operation Sequence Model (OSM)
# To enable the interpolated OSM, set the options below.
# Durrani, Sajjad, Joty, Abdelali and Vogel (MT Summit, 2015).
# "Using Joint Models for Domain Adaptation in Statistical Machine Translation"
#
#interpolated-operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "--srilm-dir /path-to-srilm/bin/i686-m64 --tune /path-to-tune-folder/tune_file"
# The interpolated OSM can only be used with SRILM, because of the interpolation script.
# If OSM training should be skipped, point to an existing OSM model.
#osm-model =

View File

@ -533,7 +533,7 @@ build-domains
in: CORPUS:post-split-factorized-stem
out: domains
default-name: model/domains
ignore-unless: domain-features mml-filter-corpora
ignore-unless: domain-features mml-filter-corpora interpolated-operation-sequence-model
template: $moses-script-dir/ems/support/build-domain-file-from-subcorpora.perl $input-extension IN > OUT
final-model: yes
mml-score
@ -702,7 +702,14 @@ build-osm
out: osm-model
ignore-unless: operation-sequence-model
rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir $operation-sequence-model-settings
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --input-extension $input-extension --output-extension $output-extension $operation-sequence-model-settings
default-name: model/OSM
build-interpolated-osm
in: corpus word-alignment domains
out: osm-model
ignore-unless: interpolated-operation-sequence-model
rerun-on-change: interpolated-operation-sequence-model training-options script giza-settings operation-sequence-model-settings
template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --input-extension $input-extension --output-extension $output-extension $operation-sequence-model-settings --domain IN2
default-name: model/OSM
build-transliteration-model
in: corpus word-alignment
@ -940,6 +947,21 @@ parse-input-devtest
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
ignore-unless: use-mira
template: $input-parser < IN > OUT
parse-relax-input
in: split-input
out: input
default-name: tuning/input.parse-relaxed
pass-unless: input-parse-relaxer
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
template: $input-parse-relaxer < IN > OUT
parse-relax-input-devtest
in: split-input-devtest
out: input-devtest
default-name: tuning/input.devtest.parse-relaxed
pass-unless: input-parse-relaxer
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
ignore-unless: use-mira
template: $input-parse-relaxer < IN > OUT
factorize-input
in: parsed-input
out: factorized-input
@ -1001,35 +1023,20 @@ truecase-input-devtest
ignore-unless: AND input-truecaser use-mira
template: $input-truecaser -model IN1.$input-extension < IN > OUT
split-input
in: truecased-input
in: truecased-input SPLITTER:splitter-model
out: split-input
rerun-on-change: input-splitter SPLITTER:splitter-model
rerun-on-change: input-splitter
default-name: tuning/input.split
pass-unless: input-splitter
template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT
template: $input-splitter -model IN1.$input-extension < IN > OUT
split-input-devtest
in: truecased-input-devtest
in: truecased-input-devtest SPLITTER:splitter-model
out: split-input-devtest
rerun-on-change: input-splitter
default-name: tuning/input.devtest.split
pass-unless: input-splitter
ignore-unless: use-mira
template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT
parse-relax-input
in: split-input
out: input
default-name: tuning/input.parse-relaxed
pass-unless: input-parse-relaxer
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
template: $input-parse-relaxer < IN > OUT
parse-relax-input-devtest
in: split-input-devtest
out: input-devtest
default-name: tuning/input.devtest.parse-relaxed
pass-unless: input-parse-relaxer
pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval
ignore-unless: use-mira
template: $input-parse-relaxer < IN > OUT
template: $input-splitter -model IN1.$input-extension < IN > OUT
reference-from-sgm
in: reference-sgm input-sgm
out: raw-reference
@ -1269,12 +1276,11 @@ truecase-input
ignore-unless: input-truecaser
template: $input-truecaser -model IN1.$input-extension < IN > OUT
split-input
in: truecased-input
in: truecased-input SPLITTER:splitter-model
out: split-input
rerun-on-change: input-splitter SPLITTER:splitter-model
default-name: evaluation/input.split
pass-unless: input-splitter
template: $input-splitter -model SPLITTER:splitter-model.$input-extension < IN > OUT
template: $input-splitter -model IN1.$input-extension < IN > OUT
filter
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
out: filtered-dir

scripts/generic/multi_moses.py Executable file
View File

@ -0,0 +1,312 @@
#!/usr/bin/env python
# Written by Michael Denkowski
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
'''Parallelize decoding with multiple instances of moses on a local machine
To use with mert-moses.pl, activate --multi-moses and set the number of moses
instances and threads per instance with --decoder-flags='--threads P:T:E'
This script runs a specified number of moses instances, each using one or more
threads. The highest speed is generally seen with many single-threaded
instances while the lowest memory usage is seen with a single many-threaded
instance. It is recommended to use the maximum number of instances that will
fit into memory (up to the number of available CPUs) and distribute CPUs across
them equally. For example, a machine with 32 CPUs that can fit 3 copies of
moses into memory would use --threads 2:11:10 for 2 instances with 11 threads
each and an extra instance with 10 threads (3 instances total using all CPUs).
Memory mapped models can be shared by multiple processes and increase the number
of instances that can fit into memory:
Mmapped phrase tables (Ulrich Germann)
http://www.statmt.org/moses/?n=Advanced.Incremental#ntoc3
Mmapped language models (Kenneth Heafield)
http://www.statmt.org/moses/?n=FactoredTraining.BuildingLanguageModel#ntoc19
'''
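# Editor's note (illustrative, not part of the original script): the total
# thread budget for '--threads P:T:E' is P * T + E, so the docstring's
# example '--threads 2:11:10' occupies 2 * 11 + 10 = 32 CPUs across three
# moses processes.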
import collections
import gzip  # needed by gzopen() below
import os
import Queue
import signal
import subprocess
import sys
import threading
HELP = '''Multiple process decoding with Moses
Usage:
{} moses --config moses.ini [options] [decoder flags]
Options:
--threads P:T:E
P: Number of parallel instances to run
T: Number of threads per instance
E: Number of threads in optional extra instance
(default 1:1:0, overrides [threads] in moses.ini. Specifying T
and E is optional, e.g. --threads 16 starts 16 single-threaded
instances)
--n-best-list nbest.out N [distinct]: location and size of N-best list
--show-weights: for mert-moses.pl, just call moses and exit
Other options (decoder flags) are passed through to moses instances
'''
# Defaults
INPUT = sys.stdin
PROCS = 1
THREADS = 1
EXTRA = 0
DONE = threading.Event()
PID = os.getpid()
# A very long time, used as the Queue operation timeout: we don't actually
# want a timeout, but we do want interruptibility
# (https://bugs.python.org/issue1360)
NEVER = 60 * 60 * 24 * 365 * 1000
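# Editor's note: in Python 2, a Queue get()/put() with no timeout blocks in
# an uninterruptible lock acquire, so signals such as Ctrl-C are not handled
# until the call returns; passing a (huge) timeout makes the wait poll
# instead, keeping the threads responsive.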
# Single unit of computation: decode a line, output result, signal done
Task = collections.namedtuple('Task', ['id', 'line', 'out', 'event'])
def kill_main(msg):
'''kill -9 the main thread to stop everything immediately'''
sys.stderr.write('{}\n'.format(msg))
os.kill(PID, signal.SIGKILL)
def gzopen(f):
'''Open plain or gzipped text'''
return gzip.open(f, 'rb') if f.endswith('.gz') else open(f, 'r')
def run_instance(cmd_base, threads, tasks, n_best=False):
'''Run an instance of moses that processes tasks (input lines) from a
queue using a specified number of threads'''
cmd = cmd_base[:]
cmd.append('--threads')
cmd.append(str(threads))
try:
# Queue of tasks the instance is currently working on, limited to the number of
# threads. The queue should be kept full for optimal CPU usage.
work = Queue.Queue(maxsize=threads)
# Multi-threaded instance
moses = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
# Read and handle instance output as available
def handle_output():
while True:
# Output line triggers task completion
line = moses.stdout.readline()
# End of output (instance finished)
if not line:
break
task = work.get(timeout=NEVER)
if n_best:
# Read and copy lines until sentinel line, copy real line id
# id ||| hypothesis words ||| feature scores ||| total score
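# (Editor's sketch of one such line; feature names and values are
# hypothetical: "0 ||| the house ||| LM0= -12.3 TM0= -4.1 ||| -16.4")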
(first_i, rest) = line.split(' ||| ', 1)
task.out.append(' ||| '.join((task.id, rest)))
while True:
line = moses.stdout.readline()
(i, rest) = line.split(' ||| ', 1)
# Sentinel
if i != first_i:
break
task.out.append(' ||| '.join((task.id, rest)))
else:
task.out.append(line)
# Signal task done
task.event.set()
# Output thread
handler = threading.Thread(target=handle_output, args=())
# Daemon: will not keep the process alive after non-daemon threads exit
handler.setDaemon(True)
handler.start()
# Input thread: take tasks as they are available and add them to work
# queue. Stop when DONE encountered.
while True:
task = tasks.get(timeout=NEVER)
work.put(task, timeout=NEVER)
if task.event == DONE:
break
if n_best:
# Input line followed by blank line (sentinel)
moses.stdin.write(task.line)
moses.stdin.write('\n')
else:
moses.stdin.write(task.line)
# Cleanup
moses.stdin.close()
moses.wait()
handler.join()
except:
kill_main('Error with moses instance: see stderr')
def write_results(results, n_best=False, n_best_out=None):
'''Write out results (output lines) from a queue as they are populated'''
while True:
task = results.get(timeout=NEVER)
if task.event == DONE:
break
task.event.wait()
if n_best:
# Write top-best and N-best
# id ||| hypothesis words ||| feature scores ||| total score
top_best = task.out[0].split(' ||| ', 2)[1]
# Except don't write top-best if writing N-best to stdout "-"
if n_best_out != sys.stdout:
sys.stdout.write('{}\n'.format(top_best))
sys.stdout.flush()
for line in task.out:
n_best_out.write(line)
n_best_out.flush()
else:
sys.stdout.write(task.out[0])
sys.stdout.flush()
def main(argv):
# Defaults
moses_ini = None
input = INPUT
procs = PROCS
threads = THREADS
extra = EXTRA
n_best = False
n_best_file = None
n_best_size = None
n_best_distinct = False
n_best_out = None
show_weights = False
# Decoder command
cmd = argv[1:]
# Parse special options and remove from cmd
i = 1
while i < len(cmd):
if cmd[i] in ('-f', '-config', '--config'):
moses_ini = cmd[i + 1]
# Do not remove from cmd
i += 2
elif cmd[i] in ('-i', '-input-file', '--input-file'):
input = gzopen(cmd[i + 1])
cmd = cmd[:i] + cmd[i + 2:]
elif cmd[i] in ('-th', '-threads', '--threads'):
# P:T:E
args = cmd[i + 1].split(':')
procs = int(args[0])
if len(args) > 1:
threads = int(args[1])
if len(args) > 2:
extra = int(args[2])
cmd = cmd[:i] + cmd[i + 2:]
elif cmd[i] in ('-n-best-list', '--n-best-list'):
n_best = True
n_best_file = cmd[i + 1]
n_best_size = cmd[i + 2]
# Optional "distinct"
if i + 3 < len(cmd) and cmd[i + 3] == 'distinct':
n_best_distinct = True
cmd = cmd[:i] + cmd[i + 4:]
else:
cmd = cmd[:i] + cmd[i + 3:]
# Handled specially for mert-moses.pl
elif cmd[i] in ('-show-weights', '--show-weights'):
show_weights = True
# Do not remove from cmd
i += 1
else:
i += 1
# If mert-moses.pl passes -show-weights, just call moses
if show_weights:
sys.stdout.write(subprocess.check_output(cmd))
sys.stdout.flush()
sys.exit(0)
# Check inputs
if not (len(cmd) > 0 and moses_ini):
sys.stderr.write(HELP.format(os.path.basename(argv[0])))
sys.exit(2)
if not (os.path.isfile(cmd[0]) and os.access(cmd[0], os.X_OK)):
raise Exception('moses "{}" is not executable\n'.format(cmd[0]))
# Report settings
sys.stderr.write('Moses flags: {}\n'.format(' '.join('\'{}\''.format(s) if ' ' in s else s for s in cmd[1:])))
sys.stderr.write('Instances: {}\n'.format(procs))
sys.stderr.write('Threads per: {}\n'.format(threads))
if extra:
sys.stderr.write('Extra: {}\n'.format(extra))
if n_best:
sys.stderr.write('N-best list: {} ({}{})\n'.format(n_best_file, n_best_size, ', distinct' if n_best_distinct else ''))
# Task and result queues (buffer 8 * total threads input lines)
tasks = Queue.Queue(maxsize=(8 * ((procs * threads) + extra)))
results = Queue.Queue()
# N-best capture
if n_best:
cmd.append('--n-best-list')
cmd.append('-')
cmd.append(n_best_size)
if n_best_distinct:
cmd.append('distinct')
if n_best_file == '-':
n_best_out = sys.stdout
else:
n_best_out = open(n_best_file, 'w')
# Start instances
instances = []
for i in range(procs + (1 if extra else 0)):
t = threading.Thread(target=run_instance, args=(cmd, (threads if i < procs else extra), tasks, n_best))
instances.append(t)
# Daemon: will not keep the process alive after non-daemon threads exit
t.setDaemon(True)
t.start()
# Start results writer
writer = threading.Thread(target=write_results, args=(results, n_best, n_best_out))
writer.start()
# Main loop: queue task for each input line
id = 0
while True:
line = input.readline()
if not line:
break
# (line id, input line, output lines, "done" event)
task = Task(str(id), line, [], threading.Event())
results.put(task, timeout=NEVER)
tasks.put(task, timeout=NEVER)
id += 1
# Tell instances to exit
for t in instances:
tasks.put(Task(None, None, None, DONE), timeout=NEVER)
for t in instances:
t.join()
# Stop results writer
results.put(Task(None, None, None, DONE), timeout=NEVER)
writer.join()
# Cleanup
if n_best:
n_best_out.close()
if __name__ == '__main__':
try:
main(sys.argv)
except:
kill_main('Error with main I/O: see stderr')

View File

@ -52,9 +52,9 @@ parser.set_defaults(
ngram_size=14,
minibatch_size=1000,
noise=100,
hidden=750,
hidden=0,
input_embedding=150,
output_embedding=150,
output_embedding=750,
threads=1,
output_model="train.10k",
output_dir=None,

View File

@ -166,6 +166,10 @@ my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loa
# and so on
my $maximum_iterations = 25;
# Multiple instance parallelization
my $___MULTI_MOSES = "$SCRIPTS_ROOTDIR/generic/multi_moses.py";
my $___USE_MULTI_MOSES = undef;
# Simulated post-editing
my $___MOSES_SIM_PE = "$SCRIPTS_ROOTDIR/generic/moses_sim_pe.py";
my $___DEV_SYMAL = undef;
@ -227,7 +231,8 @@ GetOptions(
"promix-training=s" => \$__PROMIX_TRAINING,
"promix-table=s" => \@__PROMIX_TABLES,
"threads=i" => \$__THREADS,
"spe-symal=s" => \$___DEV_SYMAL
"spe-symal=s" => \$___DEV_SYMAL,
"multi-moses" => \$___USE_MULTI_MOSES
) or exit(1);
# the 4 required parameters can be supplied on the command line directly
@ -325,6 +330,9 @@ Options:
(parameter sets factor [0;1] given to current weights)
--spe-symal=SYMAL ... Use simulated post-editing when decoding.
(SYMAL aligns input to refs)
--multi-moses ... Use multiple instances of moses instead of threads for decoding
(Use with --decoder-flags='-threads N' to get N instances, each of
which uses a single thread; overrides threads in moses.ini)
";
exit 1;
}
@ -1305,6 +1313,10 @@ sub run_decoder {
$decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG";
$decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
$decoder_cmd .= " $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F";
if (defined $___USE_MULTI_MOSES) {
# If requested, prefix full decoder command with multi-moses wrapper
$decoder_cmd = "$___MULTI_MOSES $decoder_cmd";
}
if (defined $___DEV_SYMAL) {
# If simulating post-editing, route command through moses_sim_pe.py
# Always use single (first) reference. Simulated post-editing undefined for multiple references.

View File

@ -42,41 +42,37 @@ template <class Derived> class FakeOStream {
return C().write(str.data(), str.size());
}
// For anything with ToStringBuf<T>::kBytes, define operator<< using ToString.
// This includes uint64_t, int64_t, uint32_t, int32_t, uint16_t, int16_t,
// float, double
// Handle integers by size and signedness.
private:
template <int Arg> struct EnableIfKludge {
template <class Arg> struct EnableIfKludge {
typedef Derived type;
};
template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {};
template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
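// Editor's note: for example, Coerce<unsigned> selects To = uint32_t on
// common platforms (sizeof 4, unsigned, integer), so the operator<< below
// forwards the value to ToString as uint32_t.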
public:
template <class T> typename EnableIfKludge<ToStringBuf<T>::kBytes>::type &operator<<(const T value) {
return CallToString(value);
template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
return CallToString(static_cast<typename Coerce<From>::To>(value));
}
/* clang on OS X appears to consider std::size_t aka unsigned long distinct
* from uint64_t. So this function makes clang work. gcc considers
* uint64_t and std::size_t the same (on 64-bit) so this isn't necessary.
* But it does no harm since gcc sees it as a specialization of the
* EnableIfKludge template.
* Also, delegating to *this << static_cast<uint64_t>(value) would loop
* indefinitely on gcc.
*/
Derived &operator<<(std::size_t value) { return CoerceToString(value); }
// union types will map to int, but don't pass the template magic above in gcc.
Derived &operator<<(int value) { return CoerceToString(value); }
// gcc considers these distinct from uint64_t
Derived &operator<<(unsigned long long value) { return CoerceToString(value); }
Derived &operator<<(signed long long value) { return CoerceToString(value); }
Derived &operator<<(long value) { return CoerceToString(value); }
// Character types that get copied as bytes instead of displayed as integers.
Derived &operator<<(char val) { return put(val); }
Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
Derived &operator<<(bool val) { return put(val + '0'); }
// enums will fall back to int but are not caught by the template.
Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
Derived &operator<<(float val) { return CallToString(val); }
Derived &operator<<(double val) { return CallToString(val); }
// This is here to catch all the other pointer types.
Derived &operator<<(const void *value) { return CallToString(value); }
// This is here because the above line also catches const char*.
@ -102,20 +98,6 @@ template <class Derived> class FakeOStream {
return *static_cast<const Derived*>(this);
}
template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed> struct Coerce {};
template <class From> struct Coerce<From, 2, false> { typedef uint16_t To; };
template <class From> struct Coerce<From, 4, false> { typedef uint32_t To; };
template <class From> struct Coerce<From, 8, false> { typedef uint64_t To; };
template <class From> struct Coerce<From, 2, true> { typedef int16_t To; };
template <class From> struct Coerce<From, 4, true> { typedef int32_t To; };
template <class From> struct Coerce<From, 8, true> { typedef int64_t To; };
template <class From> Derived &CoerceToString(const From value) {
return CallToString(static_cast<typename Coerce<From>::To>(value));
}
// This is separate to prevent an infinite loop if the compiler considers
// types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
template <class T> Derived &CallToString(const T value) {

View File

@ -644,14 +644,16 @@ const char kHexDigits[] = "0123456789abcdef";
} // namespace
char *ToString(const void *v, char *to) {
// Apparently it's 0, not 0x0.
*to++ = '0';
*to++ = 'x';
// Fun fact: boost::lexical_cast under gcc/clang on Linux produces just "0" while clang on OS X produces "0x0"
// I happen to prefer 0x0.
if (!v) {
*to++ = '0';
return to;
}
*to++ = '0';
*to++ = 'x';
uintptr_t value = reinterpret_cast<uintptr_t>(v);
uint8_t shift = sizeof(void*) * 8 - 4;
for (; !(value >> shift); shift -= 4) {}
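// Editor's note: the loop above skips leading zero nibbles, so a pointer
// whose value is 0xbeef is rendered as "0xbeef" rather than zero-padded.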

View File

@ -15,7 +15,12 @@ template <class T> void TestValue(const T value) {
char buf[ToStringBuf<T>::kBytes];
StringPiece result(buf, ToString(value, buf) - buf);
BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size());
BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
if (value) {
BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
} else {
// Platforms may print void * as either 0x0 or 0.
BOOST_CHECK(result == "0x0" || result == "0");
}
}
template <class T> void TestCorners() {
@ -33,7 +38,7 @@ BOOST_AUTO_TEST_CASE(Corners) {
TestCorners<int16_t>();
TestCorners<int32_t>();
TestCorners<int64_t>();
//TestCorners<const void*>();
TestCorners<const void*>();
}
template <class T> void TestAll() {
@ -64,7 +69,6 @@ BOOST_AUTO_TEST_CASE(Tens) {
}
BOOST_AUTO_TEST_CASE(Pointers) {
/*
for (uintptr_t i = 1; i < std::numeric_limits<uintptr_t>::max() / 10; i *= 10) {
TestValue((const void*)i);
}
@ -72,7 +76,6 @@ BOOST_AUTO_TEST_CASE(Pointers) {
TestValue((const void*)i);
TestValue((const void*)(i + 0xf00));
}
*/
}
}} // namespaces

View File

@ -296,6 +296,13 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
}
}
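// Editor's note: these raw accessors presumably expose the whole underlying
// bucket array, empty slots included, so callers iterating from RawBegin()
// to RawEnd() must skip unused buckets themselves.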
ConstIterator RawBegin() const {
return begin_;
}
ConstIterator RawEnd() const {
return end_;
}
private:
friend class AutoProbing<Entry, Hash, Equal>;
@ -379,6 +386,13 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
backend_.Clear();
}
ConstIterator RawBegin() const {
return backend_.RawBegin();
}
ConstIterator RawEnd() const {
return backend_.RawEnd();
}
private:
void DoubleIfNeeded() {
if (UTIL_LIKELY(Size() < threshold_))

View File

@ -54,4 +54,27 @@ BOOST_AUTO_TEST_CASE(EnumCase) {
TestEqual(EnumValue);
}
BOOST_AUTO_TEST_CASE(Strings) {
TestEqual("foo");
const char *a = "bar";
TestEqual(a);
StringPiece piece("abcdef");
TestEqual(piece);
TestEqual(StringPiece());
char non_const[3];
non_const[0] = 'b';
non_const[1] = 'c';
non_const[2] = 0;
std::string out;
StringStream(out) << "a" << non_const << 'c';
BOOST_CHECK_EQUAL("abcc", out);
// Now test as a separate object.
out.clear();
StringStream stream(out);
stream << "a" << non_const << 'c' << piece;
BOOST_CHECK_EQUAL("abccabcdef", out);
}
}} // namespaces

View File

@ -286,7 +286,7 @@ template <class Num> uint64_t ParseNum(const std::string &arg) {
return static_cast<uint64_t>(static_cast<double>(value) * static_cast<double>(mem) / 100.0);
}
if (after == "k") after == "K";
if (after == "k") after = "K";
std::string units("bKMGTPEZY");
std::string::size_type index = units.find(after[0]);
UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");
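// Editor's note: with the fix above, a lowercase "k" suffix is normalized to
// "K" before the unit lookup, so e.g. "500k" parses the same as "500K".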