Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2015-06-04 16:47:32 +04:00
commit 1c4df9fc5e
20 changed files with 117 additions and 118 deletions

View File

@ -67,7 +67,9 @@ private:
protected:
/// For child classes only: retrieve filebuf.
__gnu_cxx::stdio_filebuf<char> *get_filebuf() { return _filebuf; }
/// Accessor declared in the `protected:` section, so it is reachable from
/// derived classes but not from outside code.
/// @return the `_filebuf` member pointer, handed back unchanged.
// NOTE(review): who owns/deletes the returned filebuf is not visible in this
// hunk -- presumably this object keeps ownership; confirm before storing it.
__gnu_cxx::stdio_filebuf<char> *get_filebuf() {
return _filebuf;
}
};
class ifdstream : public _fdstream

View File

@ -55,10 +55,10 @@ int main(int argc, char** argv)
size_t quantize = 0;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
if(1 >= argc) {
printHelp(argv);

View File

@ -68,10 +68,10 @@ int main(int argc, char **argv)
size_t sortScoreIndex = 2;
bool warnMe = true;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
if(1 >= argc) {
printHelp(argv);

View File

@ -112,7 +112,7 @@ void GlobalLexicalModel::Load()
void GlobalLexicalModel::InitializeForInput(ttasksptr const& ttask)
{
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
"GlobalLexicalModel works only with sentence input.");
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
m_local.reset(new ThreadLocalStorage);
m_local->input = s;

View File

@ -108,7 +108,7 @@ bool GlobalLexicalModelUnlimited::Load(const std::string &filePathSource,
void GlobalLexicalModelUnlimited::InitializeForInput(ttasksptr const& ttask)
{
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
"GlobalLexicalModel works only with sentence input.");
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
m_local.reset(new ThreadLocalStorage);
m_local->input = s;

View File

@ -303,12 +303,11 @@ ReadInput()
boost::lock_guard<boost::mutex> lock(m_lock);
#endif
boost::shared_ptr<InputType> source = GetBufferedInput();
if (source)
{
source->SetTranslationId(m_currentLine++);
if (m_look_ahead || m_look_back)
this->set_context_for(*source);
}
if (source) {
source->SetTranslationId(m_currentLine++);
if (m_look_ahead || m_look_back)
this->set_context_for(*source);
}
m_past_input.push_back(source);
return source;
}

View File

@ -63,8 +63,8 @@ StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
@ -621,10 +621,9 @@ bool StaticData::LoadData(Parameter *parameter)
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size())
{
m_nbest_options.enabled = true;
}
|| m_latticeSamplesFilePath.size()) {
m_nbest_options.enabled = true;
}
// S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",

View File

@ -1,18 +1,18 @@
#include "BookkeepingOptions.h"
namespace Moses {
bool
BookkeepingOptions::
init(Parameter const& P)
{
bool& x = need_alignment_info;
P.SetParameter(x, "print-alignment-info", false);
if (!x) P.SetParameter(x, "print-alignment-info-in-n-best", false);
if (!x)
{
PARAM_VEC const* params = P.GetParam("alignment-output-file");
x = params && params->size();
}
return true;
namespace Moses
{
/// Fill in need_alignment_info from the parameter collection P.
///
/// Up to three sources are consulted, in this order, and the later ones are
/// only looked at while the flag is still false:
///   1. the switch "print-alignment-info",
///   2. the switch "print-alignment-info-in-n-best",
///   3. the parameter "alignment-output-file" (flag becomes true when it is
///      present with at least one value).
///
/// @param P  parameter collection queried via SetParameter() and GetParam().
/// @return   always true; this function has no failure path.
bool
BookkeepingOptions::
init(Parameter const& P)
{
// x is a reference to the member, so every write below updates
// need_alignment_info directly.
bool& x = need_alignment_info;
// NOTE(review): SetParameter() is defined elsewhere; presumably it stores the
// switch's value in x and falls back to the third argument (false) when the
// switch is absent -- confirm against Parameter.
P.SetParameter(x, "print-alignment-info", false);
// Only consult the n-best variant of the switch if the first one left x false.
if (!x) P.SetParameter(x, "print-alignment-info-in-n-best", false);
if (!x) {
// Last resort: GetParam() may return a null pointer, hence the null check
// before asking for the number of values.
PARAM_VEC const* params = P.GetParam("alignment-output-file");
x = params && params->size();
}
return true;
}
}

View File

@ -2,13 +2,13 @@
#include "moses/Parameter.h"
// #include <string>
namespace Moses {
namespace Moses
{
struct BookkeepingOptions
{
bool need_alignment_info;
bool init(Parameter const& param);
};
/// Small option group with one flag and an initialiser that reads it from a
/// Parameter object.
struct BookkeepingOptions {
/// Written by init(): derived from the "print-alignment-info" and
/// "print-alignment-info-in-n-best" switches and from whether
/// "alignment-output-file" has any value.
bool need_alignment_info;
/// Populate the members from `param`.
/// The implementation shown for this commit returns true unconditionally.
bool init(Parameter const& param);
};

View File

@ -2,7 +2,8 @@
#include "moses/Parameter.h"
#include "NBestOptions.h"
namespace Moses {
namespace Moses
{
bool
NBestOptions::
@ -10,21 +11,16 @@ init(Parameter const& P)
{
const PARAM_VEC *params;
params = P.GetParam("n-best-list");
if (params)
{
if (params->size() >= 2)
{
output_file_path = params->at(0);
nbest_size = Scan<size_t>( params->at(1) );
only_distinct = (params->size()>2 && params->at(2)=="distinct");
}
else
{
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
return false;
}
if (params) {
if (params->size() >= 2) {
output_file_path = params->at(0);
nbest_size = Scan<size_t>( params->at(1) );
only_distinct = (params->size()>2 && params->at(2)=="distinct");
} else {
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
return false;
}
else nbest_size = 0;
} else nbest_size = 0;
P.SetParameter<size_t>(factor, "n-best-factor", 20);
P.SetParameter(include_alignment_info, "print-alignment-info-in-n-best", false );

View File

@ -1,27 +1,27 @@
// -*- mode: c++; cc-style: gnu -*-
#include <string>
namespace Moses {
namespace Moses
{
struct NBestOptions
{
size_t nbest_size;
size_t factor;
bool enabled;
bool print_trees;
bool only_distinct;
struct NBestOptions {
size_t nbest_size;
size_t factor;
bool enabled;
bool print_trees;
bool only_distinct;
bool include_alignment_info;
bool include_segmentation;
bool include_feature_labels;
bool include_passthrough;
bool include_alignment_info;
bool include_segmentation;
bool include_feature_labels;
bool include_passthrough;
bool include_all_factors;
bool include_all_factors;
std::string output_file_path;
std::string output_file_path;
bool init(Parameter const& param);
bool init(Parameter const& param);
};
};
}

View File

@ -22,7 +22,8 @@
#include <map>
#include <string>
namespace MosesTraining {
namespace MosesTraining
{
struct SyntaxNode {
typedef std::map<std::string, std::string> AttributeMap;

View File

@ -42,7 +42,7 @@ void SyntaxNodeCollection::Clear()
}
SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
const std::string &label)
const std::string &label)
{
SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos);
m_nodes.push_back( newNode );
@ -57,7 +57,7 @@ bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
int startPos, int endPos ) const
int startPos, int endPos ) const
{
NodeIndex::const_iterator startIndex = m_index.find( startPos );
if (startIndex == m_index.end() )

View File

@ -51,7 +51,9 @@ public:
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
//! Get a vector of pointers to all SyntaxNodes (unordered).
const std::vector< SyntaxNode* >& GetAllNodes() { return m_nodes; };
//! Returns a const reference to the m_nodes member; the vector itself is not
//! copied, so the reference is only usable while this object is alive.
//! NOTE(review): the function is not const-qualified, so it cannot be called
//! on a const object even though it only reads m_nodes -- confirm whether
//! that is intended.
//! NOTE(review): the ';' after the closing brace is a redundant empty
//! declaration.
const std::vector< SyntaxNode* >& GetAllNodes() {
return m_nodes;
};
size_t GetNumWords() const {
return m_numWords;

View File

@ -73,17 +73,17 @@ int main(int argc, char* argv[])
if (argc < 4) {
std::cerr <<
"syntax: "
"consolidate phrase-table.direct "
"phrase-table.indirect "
"phrase-table.consolidated "
"[--Hierarchical] [--OnlyDirect] [--PhraseCount] "
"[--GoodTuring counts-of-counts-file] "
"[--KneserNey counts-of-counts-file] [--LowCountFeature] "
"[--SourceLabels source-labels-file] "
"[--PartsOfSpeech parts-of-speech-file] "
"[--MinScore id:threshold[,id:threshold]*]"
<< std::endl;
"syntax: "
"consolidate phrase-table.direct "
"phrase-table.indirect "
"phrase-table.consolidated "
"[--Hierarchical] [--OnlyDirect] [--PhraseCount] "
"[--GoodTuring counts-of-counts-file] "
"[--KneserNey counts-of-counts-file] [--LowCountFeature] "
"[--SourceLabels source-labels-file] "
"[--PartsOfSpeech parts-of-speech-file] "
"[--MinScore id:threshold[,id:threshold]*]"
<< std::endl;
exit(1);
}
const std::string fileNameDirect = argv[1];

View File

@ -219,7 +219,7 @@ Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
if (nodeType == TREE) {
float score = 0.0f;
SyntaxNode::AttributeMap::const_iterator p =
root->value().attributes.find("pcfg");
root->value().attributes.find("pcfg");
if (p != root->value().attributes.end()) {
score = std::atof(p->second.c_str());
}

View File

@ -232,7 +232,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Initialize phrase orientation scoring object
PhraseOrientation phraseOrientation(sourceTokens.size(),
targetXmlTreeParser.words().size(), alignment);
targetXmlTreeParser.words().size(), alignment);
// Write the rules, subject to scope pruning.
const std::vector<Node *> &targetNodes = graph.GetTargetNodes();
@ -413,21 +413,21 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
<< "\nThe parse tree is assumed to contain part-of-speech preterminal nodes.\n"
<< "\n"
<< "For the composed rule constraints: rule depth is the "
"maximum distance from the\nrule's root node to a sink "
"node, not counting preterminal expansions or word\n"
"alignments. Rule size is the measure defined in DeNeefe "
"et al (2007): the\nnumber of non-part-of-speech, non-leaf "
"constituent labels in the target tree.\nNode count is the "
"number of target tree nodes (excluding target words).\n"
"maximum distance from the\nrule's root node to a sink "
"node, not counting preterminal expansions or word\n"
"alignments. Rule size is the measure defined in DeNeefe "
"et al (2007): the\nnumber of non-part-of-speech, non-leaf "
"constituent labels in the target tree.\nNode count is the "
"number of target tree nodes (excluding target words).\n"
<< "\n"
<< "Scope pruning (Hopkins and Langmead, 2010) is applied to both minimal and\ncomposed rules.\n"
<< "\n"
<< "Unaligned source words are attached to the tree using the "
"following heuristic:\nif there are aligned source words to "
"both the left and the right of an unaligned\nsource word "
"then it is attached to the lowest common ancestor of its "
"nearest\nsuch left and right neighbours. Otherwise, it is "
"attached to the root of the\nparse tree.\n"
"following heuristic:\nif there are aligned source words to "
"both the left and the right of an unaligned\nsource word "
"then it is attached to the lowest common ancestor of its "
"nearest\nsuch left and right neighbours. Otherwise, it is "
"attached to the root of the\nparse tree.\n"
<< "\n"
<< "Unless the --AllowUnary option is given, unary rules containing no lexical\nsource items are eliminated using the method described in Chung et al. (2011).\nThe parsing algorithm used in Moses is unable to handle such rules.\n"
<< "\n"

View File

@ -87,13 +87,13 @@ class ExtractTask
{
public:
ExtractTask(
size_t id, SentenceAlignment &sentence,
PhraseExtractionOptions &initoptions,
Moses::OutputFileStream &extractFile,
Moses::OutputFileStream &extractFileInv,
Moses::OutputFileStream &extractFileOrientation,
Moses::OutputFileStream &extractFileContext,
Moses::OutputFileStream &extractFileContextInv):
size_t id, SentenceAlignment &sentence,
PhraseExtractionOptions &initoptions,
Moses::OutputFileStream &extractFile,
Moses::OutputFileStream &extractFileInv,
Moses::OutputFileStream &extractFileOrientation,
Moses::OutputFileStream &extractFileContext,
Moses::OutputFileStream &extractFileContextInv):
m_sentence(sentence),
m_options(initoptions),
m_extractFile(extractFile),

View File

@ -137,7 +137,7 @@ void FilterRuleTable::ReadTestSet(
continue;
}
sentences.push_back(
boost::shared_ptr<SyntaxTree>(parser.Parse(line).release()));
boost::shared_ptr<SyntaxTree>(parser.Parse(line).release()));
}
}

View File

@ -131,14 +131,14 @@ int main(int argc, char* argv[])
ScoreFeatureManager featureManager;
if (argc < 4) {
std::cerr <<
"syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] "
"[--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] "
"[--NoWordAlignment] [--UnalignedPenalty] "
"[--UnalignedFunctionWordPenalty function-word-file] "
"[--MinCountHierarchical count] [--PartsOfSpeech] [--PCFG] "
"[--TreeFragments] [--SourceLabels] [--SourceLabelCountsLHS] "
"[--TargetPreferenceLabels] [--UnpairedExtractFormat] "
"[--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl;
"syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] "
"[--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] "
"[--NoWordAlignment] [--UnalignedPenalty] "
"[--UnalignedFunctionWordPenalty function-word-file] "
"[--MinCountHierarchical count] [--PartsOfSpeech] [--PCFG] "
"[--TreeFragments] [--SourceLabels] [--SourceLabelCountsLHS] "
"[--TargetPreferenceLabels] [--UnpairedExtractFormat] "
"[--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl;
std::cerr << featureManager.usage() << std::endl;
exit(1);
}