mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 06:50:32 +03:00
Merge ../mosesdecoder into perf_moses2
This commit is contained in:
commit
ffaf24419d
@ -1,21 +1,25 @@
|
|||||||
# the tmp directory
|
# the tmp directory
|
||||||
MCC_TEMPDIR="/tmp/"
|
MCC_TEMPDIR="/disk4/cruise-control/cruise-control/tmp/"
|
||||||
|
|
||||||
# where logs are saved
|
# where logs are saved
|
||||||
MCC_LOGDIR="/home/cruise/"
|
MCC_LOGDIR="/disk4/cruise-control/cruise-control/"
|
||||||
|
|
||||||
# where moses gets checked out, tested etc.
|
|
||||||
MCC_WORKDIR="/home/cruise/work/"
|
|
||||||
|
|
||||||
# repository that will be checked out
|
# repository that will be checked out
|
||||||
MCC_GITREPO="https://github.com/moses-smt/mosesdecoder.git"
|
MCC_GITREPO="https://github.com/moses-smt/mosesdecoder.git"
|
||||||
|
|
||||||
# arguments passed to Moses configure
|
# arguments passed to Moses configure
|
||||||
MCC_CONFIGURE_ARGS=" --with-srilm=/path/to/srilm --with-irstlm=/path/to/irstlm "
|
MCC_CONFIGURE_ARGS=" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
||||||
|
|
||||||
|
ALTERNATIVE_CONFIGURE_ARGS=(
|
||||||
|
" --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
||||||
|
" --with-srilm=/disk4/cruise-control/srilm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
||||||
|
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
||||||
|
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
||||||
|
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --no-xmlrpc-c -j8 "
|
||||||
|
)
|
||||||
|
|
||||||
# list of branches to be checked
|
# list of branches to be checked
|
||||||
MCC_SCAN_BRANCHES="master"
|
MCC_SCAN_BRANCHES="origin/master"
|
||||||
|
|
||||||
# regression test data archive (optional, this is the default)
|
|
||||||
MCC_REGTEST_ARCHIVE="http://www.statmt.org/moses/reg-testing/moses-reg-test-data-8.tgz"
|
|
||||||
|
|
||||||
|
# run full training/eval pipeline using EMS?
|
||||||
|
MCC_RUN_EMS="yes"
|
||||||
|
@ -116,8 +116,16 @@ function run_single_test () {
|
|||||||
#regtest_dir=$PWD/$(basename $regtest_file .tgz)
|
#regtest_dir=$PWD/$(basename $regtest_file .tgz)
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
# test build with different configurations
|
||||||
|
echo "## test build with different configurations" >> $longlog
|
||||||
|
for configArgs in "${ALTERNATIVE_CONFIGURE_ARGS[@]}"
|
||||||
|
do
|
||||||
|
echo "building with args: $configArgs" >> $longlog
|
||||||
|
./bjam clean -a $configArgs >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
||||||
|
done
|
||||||
|
|
||||||
echo "## ./bjam clean" >> $longlog
|
echo "## ./bjam clean" >> $longlog
|
||||||
./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
./bjam clean -a $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
||||||
|
|
||||||
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
|
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
|
||||||
if [ -z "$err" ]; then
|
if [ -z "$err" ]; then
|
||||||
@ -169,7 +177,7 @@ function run_single_test () {
|
|||||||
echo "## Status: $status" >> $longlog
|
echo "## Status: $status" >> $longlog
|
||||||
|
|
||||||
nicedate=$(date +"%Y%m%d-%H%M%S")
|
nicedate=$(date +"%Y%m%d-%H%M%S")
|
||||||
echo "$commit $status $configname $ccversion $nicedate" \
|
echo "$commit$status$configname$ccversion$nicedate" \
|
||||||
>> "$LOGDIR/brief.log"
|
>> "$LOGDIR/brief.log"
|
||||||
|
|
||||||
if [ -z "$err" ]; then
|
if [ -z "$err" ]; then
|
||||||
|
@ -101,7 +101,7 @@ OutputSurface(std::ostream &out, Phrase const& phrase) const
|
|||||||
std::vector<FactorType> const& factor_order = options()->output.factor_order;
|
std::vector<FactorType> const& factor_order = options()->output.factor_order;
|
||||||
|
|
||||||
bool markUnknown = options()->unk.mark;
|
bool markUnknown = options()->unk.mark;
|
||||||
std::string const& fd = options()->output.FactorDelimiter;
|
std::string const& fd = options()->output.factor_delimiter;
|
||||||
|
|
||||||
size_t size = phrase.GetSize();
|
size_t size = phrase.GetSize();
|
||||||
for (size_t pos = 0 ; pos < size ; pos++) {
|
for (size_t pos = 0 ; pos < size ; pos++) {
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include "moses/OutputCollector.h"
|
#include "moses/OutputCollector.h"
|
||||||
#include "moses/ChartKBestExtractor.h"
|
#include "moses/ChartKBestExtractor.h"
|
||||||
#include "moses/HypergraphOutput.h"
|
#include "moses/HypergraphOutput.h"
|
||||||
|
#include "moses/TranslationTask.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -50,7 +51,7 @@ ChartManager::ChartManager(ttasksptr const& ttask)
|
|||||||
, m_start(clock())
|
, m_start(clock())
|
||||||
, m_hypothesisId(0)
|
, m_hypothesisId(0)
|
||||||
, m_parser(ttask, m_hypoStackColl)
|
, m_parser(ttask, m_hypoStackColl)
|
||||||
, m_translationOptionList(StaticData::Instance().GetRuleLimit(), m_source)
|
, m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
ChartManager::~ChartManager()
|
ChartManager::~ChartManager()
|
||||||
@ -297,8 +298,8 @@ OutputSearchGraphAsHypergraph(std::ostream& out) const
|
|||||||
|
|
||||||
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
|
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
|
||||||
{
|
{
|
||||||
ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
|
ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
|
||||||
m_source.GetTranslationId());
|
m_source.GetTranslationId());
|
||||||
WriteSearchGraph(writer);
|
WriteSearchGraph(writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,7 +237,10 @@ void ChartParser::CreateInputPaths(const InputType &input)
|
|||||||
m_inputPathMatrix.resize(size);
|
m_inputPathMatrix.resize(size);
|
||||||
|
|
||||||
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
|
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
|
||||||
"Input must be a sentence or a tree, not lattice or confusion networks");
|
"Input must be a sentence or a tree, " <<
|
||||||
|
"not lattice or confusion networks");
|
||||||
|
|
||||||
|
TranslationTask const* ttask = m_ttask.lock().get();
|
||||||
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
||||||
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
||||||
size_t endPos = startPos + phaseSize -1;
|
size_t endPos = startPos + phaseSize -1;
|
||||||
@ -249,11 +252,11 @@ void ChartParser::CreateInputPaths(const InputType &input)
|
|||||||
|
|
||||||
InputPath *node;
|
InputPath *node;
|
||||||
if (range.GetNumWordsCovered() == 1) {
|
if (range.GetNumWordsCovered() == 1) {
|
||||||
node = new InputPath(m_ttask, subphrase, labels, range, NULL, NULL);
|
node = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
||||||
vec.push_back(node);
|
vec.push_back(node);
|
||||||
} else {
|
} else {
|
||||||
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
|
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
|
||||||
node = new InputPath(m_ttask, subphrase, labels, range, &prevNode, NULL);
|
node = new InputPath(ttask, subphrase, labels, range, &prevNode, NULL);
|
||||||
vec.push_back(node);
|
vec.push_back(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,8 +158,8 @@ void ChartTranslationOptionList::ApplyThreshold(float const threshold)
|
|||||||
scoreThreshold += threshold; // StaticData::Instance().GetTranslationOptionThreshold();
|
scoreThreshold += threshold; // StaticData::Instance().GetTranslationOptionThreshold();
|
||||||
|
|
||||||
CollType::iterator bound = std::partition(m_collection.begin(),
|
CollType::iterator bound = std::partition(m_collection.begin(),
|
||||||
m_collection.begin()+m_size,
|
m_collection.begin()+m_size,
|
||||||
ScoreThresholdPred(scoreThreshold));
|
ScoreThresholdPred(scoreThreshold));
|
||||||
|
|
||||||
m_size = std::distance(m_collection.begin(), bound);
|
m_size = std::distance(m_collection.begin(), bound);
|
||||||
}
|
}
|
||||||
|
@ -68,7 +68,7 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input,
|
|||||||
{
|
{
|
||||||
SetInputPath(&inputPath);
|
SetInputPath(&inputPath);
|
||||||
// if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
|
// if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
|
||||||
if (inputPath.ttask.lock()->options()->input.placeholder_factor != NOT_FOUND) {
|
if (inputPath.ttask->options()->input.placeholder_factor != NOT_FOUND) {
|
||||||
CreateSourceRuleFromInputPath();
|
CreateSourceRuleFromInputPath();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -283,7 +283,7 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
|||||||
// float translationOptionThreshold
|
// float translationOptionThreshold
|
||||||
// = ttask->options()->search.trans_opt_threshold;
|
// = ttask->options()->search.trans_opt_threshold;
|
||||||
TranslationOptionCollection *rv
|
TranslationOptionCollection *rv
|
||||||
= new TranslationOptionCollectionConfusionNet(ttask, *this);
|
= new TranslationOptionCollectionConfusionNet(ttask, *this);
|
||||||
//, maxNoTransOptPerCoverage, translationOptionThreshold);
|
//, maxNoTransOptPerCoverage, translationOptionThreshold);
|
||||||
assert(rv);
|
assert(rv);
|
||||||
return rv;
|
return rv;
|
||||||
|
@ -36,7 +36,7 @@ protected:
|
|||||||
#endif
|
#endif
|
||||||
SPTR<std::map<std::string,float> const> m_context_weights;
|
SPTR<std::map<std::string,float> const> m_context_weights;
|
||||||
public:
|
public:
|
||||||
|
typedef boost::shared_ptr<ContextScope> ptr;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
boost::shared_ptr<void> const&
|
boost::shared_ptr<void> const&
|
||||||
set(void const* const key, boost::shared_ptr<T> const& val) {
|
set(void const* const key, boost::shared_ptr<T> const& val) {
|
||||||
|
@ -144,7 +144,7 @@ public:
|
|||||||
return m_avg_input_length;
|
return m_avg_input_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Load(AllOptions::ptr const& opts);
|
void Load(AllOptions::ptr const& opts);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static std::vector<BleuScoreFeature*> s_staticColl;
|
static std::vector<BleuScoreFeature*> s_staticColl;
|
||||||
|
@ -56,7 +56,7 @@ void ConstrainedDecoding::Load(AllOptions::ptr const& opts)
|
|||||||
for(size_t i = 0; i < m_paths.size(); ++i) {
|
for(size_t i = 0; i < m_paths.size(); ++i) {
|
||||||
InputFileStream constraintFile(m_paths[i]);
|
InputFileStream constraintFile(m_paths[i]);
|
||||||
std::string line;
|
std::string line;
|
||||||
long sentenceID = opts->output.start_translation_id - 1 ;
|
long sentenceID = opts->output.start_translation_id - 1 ;
|
||||||
while (getline(constraintFile, line)) {
|
while (getline(constraintFile, line)) {
|
||||||
vector<string> vecStr = Tokenize(line, "\t");
|
vector<string> vecStr = Tokenize(line, "\t");
|
||||||
|
|
||||||
|
@ -49,16 +49,16 @@ const FFState* DistortionScoreProducer::EmptyHypothesisState(const InputType &in
|
|||||||
NOT_FOUND);
|
NOT_FOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
float
|
float
|
||||||
DistortionScoreProducer::
|
DistortionScoreProducer::
|
||||||
CalculateDistortionScore(const Hypothesis& hypo,
|
CalculateDistortionScore(const Hypothesis& hypo,
|
||||||
const Range &prev, const Range &curr, const int FirstGap)
|
const Range &prev, const Range &curr, const int FirstGap)
|
||||||
{
|
{
|
||||||
// if(!StaticData::Instance().UseEarlyDistortionCost()) {
|
// if(!StaticData::Instance().UseEarlyDistortionCost()) {
|
||||||
if(!hypo.GetManager().options()->reordering.use_early_distortion_cost) {
|
if(!hypo.GetManager().options()->reordering.use_early_distortion_cost) {
|
||||||
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
|
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
|
||||||
} // else {
|
} // else {
|
||||||
|
|
||||||
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
||||||
Definitions:
|
Definitions:
|
||||||
S : current source range
|
S : current source range
|
||||||
|
@ -55,7 +55,9 @@ void GlobalLexicalModel::Load(AllOptions::ptr const& opts)
|
|||||||
{
|
{
|
||||||
m_options = opts;
|
m_options = opts;
|
||||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
const std::string& oFactorDelimiter = opts->output.factor_delimiter;
|
||||||
|
const std::string& iFactorDelimiter = opts->input.factor_delimiter;
|
||||||
|
|
||||||
|
|
||||||
VERBOSE(2, "Loading global lexical model from file " << m_filePath << endl);
|
VERBOSE(2, "Loading global lexical model from file " << m_filePath << endl);
|
||||||
|
|
||||||
@ -76,21 +78,23 @@ void GlobalLexicalModel::Load(AllOptions::ptr const& opts)
|
|||||||
|
|
||||||
// create the output word
|
// create the output word
|
||||||
Word *outWord = new Word();
|
Word *outWord = new Word();
|
||||||
vector<string> factorString = Tokenize( token[0], factorDelimiter );
|
vector<string> factorString = Tokenize( token[0], oFactorDelimiter );
|
||||||
for (size_t i=0 ; i < m_outputFactorsVec.size() ; i++) {
|
for (size_t i=0 ; i < m_outputFactorsVec.size() ; i++) {
|
||||||
const FactorDirection& direction = Output;
|
const FactorDirection& direction = Output;
|
||||||
const FactorType& factorType = m_outputFactorsVec[i];
|
const FactorType& factorType = m_outputFactorsVec[i];
|
||||||
const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
|
const Factor* factor
|
||||||
|
= factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||||
outWord->SetFactor( factorType, factor );
|
outWord->SetFactor( factorType, factor );
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the input word
|
// create the input word
|
||||||
Word *inWord = new Word();
|
Word *inWord = new Word();
|
||||||
factorString = Tokenize( token[1], factorDelimiter );
|
factorString = Tokenize( token[1], iFactorDelimiter );
|
||||||
for (size_t i=0 ; i < m_inputFactorsVec.size() ; i++) {
|
for (size_t i=0 ; i < m_inputFactorsVec.size() ; i++) {
|
||||||
const FactorDirection& direction = Input;
|
const FactorDirection& direction = Input;
|
||||||
const FactorType& factorType = m_inputFactorsVec[i];
|
const FactorType& factorType = m_inputFactorsVec[i];
|
||||||
const Factor* factor = factorCollection.AddFactor( direction, factorType, factorString[i] );
|
const Factor* factor
|
||||||
|
= factorCollection.AddFactor( direction, factorType, factorString[i] );
|
||||||
inWord->SetFactor( factorType, factor );
|
inWord->SetFactor( factorType, factor );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,8 +73,8 @@ public:
|
|||||||
class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
|
class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ChartSearchGraphWriterMoses(AllOptions::ptr const& opts,
|
ChartSearchGraphWriterMoses(AllOptions::ptr const& opts,
|
||||||
std::ostream* out, size_t lineNumber)
|
std::ostream* out, size_t lineNumber)
|
||||||
: ChartSearchGraphWriter(opts), m_out(out), m_lineNumber(lineNumber) {}
|
: ChartSearchGraphWriter(opts), m_out(out), m_lineNumber(lineNumber) {}
|
||||||
virtual void WriteHeader(size_t, size_t) const {
|
virtual void WriteHeader(size_t, size_t) const {
|
||||||
/* do nothing */
|
/* do nothing */
|
||||||
@ -91,7 +91,7 @@ private:
|
|||||||
class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
|
class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ChartSearchGraphWriterHypergraph(AllOptions::ptr const& opts, std::ostream* out)
|
ChartSearchGraphWriterHypergraph(AllOptions::ptr const& opts, std::ostream* out)
|
||||||
: ChartSearchGraphWriter(opts), m_out(out), m_nodeId(0) { }
|
: ChartSearchGraphWriter(opts), m_out(out), m_nodeId(0) { }
|
||||||
virtual void WriteHeader(size_t winners, size_t losers) const;
|
virtual void WriteHeader(size_t winners, size_t losers) const;
|
||||||
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
|
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
|
||||||
|
@ -11,7 +11,7 @@ using namespace std;
|
|||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
InputPath::
|
InputPath::
|
||||||
InputPath(ttaskwptr const theTask,
|
InputPath(TranslationTask const* theTask,
|
||||||
Phrase const& phrase,
|
Phrase const& phrase,
|
||||||
NonTerminalSet const& sourceNonTerms,
|
NonTerminalSet const& sourceNonTerms,
|
||||||
Range const& range, InputPath const *prevNode,
|
Range const& range, InputPath const *prevNode,
|
||||||
|
@ -40,7 +40,8 @@ public:
|
|||||||
TargetPhrases;
|
TargetPhrases;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ttaskwptr const ttask;
|
// ttaskwptr const ttask;
|
||||||
|
TranslationTask const* ttask;
|
||||||
protected:
|
protected:
|
||||||
const InputPath *m_prevPath;
|
const InputPath *m_prevPath;
|
||||||
Phrase m_phrase;
|
Phrase m_phrase;
|
||||||
@ -65,7 +66,7 @@ public:
|
|||||||
, m_nextNode(NOT_FOUND) {
|
, m_nextNode(NOT_FOUND) {
|
||||||
}
|
}
|
||||||
|
|
||||||
InputPath(ttaskwptr const ttask,
|
InputPath(TranslationTask const* ttask, // ttaskwptr const ttask,
|
||||||
Phrase const& phrase,
|
Phrase const& phrase,
|
||||||
NonTerminalSet const& sourceNonTerms,
|
NonTerminalSet const& sourceNonTerms,
|
||||||
Range const& range,
|
Range const& range,
|
||||||
|
@ -123,7 +123,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"the",
|
"the",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||||
@ -150,7 +149,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"the licenses",
|
"the licenses",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 2 );
|
BOOST_CHECK( phrase.GetSize() == 2 );
|
||||||
@ -179,7 +177,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"the licenses for",
|
"the licenses for",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 3 );
|
BOOST_CHECK( phrase.GetSize() == 3 );
|
||||||
@ -206,7 +203,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"the licenses for most",
|
"the licenses for most",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 4 );
|
BOOST_CHECK( phrase.GetSize() == 4 );
|
||||||
@ -252,7 +248,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"the",
|
"the",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||||
@ -280,7 +275,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"licenses",
|
"licenses",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||||
@ -308,7 +302,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"for",
|
"for",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||||
@ -336,7 +329,6 @@ public:
|
|||||||
Input,
|
Input,
|
||||||
outputFactorOrder,
|
outputFactorOrder,
|
||||||
"most",
|
"most",
|
||||||
// StaticData::Instance().GetFactorDelimiter(),
|
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||||
|
@ -1699,7 +1699,7 @@ OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) c
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool markUnknown = options()->unk.mark;
|
bool markUnknown = options()->unk.mark;
|
||||||
std::string const& fd = options()->output.FactorDelimiter;
|
std::string const& fd = options()->output.factor_delimiter;
|
||||||
|
|
||||||
TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
|
TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
|
||||||
size_t size = phrase.GetSize();
|
size_t size = phrase.GetSize();
|
||||||
|
@ -116,6 +116,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
|||||||
|
|
||||||
|
|
||||||
// convert into TargetPhrases
|
// convert into TargetPhrases
|
||||||
|
std::string fd = m_obj->options()->output.factor_delimiter;
|
||||||
for(size_t i=0; i<cands.size(); ++i) {
|
for(size_t i=0; i<cands.size(); ++i) {
|
||||||
TargetPhrase targetPhrase(m_obj);
|
TargetPhrase targetPhrase(m_obj);
|
||||||
|
|
||||||
@ -134,7 +135,8 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
|||||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
|
CreateTargetPhrase(targetPhrase,factorStrings, fd, scoreVector, Scores(0),
|
||||||
|
&wacands[i], &src);
|
||||||
|
|
||||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||||
tCands.push_back(targetPhrase);
|
tCands.push_back(targetPhrase);
|
||||||
@ -375,6 +377,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
|||||||
TargetPhrase targetPhrase(m_obj);
|
TargetPhrase targetPhrase(m_obj);
|
||||||
CreateTargetPhrase(targetPhrase
|
CreateTargetPhrase(targetPhrase
|
||||||
, j ->first
|
, j ->first
|
||||||
|
, m_obj->options()->output.factor_delimiter
|
||||||
, scores.transScore
|
, scores.transScore
|
||||||
, scores.inputScores
|
, scores.inputScores
|
||||||
, NULL
|
, NULL
|
||||||
@ -403,6 +406,7 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
|||||||
|
|
||||||
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||||
StringTgtCand::Tokens const& factorStrings,
|
StringTgtCand::Tokens const& factorStrings,
|
||||||
|
std::string const& factorDelimiter,
|
||||||
Scores const& transVector,
|
Scores const& transVector,
|
||||||
Scores const& inputVector,
|
Scores const& inputVector,
|
||||||
const std::string *alignmentString,
|
const std::string *alignmentString,
|
||||||
@ -411,7 +415,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
|||||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||||
|
|
||||||
for(size_t k=0; k<factorStrings.size(); ++k) {
|
for(size_t k=0; k<factorStrings.size(); ++k) {
|
||||||
util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], StaticData::Instance().GetFactorDelimiter());
|
util::TokenIter<util::MultiCharacter, false>
|
||||||
|
word(*factorStrings[k], factorDelimiter);
|
||||||
Word& w=targetPhrase.AddWord();
|
Word& w=targetPhrase.AddWord();
|
||||||
for(size_t l=0; l<m_output.size(); ++l, ++word) {
|
for(size_t l=0; l<m_output.size(); ++l, ++word) {
|
||||||
w[m_output[l]]= factorCollection.AddFactor(*word);
|
w[m_output[l]]= factorCollection.AddFactor(*word);
|
||||||
|
@ -116,6 +116,7 @@ public:
|
|||||||
|
|
||||||
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||||
StringTgtCand::Tokens const& factorStrings,
|
StringTgtCand::Tokens const& factorStrings,
|
||||||
|
std::string const& factorDelimiter,
|
||||||
Scores const& transVector,
|
Scores const& transVector,
|
||||||
Scores const& inputVector,
|
Scores const& inputVector,
|
||||||
const std::string *alignmentString,
|
const std::string *alignmentString,
|
||||||
|
@ -55,18 +55,6 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// /// return shared pointer to ttask
|
|
||||||
// // only TargetPhrases have non-NULL ttaskptrs!
|
|
||||||
// virtual ttasksptr GetTtask() const {
|
|
||||||
// return ttasksptr();
|
|
||||||
// }
|
|
||||||
|
|
||||||
// /// check if this phrase belongs to a valid ttask
|
|
||||||
// // only TargetPhrases have non-NULL ttaskptrs!
|
|
||||||
// virtual bool HasTtaskSPtr() const {
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
|
|
||||||
virtual bool HasScope() const {
|
virtual bool HasScope() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -257,7 +257,7 @@ Sentence::
|
|||||||
CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
CreateTranslationOptionCollection(ttasksptr const& ttask) const
|
||||||
{
|
{
|
||||||
TranslationOptionCollection *rv
|
TranslationOptionCollection *rv
|
||||||
= new TranslationOptionCollectionText(ttask, *this);
|
= new TranslationOptionCollectionText(ttask, *this);
|
||||||
assert(rv);
|
assert(rv);
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -53,7 +53,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#ifdef HAVE_CMPH
|
#ifdef HAVE_CMPH
|
||||||
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
||||||
#endif
|
#endif
|
||||||
#if !defined WIN32 || defined __MINGW32__ || defined HAVE_CMPH
|
#if defined HAVE_CMPH
|
||||||
#include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
|
#include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -127,18 +127,10 @@ StaticData
|
|||||||
::ini_output_options()
|
::ini_output_options()
|
||||||
{
|
{
|
||||||
const PARAM_VEC *params;
|
const PARAM_VEC *params;
|
||||||
|
|
||||||
// verbose level
|
// verbose level
|
||||||
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
||||||
"output-unknowns", "");
|
"output-unknowns", "");
|
||||||
|
|
||||||
// m_parameter->SetParameter<long>(m_startTranslationId,
|
|
||||||
// "start-translation-id", 0);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,16 +198,10 @@ bool StaticData::LoadData(Parameter *parameter)
|
|||||||
// threading etc.
|
// threading etc.
|
||||||
if (!ini_performance_options()) return false;
|
if (!ini_performance_options()) return false;
|
||||||
|
|
||||||
// Compact phrase table and reordering model
|
|
||||||
// m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
|
||||||
// m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
|
|
||||||
|
|
||||||
// S2T decoder
|
|
||||||
|
|
||||||
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
||||||
|
|
||||||
// set class-specific default parameters
|
// set class-specific default parameters
|
||||||
#if !defined WIN32 || defined __MINGW32__ || defined HAVE_CMPH
|
#if defined HAVE_CMPH
|
||||||
LexicalReorderingTableCompact::SetStaticDefaultParameters(*parameter);
|
LexicalReorderingTableCompact::SetStaticDefaultParameters(*parameter);
|
||||||
PhraseDictionaryCompact::SetStaticDefaultParameters(*parameter);
|
PhraseDictionaryCompact::SetStaticDefaultParameters(*parameter);
|
||||||
#endif
|
#endif
|
||||||
@ -322,8 +308,6 @@ void StaticData::LoadChartDecodingParameters()
|
|||||||
// source label overlap
|
// source label overlap
|
||||||
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
|
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
|
||||||
SourceLabelOverlapAdd);
|
SourceLabelOverlapAdd);
|
||||||
m_parameter->SetParameter(m_ruleLimit, "rule-limit",
|
|
||||||
DEFAULT_MAX_TRANS_OPT_SIZE);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -438,7 +422,7 @@ LoadDecodeGraphsOld(const vector<string> &mappingVector,
|
|||||||
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
||||||
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
||||||
DecodeGraph *decodeGraph;
|
DecodeGraph *decodeGraph;
|
||||||
if (is_syntax(m_options->search.algo)) {
|
if (is_syntax(m_options->search.algo)) {
|
||||||
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
|
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
|
||||||
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
||||||
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
||||||
@ -506,7 +490,7 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
|
|||||||
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
||||||
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
||||||
DecodeGraph *decodeGraph;
|
DecodeGraph *decodeGraph;
|
||||||
if (is_syntax(m_options->search.algo)) {
|
if (is_syntax(m_options->search.algo)) {
|
||||||
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
|
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
|
||||||
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
||||||
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
||||||
@ -619,7 +603,6 @@ void StaticData::LoadFeatureFunctions()
|
|||||||
m_requireSortingAfterSourceContext = true;
|
m_requireSortingAfterSourceContext = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (PhraseDictionary *ffCast = dynamic_cast<PhraseDictionary*>(ff)) {
|
|
||||||
if (dynamic_cast<PhraseDictionary*>(ff)) {
|
if (dynamic_cast<PhraseDictionary*>(ff)) {
|
||||||
doLoad = false;
|
doLoad = false;
|
||||||
}
|
}
|
||||||
|
@ -104,12 +104,6 @@ protected:
|
|||||||
|
|
||||||
std::string m_outputUnknownsFile; //! output unknowns in this file
|
std::string m_outputUnknownsFile; //! output unknowns in this file
|
||||||
|
|
||||||
size_t m_ruleLimit;
|
|
||||||
|
|
||||||
// Whether to load compact phrase table and reordering table into memory
|
|
||||||
bool m_minphrMemory;
|
|
||||||
bool m_minlexrMemory;
|
|
||||||
|
|
||||||
// Initial = 0 = can be used when creating poss trans
|
// Initial = 0 = can be used when creating poss trans
|
||||||
// Other = 1 = used to calculate LM score once all steps have been processed
|
// Other = 1 = used to calculate LM score once all steps have been processed
|
||||||
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
|
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
|
||||||
@ -146,7 +140,6 @@ protected:
|
|||||||
|
|
||||||
void NoCache();
|
void NoCache();
|
||||||
|
|
||||||
bool m_continuePartialTranslation;
|
|
||||||
std::string m_binPath;
|
std::string m_binPath;
|
||||||
|
|
||||||
// soft NT lookup for chart models
|
// soft NT lookup for chart models
|
||||||
@ -252,17 +245,10 @@ public:
|
|||||||
return m_unknownLHS;
|
return m_unknownLHS;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GetRuleLimit() const {
|
|
||||||
return m_ruleLimit;
|
|
||||||
}
|
|
||||||
float GetRuleCountThreshold() const {
|
float GetRuleCountThreshold() const {
|
||||||
return 999999; /* TODO wtf! */
|
return 999999; /* TODO wtf! */
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ContinuePartialTranslation() const {
|
|
||||||
return m_continuePartialTranslation;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ReLoadBleuScoreFeatureParameter(float weight);
|
void ReLoadBleuScoreFeatureParameter(float weight);
|
||||||
|
|
||||||
Parameter* GetParameter() {
|
Parameter* GetParameter() {
|
||||||
@ -273,10 +259,6 @@ public:
|
|||||||
return m_threadCount;
|
return m_threadCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// long GetStartTranslationId() const {
|
|
||||||
// return m_startTranslationId;
|
|
||||||
// }
|
|
||||||
|
|
||||||
void SetExecPath(const std::string &path);
|
void SetExecPath(const std::string &path);
|
||||||
const std::string &GetBinDirectory() const;
|
const std::string &GetBinDirectory() const;
|
||||||
|
|
||||||
|
@ -60,9 +60,9 @@ void Manager<RuleMatcher>::Decode()
|
|||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
|
||||||
// Get various pruning-related constants.
|
// Get various pruning-related constants.
|
||||||
const std::size_t popLimit = staticData.options()->cube.pop_limit;
|
const std::size_t popLimit = options()->cube.pop_limit;
|
||||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
const std::size_t ruleLimit = options()->syntax.rule_limit;
|
||||||
const std::size_t stackLimit = staticData.options()->search.stack_size;
|
const std::size_t stackLimit = options()->search.stack_size;
|
||||||
|
|
||||||
// Initialize the stacks.
|
// Initialize the stacks.
|
||||||
InitializeStacks();
|
InitializeStacks();
|
||||||
|
@ -163,9 +163,9 @@ void Manager<Parser>::Decode()
|
|||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
|
||||||
// Get various pruning-related constants.
|
// Get various pruning-related constants.
|
||||||
const std::size_t popLimit = staticData.options()->cube.pop_limit;
|
const std::size_t popLimit = options()->cube.pop_limit;
|
||||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
const std::size_t ruleLimit = options()->syntax.rule_limit;
|
||||||
const std::size_t stackLimit = staticData.options()->search.stack_size;
|
const std::size_t stackLimit = options()->search.stack_size;
|
||||||
|
|
||||||
// Initialise the PChart and SChart.
|
// Initialise the PChart and SChart.
|
||||||
InitializeCharts();
|
InitializeCharts();
|
||||||
|
@ -94,11 +94,11 @@ void Manager<RuleMatcher>::InitializeStacks()
|
|||||||
template<typename RuleMatcher>
|
template<typename RuleMatcher>
|
||||||
void Manager<RuleMatcher>::Decode()
|
void Manager<RuleMatcher>::Decode()
|
||||||
{
|
{
|
||||||
const StaticData &staticData = StaticData::Instance();
|
// const StaticData &staticData = StaticData::Instance();
|
||||||
|
|
||||||
// Get various pruning-related constants.
|
// Get various pruning-related constants.
|
||||||
const std::size_t popLimit = this->options()->cube.pop_limit;
|
const std::size_t popLimit = this->options()->cube.pop_limit;
|
||||||
const std::size_t ruleLimit = staticData.GetRuleLimit();
|
const std::size_t ruleLimit = this->options()->syntax.rule_limit;
|
||||||
const std::size_t stackLimit = this->options()->search.stack_size;
|
const std::size_t stackLimit = this->options()->search.stack_size;
|
||||||
|
|
||||||
// Initialize the stacks.
|
// Initialize the stacks.
|
||||||
@ -215,8 +215,8 @@ void Manager<RuleMatcher>::ExtractKBest(
|
|||||||
// than k. The k-best factor determines how much bigger the limit should be,
|
// than k. The k-best factor determines how much bigger the limit should be,
|
||||||
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
||||||
// too many translations are identical.
|
// too many translations are identical.
|
||||||
const StaticData &staticData = StaticData::Instance();
|
// const StaticData &staticData = StaticData::Instance();
|
||||||
const std::size_t nBestFactor = staticData.options()->nbest.factor;
|
const std::size_t nBestFactor = this->options()->nbest.factor;
|
||||||
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
|
std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor;
|
||||||
|
|
||||||
// Extract the derivations.
|
// Extract the derivations.
|
||||||
|
@ -44,9 +44,6 @@ bool RuleTrieLoader::Load(Moses::AllOptions const& opts,
|
|||||||
{
|
{
|
||||||
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
|
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
|
||||||
|
|
||||||
// const StaticData &staticData = StaticData::Instance();
|
|
||||||
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
|
|
||||||
|
|
||||||
std::size_t count = 0;
|
std::size_t count = 0;
|
||||||
|
|
||||||
std::ostream *progress = NULL;
|
std::ostream *progress = NULL;
|
||||||
|
@ -53,7 +53,6 @@ TargetPhrase::TargetPhrase( std::string out_string, const PhraseDictionary *pt)
|
|||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
// XXX should this really be InputFactorOrder???
|
// XXX should this really be InputFactorOrder???
|
||||||
CreateFromString(Output, staticData.options()->input.factor_order, out_string,
|
CreateFromString(Output, staticData.options()->input.factor_order, out_string,
|
||||||
// staticData.GetFactorDelimiter(), // eliminated [UG]
|
|
||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
|
|||||||
{
|
{
|
||||||
|
|
||||||
size_t sourceSize = parser.GetSize();
|
size_t sourceSize = parser.GetSize();
|
||||||
|
size_t ruleLimit = parser.options()->syntax.rule_limit;
|
||||||
m_completedRules.resize(sourceSize);
|
m_completedRules.resize(sourceSize, CompletedRuleCollection(ruleLimit));
|
||||||
|
|
||||||
m_isSoftMatching = !m_softMatchingMap.empty();
|
m_isSoftMatching = !m_softMatchingMap.empty();
|
||||||
}
|
}
|
||||||
|
@ -45,8 +45,8 @@ ChartRuleLookupManagerMemoryPerSentence::ChartRuleLookupManagerMemoryPerSentence
|
|||||||
{
|
{
|
||||||
|
|
||||||
size_t sourceSize = parser.GetSize();
|
size_t sourceSize = parser.GetSize();
|
||||||
|
size_t ruleLimit = parser.options()->syntax.rule_limit;
|
||||||
m_completedRules.resize(sourceSize);
|
m_completedRules.resize(sourceSize, CompletedRuleCollection(ruleLimit));
|
||||||
|
|
||||||
m_isSoftMatching = !m_softMatchingMap.empty();
|
m_isSoftMatching = !m_softMatchingMap.empty();
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,8 @@ using namespace std;
|
|||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
CompletedRuleCollection::CompletedRuleCollection() : m_ruleLimit(StaticData::Instance().GetRuleLimit())
|
CompletedRuleCollection::CompletedRuleCollection(size_t rule_limit)
|
||||||
|
: m_ruleLimit(rule_limit)
|
||||||
{
|
{
|
||||||
m_scoreThreshold = numeric_limits<float>::infinity();
|
m_scoreThreshold = numeric_limits<float>::infinity();
|
||||||
}
|
}
|
||||||
|
@ -74,7 +74,7 @@ public:
|
|||||||
struct CompletedRuleCollection {
|
struct CompletedRuleCollection {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
CompletedRuleCollection();
|
CompletedRuleCollection(size_t rule_limit);
|
||||||
~CompletedRuleCollection();
|
~CompletedRuleCollection();
|
||||||
|
|
||||||
CompletedRuleCollection(const CompletedRuleCollection &old)
|
CompletedRuleCollection(const CompletedRuleCollection &old)
|
||||||
|
@ -84,9 +84,9 @@ public:
|
|||||||
void
|
void
|
||||||
Load(std::string filePath);
|
Load(std::string filePath);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
SetStaticDefaultParameters(Parameter const& param);
|
SetStaticDefaultParameters(Parameter const& param);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -70,8 +70,8 @@ void PhraseDictionaryCompact::Load(AllOptions::ptr const& opts)
|
|||||||
if (!FileExists(tFilePath))
|
if (!FileExists(tFilePath))
|
||||||
throw runtime_error("Error: File " + tFilePath + " does not exist.");
|
throw runtime_error("Error: File " + tFilePath + " does not exist.");
|
||||||
|
|
||||||
m_phraseDecoder
|
m_phraseDecoder
|
||||||
= new PhraseDecoder(*this, &m_input, &m_output, m_numScoreComponents);
|
= new PhraseDecoder(*this, &m_input, &m_output, m_numScoreComponents);
|
||||||
|
|
||||||
std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
|
std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
|
||||||
|
|
||||||
@ -155,7 +155,7 @@ PhraseDictionaryCompact::
|
|||||||
delete m_phraseDecoder;
|
delete m_phraseDecoder;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
PhraseDictionaryCompact::
|
PhraseDictionaryCompact::
|
||||||
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||||
{
|
{
|
||||||
@ -164,12 +164,12 @@ CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
|||||||
m_sentenceCache->push_back(tpc);
|
m_sentenceCache->push_back(tpc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
PhraseDictionaryCompact::
|
PhraseDictionaryCompact::
|
||||||
AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
|
AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void
|
void
|
||||||
PhraseDictionaryCompact::
|
PhraseDictionaryCompact::
|
||||||
CleanUpAfterSentenceProcessing(const InputType &source)
|
CleanUpAfterSentenceProcessing(const InputType &source)
|
||||||
{
|
{
|
||||||
|
@ -35,16 +35,6 @@ namespace Moses
|
|||||||
{
|
{
|
||||||
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
|
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
|
||||||
|
|
||||||
// CacheColl::~CacheColl()
|
|
||||||
// {
|
|
||||||
// // not needed any more since the switch to shared pointers
|
|
||||||
// // for (iterator iter = begin(); iter != end(); ++iter) {
|
|
||||||
// // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
|
|
||||||
// // TargetPhraseCollection::shared_ptr tps = key.first;
|
|
||||||
// // delete tps;
|
|
||||||
// // }
|
|
||||||
// }
|
|
||||||
|
|
||||||
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
|
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
|
||||||
: DecodeFeature(line, registerNow)
|
: DecodeFeature(line, registerNow)
|
||||||
, m_tableLimit(20) // default
|
, m_tableLimit(20) // default
|
||||||
@ -82,8 +72,7 @@ GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
|||||||
ret.reset(new TargetPhraseCollection(*ret));
|
ret.reset(new TargetPhraseCollection(*ret));
|
||||||
}
|
}
|
||||||
cache[hash] = entry(ret, clock());
|
cache[hash] = entry(ret, clock());
|
||||||
} else {
|
} else { // in cache. just use it
|
||||||
// in cache. just use it
|
|
||||||
iter->second.second = clock();
|
iter->second.second = clock();
|
||||||
ret = iter->second.first;
|
ret = iter->second.first;
|
||||||
}
|
}
|
||||||
@ -175,31 +164,6 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// persistent cache handling
|
|
||||||
// saving presistent cache to disk
|
|
||||||
//void PhraseDictionary::SaveCache() const
|
|
||||||
//{
|
|
||||||
// CacheColl &cache = GetCache();
|
|
||||||
// for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
|
|
||||||
// iter != cache.end(),
|
|
||||||
// iter++ ) {
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
// loading persistent cache from disk
|
|
||||||
//void PhraseDictionary::LoadCache() const
|
|
||||||
//{
|
|
||||||
// CacheColl &cache = GetCache();
|
|
||||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
|
|
||||||
// iter = cache.begin();
|
|
||||||
// while( iter != cache.end() ) {
|
|
||||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
|
|
||||||
// delete iterRemove->second.first;
|
|
||||||
// cache.erase(iterRemove);
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
// reduce presistent cache by half of maximum size
|
// reduce presistent cache by half of maximum size
|
||||||
void PhraseDictionary::ReduceCache() const
|
void PhraseDictionary::ReduceCache() const
|
||||||
{
|
{
|
||||||
@ -233,7 +197,9 @@ void PhraseDictionary::ReduceCache() const
|
|||||||
<< reduceCacheTime << " seconds." << std::endl);
|
<< reduceCacheTime << " seconds." << std::endl);
|
||||||
}
|
}
|
||||||
|
|
||||||
CacheColl &PhraseDictionary::GetCache() const
|
CacheColl &
|
||||||
|
PhraseDictionary::
|
||||||
|
GetCache() const
|
||||||
{
|
{
|
||||||
CacheColl *cache;
|
CacheColl *cache;
|
||||||
cache = m_cache.get();
|
cache = m_cache.get();
|
||||||
|
@ -44,6 +44,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "moses/TargetPhraseCollection.h"
|
#include "moses/TargetPhraseCollection.h"
|
||||||
#include "moses/InputPath.h"
|
#include "moses/InputPath.h"
|
||||||
#include "moses/FF/DecodeFeature.h"
|
#include "moses/FF/DecodeFeature.h"
|
||||||
|
#include "moses/ContextScope.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -58,15 +59,6 @@ class ChartParser;
|
|||||||
// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
|
// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
|
||||||
typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
|
typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
|
||||||
typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
|
typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
|
||||||
// class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
|
|
||||||
// {
|
|
||||||
// // 1st = hash of source phrase/ address of phrase-table node
|
|
||||||
// // 2nd = all translations
|
|
||||||
// // 3rd = time of last access
|
|
||||||
|
|
||||||
// public:
|
|
||||||
// ~CacheColl();
|
|
||||||
// };
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for phrase dictionaries (tables).
|
* Abstract base class for phrase dictionaries (tables).
|
||||||
|
@ -330,7 +330,6 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr
|
|||||||
{
|
{
|
||||||
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
|
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
|
||||||
Phrase sourcePhrase(0);
|
Phrase sourcePhrase(0);
|
||||||
Phrase targetPhrase(0);
|
Phrase targetPhrase(0);
|
||||||
|
|
||||||
@ -428,7 +427,6 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> ent
|
|||||||
{
|
{
|
||||||
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
|
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
|
||||||
Phrase sourcePhrase(0);
|
Phrase sourcePhrase(0);
|
||||||
|
|
||||||
std::vector<std::string>::iterator it;
|
std::vector<std::string>::iterator it;
|
||||||
@ -517,7 +515,6 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s
|
|||||||
{
|
{
|
||||||
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
|
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
|
||||||
Phrase sourcePhrase(0);
|
Phrase sourcePhrase(0);
|
||||||
TargetPhrase targetPhrase(0);
|
TargetPhrase targetPhrase(0);
|
||||||
|
|
||||||
|
@ -144,7 +144,10 @@ TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPh
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
|
void
|
||||||
|
PhraseDictionaryMultiModelCounts::
|
||||||
|
CollectSufficientStats(const Phrase& src, vector<float> &fs,
|
||||||
|
map<string,multiModelCountsStats*>* allStats) const
|
||||||
//fill fs and allStats with statistics from models
|
//fill fs and allStats with statistics from models
|
||||||
{
|
{
|
||||||
for(size_t i = 0; i < m_numModels; ++i) {
|
for(size_t i = 0; i < m_numModels; ++i) {
|
||||||
|
@ -77,7 +77,7 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
|
|||||||
InputPath &inputPath = **iter;
|
InputPath &inputPath = **iter;
|
||||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||||
|
|
||||||
if (sourcePhrase.GetSize() > StaticData::Instance().options()->search.max_phrase_length) {
|
if (sourcePhrase.GetSize() > m_options->search.max_phrase_length) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,11 +20,11 @@ namespace Moses
|
|||||||
|
|
||||||
/** constructor; just initialize the base class */
|
/** constructor; just initialize the base class */
|
||||||
TranslationOptionCollectionConfusionNet::
|
TranslationOptionCollectionConfusionNet::
|
||||||
TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
||||||
const ConfusionNet &input)
|
const ConfusionNet &input)
|
||||||
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||||
: TranslationOptionCollection(ttask,input)//
|
: TranslationOptionCollection(ttask,input)//
|
||||||
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
||||||
{
|
{
|
||||||
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
|
size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
|
||||||
float translationOptionThreshold = ttask->options()->search.trans_opt_threshold;
|
float translationOptionThreshold = ttask->options()->search.trans_opt_threshold;
|
||||||
@ -63,7 +63,8 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
|||||||
const ScorePair &scores = col[i].second;
|
const ScorePair &scores = col[i].second;
|
||||||
ScorePair *inputScore = new ScorePair(scores);
|
ScorePair *inputScore = new ScorePair(scores);
|
||||||
|
|
||||||
InputPath *path = new InputPath(ttask, subphrase, labels, range, NULL, inputScore);
|
InputPath* path = new InputPath(ttask.get(), subphrase, labels,
|
||||||
|
range, NULL, inputScore);
|
||||||
list.push_back(path);
|
list.push_back(path);
|
||||||
|
|
||||||
m_inputPathQueue.push_back(path);
|
m_inputPathQueue.push_back(path);
|
||||||
@ -114,7 +115,8 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
|
|||||||
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
||||||
inputScore->PlusEquals(scores);
|
inputScore->PlusEquals(scores);
|
||||||
|
|
||||||
InputPath *path = new InputPath(ttask, subphrase, labels, range, &prevPath, inputScore);
|
InputPath *path = new InputPath(ttask.get(), subphrase, labels, range,
|
||||||
|
&prevPath, inputScore);
|
||||||
list.push_back(path);
|
list.push_back(path);
|
||||||
|
|
||||||
m_inputPathQueue.push_back(path);
|
m_inputPathQueue.push_back(path);
|
||||||
|
@ -36,7 +36,7 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
TranslationOptionCollectionConfusionNet
|
TranslationOptionCollectionConfusionNet
|
||||||
(ttasksptr const& ttask, const ConfusionNet &source);
|
(ttasksptr const& ttask, const ConfusionNet &source);
|
||||||
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||||
|
|
||||||
void ProcessUnknownWord(size_t sourcePos);
|
void ProcessUnknownWord(size_t sourcePos);
|
||||||
|
@ -22,9 +22,9 @@ namespace Moses
|
|||||||
TranslationOptionCollectionLattice
|
TranslationOptionCollectionLattice
|
||||||
::TranslationOptionCollectionLattice
|
::TranslationOptionCollectionLattice
|
||||||
( ttasksptr const& ttask, const WordLattice &input)
|
( ttasksptr const& ttask, const WordLattice &input)
|
||||||
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
// , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||||
: TranslationOptionCollection(ttask, input)//
|
: TranslationOptionCollection(ttask, input)//
|
||||||
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
||||||
{
|
{
|
||||||
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
|
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
|
||||||
"Not for models using the legqacy binary phrase table");
|
"Not for models using the legqacy binary phrase table");
|
||||||
@ -65,7 +65,7 @@ TranslationOptionCollectionLattice
|
|||||||
ScorePair *inputScore = new ScorePair(scores);
|
ScorePair *inputScore = new ScorePair(scores);
|
||||||
|
|
||||||
InputPath *path
|
InputPath *path
|
||||||
= new InputPath(ttask, subphrase, labels, range, NULL, inputScore);
|
= new InputPath(ttask.get(), subphrase, labels, range, NULL, inputScore);
|
||||||
|
|
||||||
path->SetNextNode(nextNode);
|
path->SetNextNode(nextNode);
|
||||||
m_inputPathQueue.push_back(path);
|
m_inputPathQueue.push_back(path);
|
||||||
|
@ -35,14 +35,14 @@ namespace Moses
|
|||||||
/** constructor; just initialize the base class */
|
/** constructor; just initialize the base class */
|
||||||
TranslationOptionCollectionText::
|
TranslationOptionCollectionText::
|
||||||
TranslationOptionCollectionText(ttasksptr const& ttask, Sentence const &input)
|
TranslationOptionCollectionText(ttasksptr const& ttask, Sentence const &input)
|
||||||
//, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
//, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||||
: TranslationOptionCollection(ttask,input)
|
: TranslationOptionCollection(ttask,input)
|
||||||
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
// , maxNoTransOptPerCoverage, translationOptionThreshold)
|
||||||
{
|
{
|
||||||
size_t maxNoTransOptPerCoverage
|
size_t maxNoTransOptPerCoverage
|
||||||
= ttask->options()->search.max_trans_opt_per_cov;
|
= ttask->options()->search.max_trans_opt_per_cov;
|
||||||
float translationOptionThreshold
|
float translationOptionThreshold
|
||||||
= ttask->options()->search.trans_opt_threshold;
|
= ttask->options()->search.trans_opt_threshold;
|
||||||
size_t size = input.GetSize();
|
size_t size = input.GetSize();
|
||||||
m_inputPathMatrix.resize(size);
|
m_inputPathMatrix.resize(size);
|
||||||
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
||||||
@ -56,11 +56,11 @@ TranslationOptionCollectionText(ttasksptr const& ttask, Sentence const &input)
|
|||||||
|
|
||||||
InputPath *path;
|
InputPath *path;
|
||||||
if (range.GetNumWordsCovered() == 1) {
|
if (range.GetNumWordsCovered() == 1) {
|
||||||
path = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
path = new InputPath(ttask.get(), subphrase, labels, range, NULL, NULL);
|
||||||
vec.push_back(path);
|
vec.push_back(path);
|
||||||
} else {
|
} else {
|
||||||
const InputPath &prevPath = GetInputPath(startPos, endPos - 1);
|
const InputPath &prevPath = GetInputPath(startPos, endPos - 1);
|
||||||
path = new InputPath(ttask, subphrase, labels, range, &prevPath, NULL);
|
path = new InputPath(ttask.get(), subphrase, labels, range, &prevPath, NULL);
|
||||||
vec.push_back(path);
|
vec.push_back(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,7 +213,8 @@ TO_STRING_BODY(Word);
|
|||||||
ostream& operator<<(ostream& out, const Word& word)
|
ostream& operator<<(ostream& out, const Word& word)
|
||||||
{
|
{
|
||||||
util::StringStream strme;
|
util::StringStream strme;
|
||||||
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
|
const std::string& factorDelimiter
|
||||||
|
= StaticData::Instance().options()->output.factor_delimiter;
|
||||||
bool firstPass = true;
|
bool firstPass = true;
|
||||||
unsigned int stop = max_fax();
|
unsigned int stop = max_fax();
|
||||||
for (unsigned int currFactor = 0 ; currFactor < stop; currFactor++) {
|
for (unsigned int currFactor = 0 ; currFactor < stop; currFactor++) {
|
||||||
|
@ -17,6 +17,7 @@ namespace Moses {
|
|||||||
xml_brackets.first = "<";
|
xml_brackets.first = "<";
|
||||||
xml_brackets.second = ">";
|
xml_brackets.second = ">";
|
||||||
factor_order.assign(1,0);
|
factor_order.assign(1,0);
|
||||||
|
factor_delimiter = "|";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -76,6 +77,7 @@ namespace Moses {
|
|||||||
if (factor_order.empty()) factor_order.assign(1,0);
|
if (factor_order.empty()) factor_order.assign(1,0);
|
||||||
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
|
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
|
||||||
|
|
||||||
|
param.SetParameter<std::string>(factor_delimiter, "factor-delimiter", "|");
|
||||||
param.SetParameter<std::string>(input_file_path,"input-file","");
|
param.SetParameter<std::string>(input_file_path,"input-file","");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -14,10 +14,9 @@ namespace Moses
|
|||||||
InputTypeEnum input_type;
|
InputTypeEnum input_type;
|
||||||
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
||||||
std::vector<FactorType> factor_order; // input factor order
|
std::vector<FactorType> factor_order; // input factor order
|
||||||
|
std::string factor_delimiter;
|
||||||
FactorType placeholder_factor; // where to store original text for placeholders
|
FactorType placeholder_factor; // where to store original text for placeholders
|
||||||
std::string input_file_path;
|
std::string input_file_path;
|
||||||
|
|
||||||
std::pair<std::string,std::string> xml_brackets;
|
std::pair<std::string,std::string> xml_brackets;
|
||||||
// strings to use as XML tags' opening and closing brackets.
|
// strings to use as XML tags' opening and closing brackets.
|
||||||
// Default are "<" and ">"
|
// Default are "<" and ">"
|
||||||
|
@ -24,6 +24,7 @@ namespace Moses {
|
|||||||
, lattice_sample_size(0)
|
, lattice_sample_size(0)
|
||||||
{
|
{
|
||||||
factor_order.assign(1,0);
|
factor_order.assign(1,0);
|
||||||
|
factor_delimiter = "|";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -94,14 +95,14 @@ namespace Moses {
|
|||||||
params= param.GetParam("output-factors");
|
params= param.GetParam("output-factors");
|
||||||
if (params) factor_order = Scan<FactorType>(*params);
|
if (params) factor_order = Scan<FactorType>(*params);
|
||||||
if (factor_order.empty()) factor_order.assign(1,0);
|
if (factor_order.empty()) factor_order.assign(1,0);
|
||||||
|
|
||||||
if (ReportAllFactors) {
|
if (ReportAllFactors) {
|
||||||
for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
|
for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
|
||||||
factor_order.push_back(i);
|
factor_order.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
param.SetParameter(FactorDelimiter, "factor-delimiter", std::string("|"));
|
param.SetParameter(factor_delimiter, "factor-delimiter", std::string("|"));
|
||||||
param.SetParameter(FactorDelimiter, "output-factor-delimiter", FactorDelimiter);
|
param.SetParameter(factor_delimiter, "output-factor-delimiter", factor_delimiter);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -134,12 +135,12 @@ namespace Moses {
|
|||||||
|
|
||||||
m = param.find("factor-delimiter");
|
m = param.find("factor-delimiter");
|
||||||
if (m != param.end()) {
|
if (m != param.end()) {
|
||||||
FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
|
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||||
}
|
}
|
||||||
|
|
||||||
m = param.find("output-factor-delimiter");
|
m = param.find("output-factor-delimiter");
|
||||||
if (m != param.end()) {
|
if (m != param.end()) {
|
||||||
FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
|
factor_delimiter = Trim(xmlrpc_c::value_string(m->second));
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -13,6 +13,7 @@ namespace Moses
|
|||||||
long start_translation_id;
|
long start_translation_id;
|
||||||
|
|
||||||
std::vector<FactorType> factor_order;
|
std::vector<FactorType> factor_order;
|
||||||
|
std::string factor_delimiter;
|
||||||
|
|
||||||
bool ReportAllFactors; // m_reportAllFactors;
|
bool ReportAllFactors; // m_reportAllFactors;
|
||||||
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
||||||
@ -23,7 +24,6 @@ namespace Moses
|
|||||||
|
|
||||||
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
|
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
|
||||||
std::string AlignmentOutputFile;
|
std::string AlignmentOutputFile;
|
||||||
std::string FactorDelimiter;
|
|
||||||
|
|
||||||
bool WordGraph;
|
bool WordGraph;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user