Merge remote-tracking branch 'legacy/master'

Conflicts:
	moses/server/TranslationRequest.cpp
This commit is contained in:
Ulrich Germann 2015-11-24 19:22:37 +00:00
commit c8b859de67
51 changed files with 1143 additions and 384 deletions

View File

@ -3,5 +3,6 @@
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
set -e -o pipefail
./bjam --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
opt=$(pwd)/opt
./bjam --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@

View File

@ -2430,6 +2430,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>parameters/OOVHandlingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OOVHandlingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/OOVHandlingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OOVHandlingOptions.h</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.cpp</name>
<type>1</type>

View File

@ -51,27 +51,8 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,

View File

@ -123,22 +123,6 @@ public:
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
bool Enabled() const {
return m_enabled;

View File

@ -42,24 +42,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -58,23 +58,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -30,11 +30,6 @@ bool CoveredReferenceState::operator==(const FFState& other) const
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath

View File

@ -53,10 +53,6 @@ public:
return new CoveredReferenceState();
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -64,10 +60,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -47,23 +47,6 @@ public:
throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
};
}

View File

@ -64,28 +64,6 @@ public:
UTIL_THROW2("LexicalReordering is not valid for chart decoder");
}
void
EvaluateWithSourceContext
(const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection* estimatedScores = NULL) const
{ }
void
EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &transOptList) const
{ }
void
EvaluateInIsolation(const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{ }
bool
GetHaveDefaultScores() {
return m_haveDefaultScores;

View File

@ -37,18 +37,6 @@ public:
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown

View File

@ -54,23 +54,6 @@ public:
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -296,18 +296,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const
{};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -15,12 +15,19 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
ReadParameters();
}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -29,8 +36,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedScores) const
{}
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const
{}
FFState* SkeletonStatefulFF::EvaluateWhenApplied(

View File

@ -37,19 +37,35 @@ public:
return new SkeletonState(0);
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateTranslationOptionListWithSourceContext
( const InputType &input , const TranslationOptionList &translationOptionList) const;
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,

View File

@ -70,6 +70,23 @@ public:
return false;
}
virtual void
EvaluateInIsolation
(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const {}
virtual void
EvaluateWithSourceContext
(InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase,
StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedFutureScore = NULL) const {}
virtual void
EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const {}
};

View File

@ -48,22 +48,6 @@ public:
ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void SetParameter(const std::string& key, const std::string& value);

View File

@ -215,24 +215,6 @@ public:
virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -63,22 +63,6 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -268,8 +268,8 @@ public:
#ifdef HAVE_XMLRPC_C
// these are implemented in moses/server/Hypothesis_4server.cpp !
void OutputWordAlignment(std::vector<xmlrpc_c::value>& out, const ReportingOptions &options) const;
void OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest, const ReportingOptions &options) const;
void OutputWordAlignment(std::vector<xmlrpc_c::value>& out) const;
void OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest) const;
#endif
bool beats(Hypothesis const& b) const;

View File

@ -57,10 +57,11 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
// out << "ReportHistoryOrder not implemented";
}
void LanguageModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
void
LanguageModel::
EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{
// contains factors used by this LM
float fullScore, nGramScore;

View File

@ -90,18 +90,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
};
}

View File

@ -188,22 +188,6 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const
return hashCode;
}
void BilingualLM::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {}
void BilingualLM::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores) const
{
}
FFState* BilingualLM::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -119,23 +119,6 @@ public:
void Load();
void EvaluateInIsolation(
const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(
const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -196,18 +196,7 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -55,9 +55,6 @@ void ThreadPool::Execute()
}
//Execute job
if (task) {
// must read from task before run. otherwise task may be deleted by main thread
// race condition
task->DeleteAfterExecution();
task->Run();
}
m_threadAvailable.notify_all();

View File

@ -53,9 +53,6 @@ class Task
{
public:
virtual void Run() = 0;
virtual bool DeleteAfterExecution() {
return true;
}
virtual ~Task() {}
};

View File

@ -53,16 +53,16 @@ int main(int argc, char* argv[])
interpret_args(argc, argv);
if (Q1.empty() && Q2.empty()) exit(0);
mmbitext B; string w;
B.open(bname, L1, L2);
boost::shared_ptr<mmbitext> B(new mmbitext); string w;
B->open(bname, L1, L2);
Bitext<Token>::iter m1(B.I1.get(), *B.V1, Q1);
Bitext<Token>::iter m1(B->I1.get(), *B->V1, Q1);
if (Q1.size() && m1.size() == 0) exit(0);
Bitext<Token>::iter m2(B.I2.get(), *B.V2, Q2);
Bitext<Token>::iter m2(B->I2.get(), *B->V2, Q2);
if (Q2.size() && m2.size() == 0) exit(0);
bitvector check(B.T1->size());
bitvector check(B->T1->size());
if (Q1.size() == 0 || Q2.size() == 0) check.set();
else (m2.markSentences(check));
@ -87,23 +87,23 @@ int main(int argc, char* argv[])
size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1;
std::vector<unsigned char> caln;
// cout << sid << " " << B.docname(sid) << std::endl;
if (!B.find_trg_phr_bounds(sid, off, off+m.size(),
// cout << sid << " " << B->docname(sid) << std::endl;
if (!B->find_trg_phr_bounds(sid, off, off+m.size(),
s1,s2,e1,e2,po_fwd,po_bwd,
&caln, NULL, &m == &m2))
{
// cout << "alignment failure" << std::endl;
}
std::cout << sid << " " << B.docname(sid)
std::cout << sid << " " << B->sid2docname(sid)
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
<< "\n";
write_sentence(*B.T1, sid, *B.V1, std::cout); std::cout << "\n";
write_sentence(*B.T2, sid, *B.V2, std::cout); std::cout << "\n";
B.write_yawat_alignment(sid,
m1.size() ? &m1 : NULL,
m2.size() ? &m2 : NULL, std::cout);
write_sentence(*B->T1, sid, *B->V1, std::cout); std::cout << "\n";
write_sentence(*B->T2, sid, *B->V2, std::cout); std::cout << "\n";
B->write_yawat_alignment(sid,
m1.size() ? &m1 : NULL,
m2.size() ? &m2 : NULL, std::cout);
std::cout << std::endl;
}

View File

@ -41,8 +41,8 @@ basename(string const path, string const suffix)
int main(int argc, char* argv[])
{
bitext_t B;
B.open(argv[1],argv[2],argv[3]);
boost::shared_ptr<bitext_t> B(new bitext_t);
B->open(argv[1],argv[2],argv[3]);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
@ -52,10 +52,10 @@ int main(int argc, char* argv[])
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B.V1->fillIdSeq(line,snt);
B->V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B.I1.get());
bitext_t::iter m(B->I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
{
if (m.ca() > 500) continue;
@ -65,9 +65,10 @@ int main(int argc, char* argv[])
while (I.next != stop)
{
m.root->readEntry(I.next,I);
++cnt[B.docname(I.sid)];
++cnt[B->sid2docname(I.sid)];
}
cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl;
cout << setw(8) << int(m.ca()) << " "
<< B->V1->toString(&snt[i],&snt[k+1]) << endl;
typedef pair<string,uint32_t> entry;
vector<entry> ranked; ranked.reserve(cnt.size());
BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);

View File

@ -37,7 +37,7 @@ basename(string const path, string const suffix)
size_t k = path.size() - suffix.size();
cout << path << " " << suffix << endl;
cout << path.substr(0,p) << " " << path.substr(k) << endl;
return path.substr(p, suffix == &path[k] ? k-p : path.size() - p);
return path.substr(p+1, suffix == &path[k] ? k-p-1 : path.size() - p);
}
int main(int argc, char* argv[])
@ -47,6 +47,7 @@ int main(int argc, char* argv[])
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
id_type docid = B->docname2docid(docname);
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
@ -57,13 +58,70 @@ int main(int argc, char* argv[])
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B->I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
for (size_t num_occurrences = m.ca(); m.size(); m.up())
{
if (size_t(m.ca()) == num_occurrences) continue;
num_occurrences = m.ca();
SPTR<SamplingBias const> zilch;
BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
sapt::random_sampling);
s();
cout << m.size() << " " << s.stats()->trg.size() << endl;
if (s.stats()->trg.size() == 0) continue;
// if (s.stats()->indoc[docname] > 10) continue;
sapt::pstats::indoc_map_t::const_iterator d
= s.stats()->indoc.find(docid);
size_t indoccnt = d != s.stats()->indoc.end() ? d->second : 0;
cout << m.size() << " : " << m.str(B->V1.get()) << " ("
<< s.stats()->trg.size() << " entries; "
<< indoccnt << "/" << s.stats()->good
<< " samples in domain)" << endl;
vector<PhrasePair<Token> > ppairs;
PhrasePair<Token>::SortDescendingByJointCount sorter;
expand(m,*B,*s.stats(),ppairs,NULL);
sort(ppairs.begin(),ppairs.end(),sorter);
boost::format fmt("%4d/%d/%d |%s| (%4.2f : %4.2f)");
BOOST_FOREACH(PhrasePair<Token>& ppair, ppairs)
{
if (ppair.joint * 100 < ppair.good1) break;
ppair.good2 = ppair.raw2 * float(ppair.good1)/ppair.raw1;
ppair.good2 = max(ppair.good2, ppair.joint);
#if 0
cout << "\t"
<< (fmt % ppair.joint % ppair.good1 % ppair.good2
% B->T2->pid2str(B->V2.get(),ppair.p2)
% (float(ppair.joint)/ppair.good1)
% (float(ppair.joint)/ppair.good2)
) << "\n";
typedef std::map<uint32_t, uint32_t>::const_iterator iter;
for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d)
{
// if (d != ppair.indoc.begin()) cout << "; ";
cout << (boost::format("\t\t%4d %s") % d->second
% B->docid2name(d->first))
<< endl;
}
cout << endl;
#else
cout << "\t"
<< (fmt % ppair.joint % ppair.good1 % ppair.good2
% B->T2->pid2str(B->V2.get(),ppair.p2)
% (float(ppair.joint)/ppair.good1)
% (float(ppair.joint)/ppair.good2)
) << " [";
typedef std::map<uint32_t, uint32_t>::const_iterator iter;
for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d)
{
if (d != ppair.indoc.begin()) cout << "; ";
cout << (boost::format("%s: %d") % B->docid2name(d->first)
% d->second) ;
}
cout << "]" << endl;
#endif
}
}
}
}

View File

@ -217,17 +217,42 @@ namespace sapt
write_yawat_alignment
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const;
std::string docname(id_type const sid) const;
std::string sid2docname(id_type const sid) const;
std::string docid2name(id_type const sid) const;
int docname2docid(std::string const& name) const;
std::vector<id_type> const* sid2did() const;
int sid2did(uint32_t sid) const;
};
#include "ug_bitext_agenda.h"
template<typename Token>
int
Bitext<Token>::
docname2docid(std::string const& name) const
{
std::map<std::string,id_type>::const_iterator m;
m = m_docname2docid.find(name);
if (m != m_docname2docid.end()) return m->second;
return -1;
}
template<typename Token>
std::string
Bitext<Token>::
docname(id_type const sid) const
docid2name(id_type const did) const
{
if (did < m_docname.size())
return m_docname[did];
else
return (boost::format("%d") % did).str();
}
template<typename Token>
std::string
Bitext<Token>::
sid2docname(id_type const sid) const
{
if (sid < m_sid2docid->size() && (*m_sid2docid)[sid] < m_docname.size())
return m_docname[(*m_sid2docid)[sid]];
@ -243,6 +268,17 @@ namespace sapt
return m_sid2docid.get();
}
template<typename Token>
int
Bitext<Token>::
sid2did(uint32_t sid) const
{
if (m_sid2docid)
return m_sid2docid->at(sid);
return -1;
}
template<typename Token>
SPTR<SentenceBias>
Bitext<Token>::

View File

@ -52,7 +52,7 @@ BitextSampler : public Moses::reference_counter
// const members
// SPTR<bitext const> const m_bitext; // keep bitext alive while I am
// should be an
iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
SPTR<bitext const> const m_bitext; // keep bitext alive as long as I am
size_t const m_plen; // length of lookup phrase
bool const m_fwd; // forward or backward direction?
SPTR<tsa const> const m_root; // root of suffix array
@ -275,7 +275,7 @@ consider_sample(TokenPosition const& p)
bitvector full_aln(100*100);
PhraseExtractionRecord
rec(p.sid, p.offset, p.offset + m_plen, !m_fwd, &aln, &full_aln);
int docid = m_bias ? m_bias->GetClass(p.sid) : -1;
int docid = m_bias ? m_bias->GetClass(p.sid) : m_bitext->sid2did(p.sid);
if (!m_bitext->find_trg_phr_bounds(rec))
{ // no good, probably because phrase is not coherent
m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd);

View File

@ -16,7 +16,7 @@ struct StatsCollector
typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t;
typedef ThreadSafeContainer<uint64_t, SPTR<pstats> > pcache_t;
typedef map<uint64_t, SPTR<pstats> > lcache_t;
iptr<Bitext<Token> const> bitext; // underlying bitext
SPTR<Bitext<Token> const> bitext; // underlying bitext
sampling_method method; // sampling method
size_t sample_size; // sample size
SPTR<SamplingBias const> bias; // sampling bias
@ -26,7 +26,7 @@ struct StatsCollector
SPTR<lcache_t> lcache; // local cache
ug::ThreadPool* tpool; // thread pool to run jobs on
StatsCollector(iptr<Bitext<Token> > xbitext,
StatsCollector(SPTR<Bitext<Token> > xbitext,
SPTR<SamplingBias> const xbias)
: method(ranked_sampling)
, sample_size(100)

View File

@ -71,7 +71,7 @@ namespace Moses
typedef sapt::PhraseScorer<Token> pscorer;
private:
// vector<SPTR<bitext> > shards;
iptr<mmbitext> btfix;
SPTR<mmbitext> btfix;
SPTR<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
@ -160,7 +160,7 @@ namespace Moses
#if PROVIDES_RANKED_SAMPLING
void
set_bias_for_ranking(ttasksptr const& ttask, iptr<sapt::Bitext<Token> const> bt);
set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt);
#endif
private:

View File

@ -65,6 +65,7 @@ namespace Moses
// set m_nbest_options.enabled = true if necessary:
nbest.enabled = (nbest.enabled || mira || search.consensus
|| nbest.nbest_size > 0
|| mbr.enabled || lmbr.enabled
|| !output.SearchGraph.empty()
|| !output.SearchGraphExtended.empty()
|| !output.SearchGraphSLF.empty()

View File

@ -43,7 +43,7 @@ update(std::map<std::string,xmlrpc_c::value>const& param)
params_t::const_iterator si = param.find("nbest");
if (si != param.end())
nbest_size = xmlrpc_c::value_int(si->second);
only_distinct = check(param, "nbest-distinct");
only_distinct = check(param, "nbest-distinct", only_distinct);
enabled = (nbest_size > 0);
return true;
}

View File

@ -1,5 +1,7 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#include "OptionsBaseClass.h"
#include "moses/Util.h"
namespace Moses {
#ifdef HAVE_XMLRPC_C
@ -15,10 +17,12 @@ namespace Moses {
bool
OptionsBaseClass::
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key)
std::string const key, bool dfltval)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (param.find(key) != param.end());
m = param.find(key);
if (m == param.end()) return dfltval;
return Scan<bool>(xmlrpc_c::value_string(m->second));
}
#endif
}

View File

@ -13,6 +13,6 @@ namespace Moses
#endif
bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key);
std::string const key, bool dfltval);
};
}

View File

@ -75,7 +75,7 @@ namespace Moses {
ReportingOptions::
update(std::map<std::string, xmlrpc_c::value>const& param)
{
ReportAllFactors = check(param, "report-all-factors");
ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
return true;
}
#endif

View File

@ -5,14 +5,15 @@
namespace Moses {
void
Hypothesis::
OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest, const ReportingOptions &options) const
OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest) const
{
using namespace std;
Range const& src = this->GetCurrSourceWordsRange();
Range const& trg = this->GetCurrTargetWordsRange();
WordAlignmentSort waso = m_manager.options().output.WA_SortOrder;
vector<pair<size_t,size_t> const* > a
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(options.WA_SortOrder);
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
typedef pair<size_t,size_t> item;
map<string, xmlrpc_c::value> M;
BOOST_FOREACH(item const* p, a) {
@ -24,13 +25,13 @@ namespace Moses {
void
Hypothesis::
OutputWordAlignment(std::vector<xmlrpc_c::value>& out, const ReportingOptions &options) const
OutputWordAlignment(std::vector<xmlrpc_c::value>& out) const
{
std::vector<Hypothesis const*> tmp;
for (Hypothesis const* h = this; h; h = h->GetPrevHypo())
tmp.push_back(h);
for (size_t i = tmp.size(); i-- > 0;)
tmp[i]->OutputLocalWordAlignment(out, options);
tmp[i]->OutputLocalWordAlignment(out);
}
}

View File

@ -3,6 +3,8 @@
#include "moses/ContextScope.h"
#include <boost/foreach.hpp>
#include "moses/Util.h"
#include "moses/Hypothesis.h"
namespace MosesServer
{
using namespace std;
@ -164,15 +166,15 @@ insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
}
void
TranslationRequest::
output_phrase(ostream& out, Phrase const& phrase) const
{
if (!m_options.output.ReportAllFactors) {
for (size_t i = 0 ; i < phrase.GetSize(); ++i)
out << *phrase.GetFactor(i, 0) << " ";
} else out << phrase;
}
// void
// TranslationRequest::
// output_phrase(ostream& out, Phrase const& phrase) const
// {
// if (!m_options.output.ReportAllFactors) {
// for (size_t i = 0 ; i < phrase.GetSize(); ++i)
// out << *phrase.GetFactor(i, 0) << " ";
// } else out << phrase;
// }
void
TranslationRequest::
@ -193,7 +195,7 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
vector<const Hypothesis *> const& E = path->GetEdges();
if (!E.size()) continue;
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
pack_hypothesis(E, "hyp", nBestXmlItem);
pack_hypothesis(manager, E, "hyp", nBestXmlItem);
if (m_withScoreBreakdown) {
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
@ -262,8 +264,11 @@ bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (param.find(key) != param.end());
std::map<std::string, xmlrpc_c::value>::const_iterator m = param.find(key);
if(m == param.end()) return false;
std::string val = string(xmlrpc_c::value_string(m->second));
if(val == "true" || val == "True" || val == "TRUE" || val == "1") return true;
return false;
}
void
@ -367,15 +372,17 @@ run_chart_decoder()
void
TranslationRequest::
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const& edges, string const& key,
map<string, xmlrpc_c::value> & dest) const
{
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
output_phrase(target, e->GetCurrTargetPhrase());
XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
manager.OutputSurface(target, *e, m_options.output.factor_order,
m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
}
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);
dest[key] = xmlrpc_c::value_string(target.str());
if (m_withAlignInfo) {
@ -383,7 +390,7 @@ pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
vector<xmlrpc_c::value> p_aln;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
add_phrase_aln_info(*e, p_aln);
add_phrase_aln_info(*e, p_aln);
dest["align"] = xmlrpc_c::value_array(p_aln);
}
@ -391,21 +398,21 @@ pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
// word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_FOREACH(Hypothesis const* e, edges)
e->OutputLocalWordAlignment(w_aln, m_options.output);
e->OutputLocalWordAlignment(w_aln);
dest["word-align"] = xmlrpc_c::value_array(w_aln);
}
}
void
TranslationRequest::
pack_hypothesis(Hypothesis const* h, string const& key,
pack_hypothesis(const Moses::Manager& manager, Hypothesis const* h, string const& key,
map<string, xmlrpc_c::value>& dest) const
{
using namespace std;
vector<Hypothesis const*> edges;
for (; h; h = h->GetPrevHypo())
edges.push_back(h);
pack_hypothesis(edges, key, dest);
pack_hypothesis(manager, edges, key, dest);
}
@ -422,7 +429,7 @@ run_phrase_decoder()
manager.Decode();
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData);
if (m_session_id)
m_retData["session-id"] = xmlrpc_c::value_int(m_session_id);

View File

@ -58,17 +58,16 @@ TranslationRequest : public virtual Moses::TranslationTask
run_phrase_decoder();
void
pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
pack_hypothesis(const Moses::Manager& manager, std::vector<Moses::Hypothesis const* > const& edges,
std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
void
pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
void
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
// void
// output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
void
add_phrase_aln_info(Moses::Hypothesis const& h,

View File

@ -1,6 +1,7 @@
import option path ;
with-regtest = [ option.get "with-regtest" ] ;
with-xmlrpc = [ option.get "with-xmlrpc-c" ] ;
if $(with-regtest) {
with-regtest = [ path.root $(with-regtest) [ path.pwd ] ] ;
@ -24,9 +25,20 @@ if $(with-regtest) {
actions reg_test_decode {
$(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ;
if $(with-xmlrpc) {
actions reg_test_decode_server {
$(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ;
}
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
if [ option.get "with-dalm" : : "yes" ] {
reg_test dalm : [ glob $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
} else {
alias dalm ;
}
actions reg_test_score {
$(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
@ -55,5 +67,5 @@ if $(with-regtest) {
reg_test misc : [ glob $(test-dir)/misc.* : $(test-dir)/misc.mml* ] : ..//prefix-bin ..//prefix-lib : @reg_test_misc ;
reg_test misc-mml : [ glob $(test-dir)/misc.mml* ] : $(TOP)/scripts/ems/support/mml-filter.py $(TOP)/scripts/ems/support/defaultconfig.py : @reg_test_misc ;
alias all : phrase chart mert score extract extractrules misc misc-mml ;
alias all : phrase chart mert score extract extractrules misc misc-mml dalm ;
}

View File

@ -2,6 +2,8 @@
# $Id$
use Encode;
use utf8;
use warnings;
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
@ -9,6 +11,7 @@ use MosesRegressionTesting;
use Getopt::Long;
use File::Temp qw ( tempfile );
use POSIX qw ( strftime );
use POSIX ":sys_wait_h";
my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN );
my ($decoder, $test_name);
@ -17,14 +20,31 @@ my $data_dir;
my $BIN_TEST = $script_dir;
my $results_dir;
my $NBEST = 0;
my $run_server_test = 0;
my $serverport = int(rand(9999)) + 10001;
my $url = "http://localhost:$serverport/RPC2";
my $startupTest = 0;
GetOptions("decoder=s" => \$decoder,
"test=s" => \$test_name,
"data-dir=s"=> \$data_dir,
"test-dir=s"=> \$test_dir,
"results-dir=s"=> \$results_dir,
"server"=> \$run_server_test,
"startuptest"=> \$startupTest
) or exit 1;
if($run_server_test)
{
eval {
require XMLRPC::Lite;
import XMLRPC::Lite;
};
if ($@) {
die "Error: XMLRPC::Lite not installed, moses server regression tests will not be run. $@";
}
exit(0) if($startupTest);
}
die "Please specify a decoder with --decoder\n" unless $decoder;
die "Please specify a test to run with --test\n" unless $test_name;
@ -72,8 +92,13 @@ if (!-d $truth) {
}
print "RESULTS AVAILABLE IN: $results\n\n";
my ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results);
my ($o, $elapsed, $ec, $sig);
if($run_server_test) {
($o, $elapsed, $ec, $sig) = exec_moses_server($decoder, $local_moses_ini, $input, $results);
}
else {
($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results);
}
my $error = ($sig || $ec > 0);
if ($error) {
open OUT, ">$results/Summary";
@ -139,6 +164,54 @@ sub exec_moses {
return ($o, $elapsed, $ec, $sig);
}
sub exec_moses_server {
my ($decoder, $conf, $input, $results) = @_;
my $start_time = time;
my ($o, $ec, $sig);
$ec = 0; $sig = 0; $o = 0;
my $pid = fork();
if (not defined $pid) {
warn "resources not avilable to fork Moses server\n";
$ec = 1; # to generate error
} elsif ($pid == 0) {
setpgrp(0, 0);
warn "Starting Moses server on port $serverport ...\n";
($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr ");
exit;
# this should not be reached unless the server fails to start
}
while( 1==1 ) # wait until the server is listening for requests
{
sleep 5;
my $str = `grep "Listening on port $serverport" $results/run.stderr`;
last if($str =~ /Listening/);
}
my $proxy = XMLRPC::Lite->proxy($url);
warn "Opening file $input to write to $results\n";
open(TEXTIN, "$input") or die "Can not open the input file to translate with Moses server\n";
binmode TEXTIN, ':utf8';
open(TEXTOUT, ">$results/run.stdout");
binmode TEXTOUT, ':utf8';
while(<TEXTIN>)
{
chop;
my $encoded = SOAP::Data->type(string => $_); # NOTE: assuming properly encoded UTF-8 input: check tests before adding them!
my %param = ("text" => $encoded);
my $result = $proxy->call("translate",\%param)->result;
print TEXTOUT $result->{'text'} . "\n";
}
close(TEXTIN);
close(TEXTOUT);
my $elapsed = time - $start_time;
print STDERR "Finished translating file $input\n";
if(waitpid($pid, WNOHANG) <= 0)
{
warn "Killing process group $pid of the $decoder --server ... \n";
kill 9, -$pid;
}
return ($o, $elapsed, $ec, $sig);
}
sub run_command {
my ($cmd) = @_;
my $o = `$cmd`;

@ -1 +1 @@
Subproject commit e07a00c9733e0fecb8433f1c9d5805d3f0b35c6f
Subproject commit 37a595fd7bf41226933c0fdb6fb792bdc877c3fd

View File

@ -2,13 +2,21 @@
# this script assumes that all 3rd-party dependencies are installed under ./opt
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
set -e -o pipefail
set -e -o pipefail -x
opt=$(pwd)/opt
git submodule init
git submodule update regtest
if [ "$RECOMPILE" == "NO" ] ; then
RECOMPILE=
else
RECOMPILE="-a"
fi
# test compilation without xmlrpc-c
./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest -a -q $@ || exit $?
./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=$(pwd)/regtest -a -q $@ || exit $?
# test compilation with xmlrpc-c
./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest -a -q $@
if ./regression-testing/run-single-test.perl --server --startuptest ; then
./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=$(pwd)/regtest $RECOMPILE -q $@
fi

View File

@ -729,19 +729,21 @@ extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
out: extracted-phrases
rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm domain-features baseline-extract lexicalized-reordering
pass-if: mmsapt
only-existence-matters: domain-features
default-name: model/extract
build-reordering
in: extracted-phrases
out: reordering-table
ignore-unless: lexicalized-reordering
pass-if: mmsapt
rerun-on-change: lexicalized-reordering reordering-factors
default-name: model/reordering-table
final-model: yes
build-ttable
in: extracted-phrases lexical-translation-table corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains
out: phrase-translation-table
rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script EVALUATION:report-precision-by-coverage include-word-alignment-in-rules domain-features
rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script include-word-alignment-in-rules domain-features
default-name: model/phrase-table
ignore-if: suffix-array mmsapt
final-model: yes

View File

@ -2571,7 +2571,7 @@ sub get_config_tables {
$cmd .= ":$numFF" if defined($numFF);
$cmd .= " ";
$cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table) if $reordering_table;
$cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table) if $reordering_table && !defined($mmsapt);
$cmd .= &get_table_name_settings("generation-factors","generation-table",$generation_table) if $generation_table;
$cmd .= "-config $config ";

View File

@ -90,9 +90,10 @@ def run_instance(cmd_base, threads, tasks, n_best=False):
cmd.append('--threads')
cmd.append(str(threads))
try:
# Queue of tasks instance is currently working on, limited to the number of
# threads. The queue should be kept full for optimal CPU usage.
work = Queue.Queue(maxsize=threads)
# Queue of tasks instance is currently working on, limited to the number
# of threads * 2 (minimal buffering). The queue should be kept full for
# optimal CPU usage.
work = Queue.Queue(maxsize=(threads * 2))
# Multi-threaded instance
moses = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

775
scripts/generic/score_parallel.py Executable file
View File

@ -0,0 +1,775 @@
#! /usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#
# Script contributed by Precision Translation Tools.
"""Run Moses `score` jobs in parallel.
This script is a replacement for `score-parallel.perl`. The two are similar,
but there are differences in usage. In addition, this script can be called
directly from Python code without the need to run it as a separate process.
"""
from __future__ import (
absolute_import,
print_function,
unicode_literals,
)
__metaclass__ = type
from argparse import ArgumentParser
from contextlib import contextmanager
from datetime import datetime
import errno
import gzip
from multiprocessing import Pool
import os
import os.path
import pipes
from shutil import rmtree
from subprocess import check_call
import sys
import tempfile
def get_unicode_type():
    """Return the Unicode string type appropriate to this Python version."""
    # Python 3 renamed "unicode" to plain "str" and split binary data off
    # into "bytes"; Python 2 kept "unicode" and "str" (binary) separate.
    if sys.version_info.major > 2:
        return str
    return unicode  # noqa: F821 -- name only exists (and is evaluated) on Python 2


UNICODE_TYPE = get_unicode_type()
# Raised by sanitize_args()/find_executable() when the user-supplied
# options are unusable (bad --jobs value, missing executable, etc.).
class CommandLineError(Exception):
    """Invalid command line."""


# Raised by helpers such as find_first_executable() for expected runtime
# failures (e.g. no suitable executable found on the system).
class ProgramFailure(Exception):
    """Failure, not a bug, which is reported neatly to the user."""
def parse_args():
    """Parse command line arguments, return as `Namespace`.

    Only static defaults are set here; computed defaults (sort command,
    gzip command, path to the `score` executable) are filled in later by
    `sanitize_args`.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        '--extract-file', '-e', metavar='PATH', required=True,
        help=(
            "Path to input file: extract file (e.g. 'extract.sorted.gz' or "
            "'extract.inv.sorted.gz'). Required."))
    parser.add_argument(
        '--lex-file', '-l', metavar='PATH', required=True,
        help=(
            "Path to input file: lex file (e.g. 'lex.f2e' or 'lex.e2f'). "
            "Required."))
    parser.add_argument(
        '--output', '-o', metavar='PATH', required=True,
        help=(
            "Write phrase table to file PATH (e.g. 'phrase-table.half.f2e' "
            "or 'phrase-table.half.e2f'). Required."))
    parser.add_argument(
        '--inverse', '-i', action='store_true',
        help="Inverse scoring. Defaults to direct scoring.")
    parser.add_argument(
        '--labels-file', '-L', metavar='PATH',
        help="Also write source labels to file PATH.")
    parser.add_argument(
        '--parts-of-speech', '-p', metavar='PATH',
        help="Also write parts-of-speech file to PATH.")
    parser.add_argument(
        '--flexibility-score', '-F', metavar='PATH',
        help="Path to the 'flexibility_score.py' script. Defaults to none.")
    parser.add_argument(
        '--hierarchical', '-H', action='store_true',
        help="Process hierarchical rules.")
    parser.add_argument(
        '--args', '-a', metavar='ARGUMENTS',
        help="Additional arguments for `score` and `flexibility_score`.")
    parser.add_argument(
        '--sort', '-s', action='store_true',
        help="Sort output file.")
    parser.add_argument(
        '--jobs', '-j', metavar='N', type=int, default=1,
        help="Run up to N jobs in parallel. Defaults to %(default)s.")
    parser.add_argument(
        '--score-exe', '-x', metavar='PROGRAM',
        help="Name of, or path to, the 'score' executable.")
    parser.add_argument(
        '--sort-command', '-S', metavar='COMMAND-LINE',
        help=(
            "Command line for sorting text files to standard output. "
            "Must support operation as a pipe, as well as input files named "
            "as command-line arguments."))
    parser.add_argument(
        '--gzip-command', '-z', metavar='PROGRAM',
        help="Path to a gzip or pigz executable.")
    parser.add_argument(
        '--verbose', '-v', action='store_true',
        help="Print what's going on.")
    parser.add_argument(
        '--debug', '-d', action='store_true',
        help="Don't delete temporary directories when done.")
    return parser.parse_args()
def normalize_path(optional_path=None):
    """Return a cleaned-up version of a given filesystem path, or None.

    Converts the path to the operating system's native separator and
    removes redundancies such as `.` components, doubled separators, and
    unnecessary detours through parent directories. `None` passes through
    unchanged; relative paths stay relative, absolute stay absolute.
    """
    if optional_path is None:
        return None
    cleaned = os.path.normpath(optional_path)
    # Force both separator flavours (slash and backslash) to the native one.
    for separator in ('/', '\\'):
        cleaned = cleaned.replace(separator, os.path.sep)
    return cleaned
def quote(path):
    """Quote and escape a filename for use in a shell command.

    On POSIX this delegates to the standard library's shell quoting.
    `shlex.quote` is preferred (the `pipes` module is deprecated and
    removed in Python 3.13); `pipes.quote` is the Python 2 fallback.

    The Windows implementation is very limited and will break on anything
    more advanced than a space.
    """
    if os.name == 'posix':
        try:
            from shlex import quote as shell_quote  # Python 3
        except ImportError:
            from pipes import quote as shell_quote  # Python 2
        return shell_quote(path)
    else:
        # TODO: Improve escaping for Windows.
        return '"%s"' % path
def sanitize_args(args):
    """Check `args` for sanity, clean up, and set nontrivial defaults.

    Mutates `args` in place: fills in sort/gzip/score executables when not
    given, and normalizes every path-valued option.

    :raise CommandLineError: if `--jobs` is below 1 or a required external
        command cannot be found.
    """
    if args.jobs < 1:
        raise CommandLineError("Number of parallel jobs must be 1 or more.")
    if args.sort_command is None:
        args.sort_command = find_first_executable(
            ['neandersort', 'gsort', 'sort'])
        # NOTE(review): find_first_executable raises ProgramFailure instead
        # of returning None, so this fallback check looks unreachable --
        # confirm intended.
        if args.sort_command is None:
            raise CommandLineError(
                "No 'sort' command is available. "
                "Choose one using the --sort-command option.")
    if args.gzip_command is None:
        args.gzip_command = find_first_executable(['pigz', 'gzip'])
        if args.gzip_command is None:
            raise CommandLineError(
                "No 'gzip' or 'pigz' command is available. "
                "Choose one using the --gzip-command option.")
    if args.score_exe is None:
        # Look for "score" executable. It may be in the current project
        # directory somewhere, or in the PATH.
        moses_dir = os.path.dirname(os.path.dirname(
            os.path.abspath(__file__)))
        args.score_exe = find_first_executable(
            ['score'],
            [
                moses_dir,
                os.path.join(moses_dir, 'phrase-extract'),
                os.path.join(moses_dir, 'binaries'),
            ])
    # Normalize all path-valued options to native separators.
    args.extract_file = normalize_path(args.extract_file)
    args.lex_file = normalize_path(args.lex_file)
    args.output = normalize_path(args.output)
    args.labels_file = normalize_path(args.labels_file)
    args.parts_of_speech = normalize_path(args.parts_of_speech)
    args.flexibility_score = normalize_path(args.flexibility_score)
    args.score_exe = normalize_path(args.score_exe)
def add_exe_suffix(program):
    """Return the full filename for an executable.

    On Windows ('nt') the name gets a `.exe` suffix; on any other system
    the original name is returned unchanged.
    """
    suffix = '.exe' if os.name == 'nt' else ''
    return program + suffix
def find_executable(exe, extra_path=None):
    """Return full path to an executable of the given name, or `None`.

    If the given name is a qualified path to an executable, it will be
    returned unchanged. A qualified path where no executable is found
    results in a `CommandLineError`. Otherwise the name is searched for in
    `extra_path` (first) and then the directories of `$PATH`.
    """
    if os.path.sep in exe:
        # The executable name includes a path. Only one place it can be.
        executable_ok = os.path.isfile(exe) and os.access(exe, os.X_OK)
        if not executable_ok:
            raise CommandLineError("Not an executable: '%s'." % exe)
        return exe
    search_dirs = list(extra_path) if extra_path is not None else []
    search_dirs += os.getenv('PATH').split(os.pathsep)
    for directory in search_dirs:
        candidate = os.path.join(directory, exe)
        if os.access(candidate, os.X_OK):
            return candidate
    return None
def find_first_executable(candidates, extra_path=None):
    """Find the first available of the given candidate programs.

    Each candidate gets the platform's executable suffix appended before
    lookup; the first one found wins.

    :raise ProgramFailure: If none of `candidates` was found.
    """
    for name in candidates:
        found = find_executable(add_exe_suffix(name), extra_path)
        if found is not None:
            return found
    raise ProgramFailure(
        "Could not find any of these executables in path: %s."
        % ', '.join(candidates))
def execute_shell(command, verbose=False):
    """Run `command` string through the shell.

    Inherits the environment, but pins `LC_ALL` to `C` on POSIX for
    predictable results, especially from sort commands.

    This uses a full-featured shell, including pipes, substitution, etc.
    So remember to quote/escape arguments where appropriate!

    :raise CalledProcessError: if the command exits nonzero.
    """
    assert isinstance(command, UNICODE_TYPE), (
        "Wrong argument for execute_shell.")
    if verbose:
        print("Executing: %s" % command)
    env = dict(os.environ)
    if os.name == 'posix':
        env['LC_ALL'] = 'C'
    check_call(command, shell=True, env=env)
@contextmanager
def tempdir(keep=False):
    """Context manager: temporary directory.

    Yields the path of a freshly created temporary directory and removes
    it on exit -- including when the body raises -- unless `keep` is true.
    (The original version skipped cleanup on exceptions, leaking the
    directory.)
    """
    directory = tempfile.mkdtemp()
    try:
        yield directory
    finally:
        if not keep:
            rmtree(directory)
def make_dirs(path):
    """Equivalent to `mkdir -p -- path`.

    Creates `path` and any missing parents; an already-existing directory
    is not an error. Any other `OSError` propagates.
    """
    try:
        os.makedirs(path)
    except OSError as error:
        if error.errno == errno.EEXIST:
            return
        raise
def open_file(path, mode='r'):
    """Open a file, which may be gzip-compressed.

    A `.gz` suffix selects `gzip.open`; anything else uses the builtin
    `open`. The caller is responsible for choosing a text/binary mode that
    matches the opener.
    """
    opener = gzip.open if path.endswith('.gz') else open
    return opener(path, mode)
def count_lines(filename):
    """Count the number of lines in `filename` (may be gzip-compressed)."""
    with open_file(filename) as stream:
        return sum(1 for _ in stream)
def set_temp_dir():
    """Set temporary directory to `$MOSES_TEMP_DIR`, if set.

    Creates the directory if necessary; does nothing when the environment
    variable is absent.
    """
    temp_dir = os.getenv('MOSES_TEMP_DIR')
    if temp_dir is None:
        return
    make_dirs(temp_dir)
    tempfile.tempdir = temp_dir
def strip_newline(line):
    """Remove trailing carriage return and/or line feed, if present.

    Strips at most one LF and then at most one CR, so both Unix ("\\n")
    and DOS ("\\r\\n") line endings are handled; other trailing
    whitespace is left alone.
    """
    for terminator in ('\n', '\r'):
        if line.endswith(terminator):
            line = line[:-1]
    return line
def open_chunk_file(split_dir, chunk_number):
    """Open a file to write one chunk of the extract file."""
    chunk_name = 'extract.%d.gz' % chunk_number
    return open_file(os.path.join(split_dir, chunk_name), 'w')
def name_context_chunk_file(split_dir, chunk_number):
    """Compose file name for one chunk of the extract context file."""
    basename = 'extract.context.%d.gz' % chunk_number
    return os.path.join(split_dir, basename)
def extract_source_phrase(line):
    """Extract the source phrase from an extract-file line.

    Accepts both byte strings and text strings: `cut_context_file` passes
    raw lines read from a gzip stream, while `split_extract_files` passes
    lines it has already decoded to text. The original version always
    split on the bytes separator, which raises `TypeError` for text input
    under Python 3.
    """
    separator = b'|||' if isinstance(line, bytes) else '|||'
    return line.split(separator, 1)[0]
def cut_context_file(last_source_phrase, chunk_file, last_line,
                     context_stream):
    """Write one chunk of extract context file into its own file.

    :param last_source_phrase: Last source phrase that should be in the
        chunk. Stop processing after this source phrase.
    :param chunk_file: Path to the extract context file for this chunk.
    :param last_line: Previously read line that may still need writing.
    :param context_stream: Extract context file, opened for reading.
    :return: Last line read from `context_stream`. This line will still
        need processing. Returns None (implicitly) when the stream is
        exhausted before a new source phrase appears.
    """
    # TODO: Use open_file.
    # NOTE(review): gzip.open(..., 'w') is a binary stream under Python 3,
    # but text is written via '%s\n' -- confirm this runs under Python 3.
    with gzip.open(chunk_file, 'w') as chunk:
        if last_line is not None:
            # Flush the line left over from cutting the previous chunk.
            chunk.write('%s\n' % last_line)
        # Are we processing our last source phrase yet?
        on_last_source_phrase = False
        # Write all lines in context file until we meet last source phrase
        # in extract file.
        for line in context_stream:
            # Reading from a gzip file returns lines *including the newline*.
            # Either way, we want to ignore carriage returns as well.
            line = strip_newline(line)
            source_phrase = extract_source_phrase(line)
            if on_last_source_phrase and source_phrase != last_source_phrase:
                # First new source phrase after our last one. We're done.
                return line
            else:
                # Still adding lines to our chunk.
                chunk.write('%s\n' % line)
                if source_phrase == last_source_phrase:
                    # We're on our last source phrase now.
                    on_last_source_phrase = True
def split_extract_files(split_dir, extract_file, extract_context_file=None,
                        jobs=1):
    """Split extract file into chunks, so we can process them in parallel.

    Chunks are cut only at source-phrase boundaries, so one source phrase
    never straddles two chunks.

    :param split_dir: A temporary directory where this function can write
        temporary files. The caller must ensure that this directory will be
        cleaned up after it's done with the files.
    :return: An iterable of tuples. Each tuple holds a partial extract file,
        and the corresponding context file. The files may be in `split_dir`,
        or there may just be the original extract file.

    Fixes over the original: integer (floor) division for the chunk size
    (plain `/` yields a float on Python 3, so the isinstance assert always
    failed), and the final `cut_context_file` call now receives the chunk
    context *file name* instead of the chunk number.
    """
    if jobs == 1:
        # No splitting needed. Read the original file(s).
        return [(extract_file, extract_context_file)]
    # Otherwise: split files.
    files = []
    num_lines = count_lines(extract_file)
    # Ceiling division; `//` keeps this an int on both Python 2 and 3.
    chunk_size = (num_lines + jobs - 1) // jobs
    assert isinstance(chunk_size, int)
    line_count = 0
    chunk_number = 0
    prev_source_phrase = None
    last_line_context = None
    extract_stream = open_file(extract_file)
    chunk_file = open_chunk_file(split_dir, chunk_number)
    if extract_context_file is None:
        chunk_context_file = None
    if extract_context_file is not None:
        context_stream = open_file(extract_context_file)
    for line in extract_stream:
        line_count += 1
        line = line.decode('utf-8')
        line = strip_newline(line)
        if line_count >= chunk_size:
            # At or over chunk size. Cut off at next source phrase change.
            source_phrase = extract_source_phrase(line)
            if prev_source_phrase is None:
                # Start looking for a different source phrase.
                prev_source_phrase = source_phrase
            elif source_phrase == prev_source_phrase:
                # Can't cut yet. Still working on the same source phrase.
                pass
            else:
                # Hit first new source phrase after chunk limit. Cut new
                # file(s).
                chunk_file.close()
                if extract_context_file is not None:
                    chunk_context_file = name_context_chunk_file(
                        split_dir, chunk_number)
                    last_line_context = cut_context_file(
                        prev_source_phrase, chunk_context_file,
                        last_line_context, context_stream)
                files.append((chunk_file.name, chunk_context_file))
                # Start on new chunk.
                prev_source_phrase = None
                line_count = 0
                chunk_number += 1
                chunk_file = open_chunk_file(split_dir, chunk_number)
        chunk_file.write(('%s\n' % line).encode('utf-8'))
    chunk_file.close()
    if extract_context_file is not None:
        # Fixed: pass the chunk context file name, not the chunk number.
        chunk_context_file = name_context_chunk_file(split_dir, chunk_number)
        last_line_context = cut_context_file(
            prev_source_phrase, chunk_context_file, last_line_context,
            context_stream)
    files.append((chunk_file.name, chunk_context_file))
    return files
def compose_score_command(extract_file, context_file, half_file,
                          flex_half_file, args):
    """Compose command line text to run one instance of `score`.

    :param extract_file: One chunk of extract file.
    :param context_file: If doing flexibility scoring, one chunk of
        extract context file. Otherwise, None.
    :param half_file: Output path handed to `score` (presumably the
        partial half phrase table -- TODO confirm).
    :param flex_half_file: Output path for the gzipped flexibility-score
        result; only used when `context_file` is given.
    :param args: Arguments namespace.
    """
    command = [
        args.score_exe,
        extract_file,
        args.lex_file,
        half_file,
    ]
    if args.args not in (None, ''):
        command.append(args.args)
    # NOTE(review): build_score_args also appends args.args, so user args
    # can end up on the command line twice -- confirm intended.
    other_args = build_score_args(args)
    if other_args != '':
        command.append(other_args)
    if context_file is not None:
        # Pipe the context chunk through the flexibility scorer, then gzip.
        # NOTE(review): 'bzcat' is used to decompress context chunks that
        # are written with a .gz suffix elsewhere -- verify this is right.
        command += [
            '&&',
            find_first_executable(['bzcat']),
            '|',
            quote(args.flexibility_score),
            quote(context_file),
        ]
        if args.inverse:
            command.append('--Inverse')
        if args.hierarchical:
            command.append('--Hierarchical')
        command += [
            '|',
            quote(args.gzip_command),
            '-c',
            '>%s' % quote(flex_half_file),
        ]
    return ' '.join(command)
def score_parallel(split_dir, file_pairs, args):
    """Run the `score` command in parallel.

    :param split_dir: Temporary directory where we can create split files.
    :param file_pairs: Sequence of tuples for the input files, one tuple
        per chunk of the work. Each tuple consists of a partial extract
        file, and optionally a partial extract context file.
    :param args: Arguments namespace.
    :return: A list of tuples. Each tuple contains two file paths. The first
        is for a partial half-phrase-table file. The second is for the
        corresponding partial flex file, if a context file is given; or
        `None` otherwise.
    """
    partial_files = []
    # Pool of worker processes for executing the partial "score" invocations
    # concurrently.
    pool = Pool(args.jobs)
    try:
        for chunk_num, file_pair in enumerate(file_pairs):
            half_file = os.path.join(
                split_dir, 'phrase-table.half.%06d.gz' % chunk_num)
            extract_file, context_file = file_pair
            if context_file is None:
                flex_half_file = None
            else:
                flex_half_file = os.path.join(
                    split_dir, 'phrase-table.half.%06d.flex.gz' % chunk_num)
            # Pickling of arguments for the pool is awkward on Windows, so
            # keep them simple. Compose the command line in the parent
            # process, then hand them to worker processes which execute them.
            command_line = compose_score_command(
                extract_file, context_file, half_file, flex_half_file, args)
            # NOTE(review): the AsyncResult from apply_async is discarded,
            # so a failing shell command is never detected here -- confirm.
            pool.apply_async(
                execute_shell, (command_line, ), {'verbose': args.verbose})
            partial_files.append((half_file, flex_half_file))
        # No more work will be submitted; workers exit when done.
        pool.close()
    except BaseException:
        # Abort outstanding work on any error (including KeyboardInterrupt).
        pool.terminate()
        raise
    finally:
        pool.join()
    return partial_files
def merge_and_sort(files, output, sort_command=None, gzip_exe=None,
                   verbose=False):
    """Merge partial files.

    :param files: List of partial half-phrase-table files.
    :param output: Path for resulting combined phrase-table file.
    """
    # TODO: The Perl code mentioned "sort" and "flexibility_score" here.
    # What do we do with those?
    # Sort whether we're asked to or not, as a way of combining the input
    # files.
    # NOTE(review): sanitize_args stores a full path in sort_command, so
    # this equality test against the bare name may never match -- confirm.
    if sort_command == 'neandersort':
        # Neandersort transparently decompresses input and compresses output.
        check_call([
            'neandersort',
            '-o', output,
        ] + files)
    else:
        # NOTE(review): '>>' appends to an existing output file rather than
        # truncating it -- confirm the output never pre-exists.
        command = (
            "%(gzip)s -c -d %(files)s | "
            "%(sort)s | "
            "%(gzip)s -c >>%(output)s"
            % {
                'gzip': quote(gzip_exe),
                'sort': sort_command,
                'files': ' '.join(map(quote, files)),
                'output': quote(output),
            })
        execute_shell(command, verbose=verbose)
def build_score_args(args):
    """Compose command line for the `score` program.

    Returns a single space-joined string of the flags implied by `args`,
    with any user-supplied extra arguments appended last.
    """
    flags = []
    if args.labels_file:
        flags.extend([
            '--SourceLabels',
            '--SourceLabelCountsLHS',
            '--SourceLabelSet',
        ])
    if args.parts_of_speech:
        flags.append('--PartsOfSpeech')
    if args.inverse:
        flags.append('--Inverse')
    if args.args is not None:
        flags.append(args.args)
    return ' '.join(flags)
def list_existing(paths):
    """Return, in the same order, those of the given files which exist.

    Returns a real list. The original returned `filter(...)`, which is a
    list on Python 2 but a one-shot lazy iterator on Python 3; a list is
    consistent across versions, reusable, and testable for emptiness.
    """
    return [path for path in paths if os.path.exists(path)]
def compose_coc_path_for(path):
    """Compose COC-file path for the given file."""
    return path + '.coc'
def read_cocs(path):
    """Read COC file at `path`, return contents as tuple of ints.

    The file holds one integer per line; line endings are stripped before
    conversion.
    """
    with open(path) as stream:
        return tuple(int(text.rstrip('\r\n')) for text in stream)
def add_cocs(original, additional):
    """Add two tuples of COCs. Extend as needed.

    Either argument may be None (treated as "no counts yet"), but not
    both.  Where the tuples differ in length, the extra entries of the
    longer one are carried through unchanged.
    """
    assert not (original is None and additional is None), "No COCs to add!"
    if additional is None:
        return original
    if original is None:
        return additional
    # Element-wise sum over the overlapping prefix...
    paired = tuple(a + b for a, b in zip(original, additional))
    overlap = len(paired)
    # ...then append whichever tail zip() left out (at most one of these
    # slices is non-empty).
    return paired + tuple(original[overlap:]) + tuple(additional[overlap:])
def merge_coc(files, output):
    """Merge COC files for the given partial files.
    Each COC file is a series of integers, one per line. This reads them, and
    adds them up line-wise into one file of the same format: the sum of the
    numbers the respective files have at line 1, the sum of the numbers the
    respective files have at line 2, and so on.
    """
    assert len(files) > 0, "No partial files - no work to do."
    extract_files = [pair[0] for pair in files]
    if not os.path.exists(compose_coc_path_for(extract_files[0])):
        # Nothing to merge.
        return
    totals = None
    # TODO: Shouldn't we just fail if any of these files is missing?
    for coc_path in list_existing(map(compose_coc_path_for, extract_files)):
        totals = add_cocs(totals, read_cocs(coc_path))
    # Write to output file.
    with open(output, 'w') as output_stream:
        for total in totals:
            output_stream.write('%d\n' % total)
def suffix_line_numbers(infile, outfile):
    """Rewrite `infile` to `outfile`; suffix line number to each line.

    The line number is zero-based, and separated from the rest of the line
    by a single space.

    The numbered output is written to a temporary file next to `outfile`
    and then renamed into place, so a failure part-way through cannot
    leave a truncated `outfile` behind.
    """
    temp_file = '%s.numbering' % outfile
    # Bug fixes vs. the previous version: (1) it wrote directly to
    # `outfile` yet renamed `temp_file` -- which was never created -- so
    # the final os.rename() always failed; (2) it emitted the number
    # after the line's trailing newline, putting it on its own line
    # instead of suffixing it.
    with open(infile, 'r') as instream, open(temp_file, 'w') as outstream:
        for line_no, line in enumerate(instream):
            # Drop the original newline so the number shares the line.
            outstream.write(line.rstrip('\r\n'))
            outstream.write(' %d\n' % line_no)
    os.rename(temp_file, outfile)
def compose_source_labels_path_for(path):
    """Return source labels file path for given file."""
    return path + '.syntaxLabels.src'
def merge_numbered_files(inputs, output, header_lines, sort_command,
                         verbose=False):
    """Sort and merge files `inputs`, add header and line numbers.

    :param inputs: Iterable of input files.
    :param output: Output file.
    :header_lines: Iterable of header lines.
    :sort_command: Command line for sorting input files.
    """
    sort_temp = '%s.sorting' % output
    # Seed the temp file with the header lines, then append the sorted
    # concatenation of the inputs below them.
    with open(sort_temp, 'w') as stream:
        for header in header_lines:
            stream.write(header)
            stream.write('\n')
    execute_shell(
        "%s %s >>%s" % (
            sort_command,
            ' '.join(quote(name) for name in inputs),
            quote(sort_temp)),
        verbose=verbose)
    # Finally, number every line (header included) into the real output.
    suffix_line_numbers(sort_temp, output)
def merge_source_labels(files, output, sort_command, verbose=False):
    """Merge source labels files."""
    # TODO: Shouldn't we just fail if any of these files is missing?
    label_paths = list_existing(
        [compose_source_labels_path_for(name) for name in files])
    # Fixed header entries expected by downstream tools.
    merge_numbered_files(
        label_paths, output,
        ['GlueTop', 'GlueX', 'SSTART', 'SEND'],
        sort_command, verbose=verbose)
def compose_parts_of_speech_path_for(path):
    """Return parts-of-speech file path for given file."""
    return path + '.partsOfSpeech'
def merge_parts_of_speech(files, output, sort_command, verbose=False):
    """Merge parts-of-speech files into output."""
    # TODO: Shouldn't we just fail if any of these files is missing?
    pos_paths = list_existing(
        [compose_parts_of_speech_path_for(name) for name in files])
    # Fixed header entries expected by downstream tools.
    merge_numbered_files(
        pos_paths, output,
        ['SSTART', 'SEND'],
        sort_command, verbose=verbose)
def main():
    """Command-line entry point. Marshals and forwards to `score_parallel`."""
    args = parse_args()
    sanitize_args(args)
    set_temp_dir()
    # Flexibility scoring needs a matching "extract.context.*" companion
    # file derived from the extract file's name.
    if args.flexibility_score is None:
        context_file = None
    else:
        context_file = args.extract_file.replace(
            'extract.', 'extract.context.')
    if args.verbose:
        print("Started %s." % datetime.now())
        print("Using '%s' for gzip." % args.gzip_command)
    with tempdir(args.debug) as work_dir:
        # Split the extract file(s) into per-job chunks, score them in
        # parallel, then merge all the partial outputs.
        chunks = split_extract_files(
            work_dir, args.extract_file,
            extract_context_file=context_file, jobs=args.jobs)
        scored = score_parallel(work_dir, chunks, args)
        if args.verbose:
            sys.stderr.write("Finished score %s.\n" % datetime.now())
        # TODO: Pass on "sort" and "flexibility-score" arguments?
        merge_and_sort(
            [phrase_chunk for phrase_chunk, _ in scored], args.output,
            sort_command=args.sort_command, gzip_exe=args.gzip_command,
            verbose=args.verbose)
        merge_coc(chunks, compose_coc_path_for(args.output))
        # Label/POS merging only applies to the forward (non-inverse) run.
        if not args.inverse and args.labels_file is not None:
            if args.verbose:
                print("Merging source labels files.")
            merge_source_labels(
                chunks, args.labels_file,
                sort_command=args.sort_command, verbose=args.verbose)
        if not args.inverse and args.parts_of_speech is not None:
            if args.verbose:
                print("Merging parts-of-speech files.")
            merge_parts_of_speech(
                chunks, args.parts_of_speech,
                sort_command=args.sort_command, verbose=args.verbose)
# Script entry point: translate the two project-defined exception types
# into short stderr messages with distinct exit codes instead of letting
# a traceback escape.
if __name__ == '__main__':
    try:
        main()
    except ProgramFailure as error:
        # Runtime failure (e.g. a child command failed): exit status 1.
        sys.stderr.write('%s\n' % error)
        sys.exit(1)
    except CommandLineError as error:
        # Bad usage: exit status 2, the conventional CLI-misuse code.
        sys.stderr.write("Command line error: %s\n" % error)
        sys.exit(2)

View File

@ -2141,7 +2141,8 @@ sub create_ini {
# sum up...
$feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count path=$file input-factor=$input_factor output-factor=$output_factor";
$feature_spec .= " L1=$___F L2=$___E " if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table
$feature_spec .= " L1=$___F L2=$___E" if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table
$feature_spec .= " lr-func=LexicalReordering0" if defined($_MMSAPT) && $i==0 && $REORDERING_LEXICAL;
$feature_spec .= "\n";
unless ($phrase_table_impl==11) { # suffix array provides its weights at first iteration
$weight_spec .= "TranslationModel$i=";
@ -2222,7 +2223,7 @@ sub create_ini {
$table_file .= ".";
$table_file .= $model->{"filename"};
$table_file .= ".gz";
$feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor path=$table_file".(defined($_LEXICAL_REORDERING_DEFAULT_SCORES)?" default-scores=$_LEXICAL_REORDERING_DEFAULT_SCORES":"")."\n";
$feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor".((defined($_MMSAPT)&&$i==0)?"":" path=$table_file").(defined($_LEXICAL_REORDERING_DEFAULT_SCORES)?" default-scores=$_LEXICAL_REORDERING_DEFAULT_SCORES":"")."\n";
$weight_spec .= "LexicalReordering$i=";
for(my $j=0;$j<$model->{"numfeatures"};$j++) { $weight_spec .= " 0.3"; }
$weight_spec .= "\n";