Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Barry Haddow 2015-11-23 12:16:46 +00:00
commit 10df006eed
35 changed files with 314 additions and 344 deletions

View File

@ -3,5 +3,6 @@
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
set -e -o pipefail
./bjam --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
# Resolve the dependency prefix to an absolute path once and pass it to every
# --with-* option. All expansions are quoted so that a working directory or a
# forwarded command-line argument containing whitespace is not word-split.
opt="$(pwd)/opt"
./bjam --with-irstlm="$opt" --with-boost="$opt" --with-cmph="$opt" --with-xmlrpc-c="$opt" --with-mm --with-probing-pt -j"$(getconf _NPROCESSORS_ONLN)" "$@"

View File

@ -51,27 +51,8 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,

View File

@ -123,22 +123,6 @@ public:
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
bool Enabled() const {
return m_enabled;

View File

@ -42,24 +42,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -58,23 +58,6 @@ public:
return true;
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -30,11 +30,6 @@ bool CoveredReferenceState::operator==(const FFState& other) const
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath

View File

@ -53,10 +53,6 @@ public:
return new CoveredReferenceState();
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -64,10 +60,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -47,23 +47,6 @@ public:
throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
};
}

View File

@ -64,28 +64,6 @@ public:
UTIL_THROW2("LexicalReordering is not valid for chart decoder");
}
void
EvaluateWithSourceContext
(const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection* estimatedScores = NULL) const
{ }
void
EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &transOptList) const
{ }
void
EvaluateInIsolation(const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{ }
bool
GetHaveDefaultScores() {
return m_haveDefaultScores;

View File

@ -37,18 +37,6 @@ public:
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown

View File

@ -54,23 +54,6 @@ public:
throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -296,18 +296,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const
{};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -15,12 +15,19 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
ReadParameters();
}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and its declaration from the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{}
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and its declaration from the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
@ -29,8 +36,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedScores) const
{}
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and its declaration from the header file) to reduce code clutter.
void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const
{}
FFState* SkeletonStatefulFF::EvaluateWhenApplied(

View File

@ -37,19 +37,35 @@ public:
return new SkeletonState(0);
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const;
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove this declaration here
// and the empty skeleton implementation from the corresponding .cpp
// file to reduce code clutter.
void
EvaluateTranslationOptionListWithSourceContext
( const InputType &input , const TranslationOptionList &translationOptionList) const;
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,

View File

@ -66,6 +66,23 @@ public:
return false;
}
virtual void
EvaluateInIsolation
(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const {}
virtual void
EvaluateWithSourceContext
(InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase,
StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedFutureScore = NULL) const {}
// Default no-op: the body is empty, so the translation option list is only
// received, never inspected. Declared virtual so that a derived class can
// supply its own implementation.
virtual void
EvaluateTranslationOptionListWithSourceContext(const InputType &input,
                                               const TranslationOptionList &translationOptionList) const
{ }
};

View File

@ -48,22 +48,6 @@ public:
ScoreComponentCollection* ) const {
throw std::logic_error("TargetBigramFeature not valid in chart decoder");
}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void SetParameter(const std::string& key, const std::string& value);

View File

@ -215,24 +215,6 @@ public:
virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
void SetParameter(const std::string& key, const std::string& value);
private:

View File

@ -63,22 +63,6 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -69,10 +69,11 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
// out << "ReportHistoryOrder not implemented";
}
void LanguageModel::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
void
LanguageModel::
EvaluateInIsolation(Phrase const& source, TargetPhrase const& targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const
{
// contains factors used by this LM
float fullScore, nGramScore;

View File

@ -94,18 +94,6 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
}
};
}

View File

@ -188,22 +188,6 @@ size_t BilingualLM::getState(const Hypothesis& cur_hypo) const
return hashCode;
}
void BilingualLM::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {}
void BilingualLM::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores) const
{
}
FFState* BilingualLM::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -119,23 +119,6 @@ public:
void Load();
void EvaluateInIsolation(
const Phrase &source,
const TargetPhrase &targetPhrase,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection &estimatedScores) const;
void EvaluateWithSourceContext(
const InputType &input,
const InputPath &inputPath,
const TargetPhrase &targetPhrase,
const StackVec *stackVec,
ScoreComponentCollection &scoreBreakdown,
ScoreComponentCollection *estimatedScores = NULL) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -196,18 +196,7 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,

View File

@ -37,7 +37,7 @@ basename(string const path, string const suffix)
size_t k = path.size() - suffix.size();
cout << path << " " << suffix << endl;
cout << path.substr(0,p) << " " << path.substr(k) << endl;
return path.substr(p, suffix == &path[k] ? k-p : path.size() - p);
return path.substr(p+1, suffix == &path[k] ? k-p-1 : path.size() - p);
}
int main(int argc, char* argv[])
@ -47,6 +47,7 @@ int main(int argc, char* argv[])
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
id_type docid = B->docname2docid(docname);
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
@ -57,13 +58,70 @@ int main(int argc, char* argv[])
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B->I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
for (size_t num_occurrences = m.ca(); m.size(); m.up())
{
if (size_t(m.ca()) == num_occurrences) continue;
num_occurrences = m.ca();
SPTR<SamplingBias const> zilch;
BitextSampler<Token> s(B.get(), m, zilch, 1000, 1000,
sapt::random_sampling);
s();
cout << m.size() << " " << s.stats()->trg.size() << endl;
if (s.stats()->trg.size() == 0) continue;
// if (s.stats()->indoc[docname] > 10) continue;
sapt::pstats::indoc_map_t::const_iterator d
= s.stats()->indoc.find(docid);
size_t indoccnt = d != s.stats()->indoc.end() ? d->second : 0;
cout << m.size() << " : " << m.str(B->V1.get()) << " ("
<< s.stats()->trg.size() << " entries; "
<< indoccnt << "/" << s.stats()->good
<< " samples in domain)" << endl;
vector<PhrasePair<Token> > ppairs;
PhrasePair<Token>::SortDescendingByJointCount sorter;
expand(m,*B,*s.stats(),ppairs,NULL);
sort(ppairs.begin(),ppairs.end(),sorter);
boost::format fmt("%4d/%d/%d |%s| (%4.2f : %4.2f)");
BOOST_FOREACH(PhrasePair<Token>& ppair, ppairs)
{
if (ppair.joint * 100 < ppair.good1) break;
ppair.good2 = ppair.raw2 * float(ppair.good1)/ppair.raw1;
ppair.good2 = max(ppair.good2, ppair.joint);
#if 0
cout << "\t"
<< (fmt % ppair.joint % ppair.good1 % ppair.good2
% B->T2->pid2str(B->V2.get(),ppair.p2)
% (float(ppair.joint)/ppair.good1)
% (float(ppair.joint)/ppair.good2)
) << "\n";
typedef std::map<uint32_t, uint32_t>::const_iterator iter;
for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d)
{
// if (d != ppair.indoc.begin()) cout << "; ";
cout << (boost::format("\t\t%4d %s") % d->second
% B->docid2name(d->first))
<< endl;
}
cout << endl;
#else
cout << "\t"
<< (fmt % ppair.joint % ppair.good1 % ppair.good2
% B->T2->pid2str(B->V2.get(),ppair.p2)
% (float(ppair.joint)/ppair.good1)
% (float(ppair.joint)/ppair.good2)
) << " [";
typedef std::map<uint32_t, uint32_t>::const_iterator iter;
for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d)
{
if (d != ppair.indoc.begin()) cout << "; ";
cout << (boost::format("%s: %d") % B->docid2name(d->first)
% d->second) ;
}
cout << "]" << endl;
#endif
}
}
}
}

View File

@ -217,17 +217,42 @@ namespace sapt
write_yawat_alignment
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const;
std::string docname(id_type const sid) const;
std::string sid2docname(id_type const sid) const;
std::string docid2name(id_type const sid) const;
int docname2docid(std::string const& name) const;
std::vector<id_type> const* sid2did() const;
int sid2did(uint32_t sid) const;
};
#include "ug_bitext_agenda.h"
// Look up the document id stored for `name` in m_docname2docid.
// Returns the stored id, or -1 when the name has no entry.
template<typename Token>
int
Bitext<Token>::
docname2docid(std::string const& name) const
{
  std::map<std::string,id_type>::const_iterator const hit
    = m_docname2docid.find(name);
  if (hit == m_docname2docid.end())
    return -1; // unknown document name
  return hit->second;
}
template<typename Token>
std::string
Bitext<Token>::
docname(id_type const sid) const
docid2name(id_type const did) const
{
if (did < m_docname.size())
return m_docname[did];
else
return (boost::format("%d") % did).str();
}
template<typename Token>
std::string
Bitext<Token>::
sid2docname(id_type const sid) const
{
if (sid < m_sid2docid->size() && (*m_sid2docid)[sid] < m_docname.size())
return m_docname[(*m_sid2docid)[sid]];
@ -243,6 +268,17 @@ namespace sapt
return m_sid2docid.get();
}
// Map sentence id `sid` to its document id via m_sid2docid.
// Returns -1 when no sentence-to-document map is present. Otherwise the
// lookup goes through at(), i.e. it is bounds-checked by the container
// rather than by this function.
template<typename Token>
int
Bitext<Token>::
sid2did(uint32_t sid) const
{
  if (!m_sid2docid)
    return -1; // no sentence-to-document map available
  return m_sid2docid->at(sid);
}
template<typename Token>
SPTR<SentenceBias>
Bitext<Token>::

View File

@ -52,7 +52,7 @@ BitextSampler : public Moses::reference_counter
// const members
// SPTR<bitext const> const m_bitext; // keep bitext alive while I am
// should be an
iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
SPTR<bitext const> const m_bitext; // keep bitext alive as long as I am
size_t const m_plen; // length of lookup phrase
bool const m_fwd; // forward or backward direction?
SPTR<tsa const> const m_root; // root of suffix array
@ -275,7 +275,7 @@ consider_sample(TokenPosition const& p)
bitvector full_aln(100*100);
PhraseExtractionRecord
rec(p.sid, p.offset, p.offset + m_plen, !m_fwd, &aln, &full_aln);
int docid = m_bias ? m_bias->GetClass(p.sid) : -1;
int docid = m_bias ? m_bias->GetClass(p.sid) : m_bitext->sid2did(p.sid);
if (!m_bitext->find_trg_phr_bounds(rec))
{ // no good, probably because phrase is not coherent
m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd);

View File

@ -16,7 +16,7 @@ struct StatsCollector
typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t;
typedef ThreadSafeContainer<uint64_t, SPTR<pstats> > pcache_t;
typedef map<uint64_t, SPTR<pstats> > lcache_t;
iptr<Bitext<Token> const> bitext; // underlying bitext
SPTR<Bitext<Token> const> bitext; // underlying bitext
sampling_method method; // sampling method
size_t sample_size; // sample size
SPTR<SamplingBias const> bias; // sampling bias
@ -26,7 +26,7 @@ struct StatsCollector
SPTR<lcache_t> lcache; // local cache
ug::ThreadPool* tpool; // thread pool to run jobs on
StatsCollector(iptr<Bitext<Token> > xbitext,
StatsCollector(SPTR<Bitext<Token> > xbitext,
SPTR<SamplingBias> const xbias)
: method(ranked_sampling)
, sample_size(100)

View File

@ -71,7 +71,7 @@ namespace Moses
typedef sapt::PhraseScorer<Token> pscorer;
private:
// vector<SPTR<bitext> > shards;
iptr<mmbitext> btfix;
SPTR<mmbitext> btfix;
SPTR<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
@ -160,7 +160,7 @@ namespace Moses
#if PROVIDES_RANKED_SAMPLING
void
set_bias_for_ranking(ttasksptr const& ttask, iptr<sapt::Bitext<Token> const> bt);
set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt);
#endif
private:

View File

@ -3,6 +3,8 @@
#include "moses/ContextScope.h"
#include <boost/foreach.hpp>
#include "moses/Util.h"
#include "moses/Hypothesis.h"
namespace MosesServer
{
using namespace std;
@ -164,15 +166,15 @@ insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
}
void
TranslationRequest::
output_phrase(ostream& out, Phrase const& phrase) const
{
if (!m_options.output.ReportAllFactors) {
for (size_t i = 0 ; i < phrase.GetSize(); ++i)
out << *phrase.GetFactor(i, 0) << " ";
} else out << phrase;
}
// void
// TranslationRequest::
// output_phrase(ostream& out, Phrase const& phrase) const
// {
// if (!m_options.output.ReportAllFactors) {
// for (size_t i = 0 ; i < phrase.GetSize(); ++i)
// out << *phrase.GetFactor(i, 0) << " ";
// } else out << phrase;
// }
void
TranslationRequest::
@ -193,7 +195,7 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
vector<const Hypothesis *> const& E = path->GetEdges();
if (!E.size()) continue;
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
pack_hypothesis(E, "hyp", nBestXmlItem);
pack_hypothesis(manager, E, "hyp", nBestXmlItem);
if (m_withScoreBreakdown) {
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
@ -262,8 +264,11 @@ bool
check(std::map<std::string, xmlrpc_c::value> const& param,
std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (param.find(key) != param.end());
std::map<std::string, xmlrpc_c::value>::const_iterator m = param.find(key);
if(m == param.end()) return false;
std::string val = string(xmlrpc_c::value_string(m->second));
if(val == "true" || val == "True" || val == "TRUE" || val == "1") return true;
return false;
}
void
@ -367,15 +372,17 @@ run_chart_decoder()
void
TranslationRequest::
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const& edges, string const& key,
map<string, xmlrpc_c::value> & dest) const
{
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
output_phrase(target, e->GetCurrTargetPhrase());
XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
manager.OutputSurface(target, *e, m_options.output.factor_order,
m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
}
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);
dest[key] = xmlrpc_c::value_string(target.str());
if (m_withAlignInfo) {
@ -383,7 +390,7 @@ pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
vector<xmlrpc_c::value> p_aln;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
add_phrase_aln_info(*e, p_aln);
add_phrase_aln_info(*e, p_aln);
dest["align"] = xmlrpc_c::value_array(p_aln);
}
@ -391,21 +398,21 @@ pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
// word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_FOREACH(Hypothesis const* e, edges)
e->OutputLocalWordAlignment(w_aln);
e->OutputLocalWordAlignment(w_aln);
dest["word-align"] = xmlrpc_c::value_array(w_aln);
}
}
void
TranslationRequest::
pack_hypothesis(Hypothesis const* h, string const& key,
pack_hypothesis(const Moses::Manager& manager, Hypothesis const* h, string const& key,
map<string, xmlrpc_c::value>& dest) const
{
using namespace std;
vector<Hypothesis const*> edges;
for (; h; h = h->GetPrevHypo())
edges.push_back(h);
pack_hypothesis(edges, key, dest);
pack_hypothesis(manager, edges, key, dest);
}
@ -422,7 +429,7 @@ run_phrase_decoder()
manager.Decode();
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData);
if (m_session_id)
m_retData["session-id"] = xmlrpc_c::value_int(m_session_id);

View File

@ -58,17 +58,16 @@ TranslationRequest : public virtual Moses::TranslationTask
run_phrase_decoder();
void
pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
pack_hypothesis(const Moses::Manager& manager, std::vector<Moses::Hypothesis const* > const& edges,
std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
void
pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
void
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
// void
// output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
void
add_phrase_aln_info(Moses::Hypothesis const& h,

View File

@ -1,6 +1,7 @@
import option path ;
with-regtest = [ option.get "with-regtest" ] ;
with-xmlrpc = [ option.get "with-xmlrpc-c" ] ;
if $(with-regtest) {
with-regtest = [ path.root $(with-regtest) [ path.pwd ] ] ;
@ -24,9 +25,20 @@ if $(with-regtest) {
actions reg_test_decode {
$(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ;
if $(with-xmlrpc) {
actions reg_test_decode_server {
$(TOP)/regression-testing/run-single-test.perl --server --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ;
}
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
if [ option.get "with-dalm" : : "yes" ] {
reg_test dalm : [ glob $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
} else {
alias dalm ;
}
actions reg_test_score {
$(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
@ -55,5 +67,5 @@ if $(with-regtest) {
reg_test misc : [ glob $(test-dir)/misc.* : $(test-dir)/misc.mml* ] : ..//prefix-bin ..//prefix-lib : @reg_test_misc ;
reg_test misc-mml : [ glob $(test-dir)/misc.mml* ] : $(TOP)/scripts/ems/support/mml-filter.py $(TOP)/scripts/ems/support/defaultconfig.py : @reg_test_misc ;
alias all : phrase chart mert score extract extractrules misc misc-mml ;
alias all : phrase chart mert score extract extractrules misc misc-mml dalm ;
}

View File

@ -2,6 +2,8 @@
# $Id$
use Encode;
use utf8;
use warnings;
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
@ -9,6 +11,7 @@ use MosesRegressionTesting;
use Getopt::Long;
use File::Temp qw ( tempfile );
use POSIX qw ( strftime );
use POSIX ":sys_wait_h";
my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN );
my ($decoder, $test_name);
@ -17,14 +20,31 @@ my $data_dir;
my $BIN_TEST = $script_dir;
my $results_dir;
my $NBEST = 0;
my $run_server_test = 0;
my $serverport = int(rand(9999)) + 10001;
my $url = "http://localhost:$serverport/RPC2";
my $startupTest = 0;
GetOptions("decoder=s" => \$decoder,
"test=s" => \$test_name,
"data-dir=s"=> \$data_dir,
"test-dir=s"=> \$test_dir,
"results-dir=s"=> \$results_dir,
"server"=> \$run_server_test,
"startuptest"=> \$startupTest
) or exit 1;
# Server-mode prerequisites: only when --server was given do we need the
# XML-RPC client library, so it is loaded at run time rather than with a
# compile-time "use".
if($run_server_test)
{
# eval traps the failure of "require" so that a missing module yields the
# explicit message below instead of Perl's default one.
eval {
require XMLRPC::Lite;
import XMLRPC::Lite;
};
if ($@) {
# $@ holds the error raised inside the eval block; it is appended verbatim.
die "Error: XMLRPC::Lite not installed, moses server regression tests will not be run. $@";
}
# With --startuptest the script acts purely as a probe: the module loaded
# successfully, so exit with status 0 before any decoder/test checks run.
exit(0) if($startupTest);
}
die "Please specify a decoder with --decoder\n" unless $decoder;
die "Please specify a test to run with --test\n" unless $test_name;
@ -72,8 +92,13 @@ if (!-d $truth) {
}
print "RESULTS AVAILABLE IN: $results\n\n";
my ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results);
# Run the decoder either through the XML-RPC server path (--server) or as a
# plain command-line process. Both helpers are called in list context and
# return (output, elapsed seconds, exit code, signal).
my ($o, $elapsed, $ec, $sig) = $run_server_test
  ? exec_moses_server($decoder, $local_moses_ini, $input, $results)
  : exec_moses($decoder, $local_moses_ini, $input, $results);
my $error = ($sig || $ec > 0);
if ($error) {
open OUT, ">$results/Summary";
@ -139,6 +164,54 @@ sub exec_moses {
return ($o, $elapsed, $ec, $sig);
}
# Runs one regression test against a Moses server.
#
# Forks a child that starts "$decoder --server" on the global $serverport,
# waits until the server reports that it is listening, sends every line of
# $input to the server's "translate" XML-RPC method (via the global $url) and
# writes the returned translations to $results/run.stdout. Finally the
# server's process group is killed if it is still running.
#
# Parameters:
#   $decoder - path of the moses executable
#   $conf    - moses.ini to pass with -f
#   $input   - file with one sentence per line to translate
#   $results - directory receiving run.stdout / run.stderr / run.stderr.server
#
# Returns the list ($o, $elapsed, $ec, $sig). $ec is 1 when the server could
# not be forked or exited before it started listening; otherwise $o, $ec and
# $sig keep their initial value 0.
# Dies if the input or output file cannot be opened.
sub exec_moses_server {
  my ($decoder, $conf, $input, $results) = @_;
  my $start_time = time;
  my ($o, $ec, $sig);
  $ec = 0; $sig = 0; $o = 0;
  my $pid = fork();
  if (not defined $pid) {
    warn "resources not available to fork Moses server\n";
    # No server exists, so waiting for it below would never terminate:
    # report the failure to the caller right away.
    $ec = 1; # to generate error
    return ($o, time - $start_time, $ec, $sig);
  } elsif ($pid == 0) {
    # Child: become leader of a new process group so that the parent can
    # later kill the shell and the decoder it spawned in one go.
    setpgrp(0, 0);
    warn "Starting Moses server on port $serverport ...\n";
    ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr ");
    exit;
    # this should not be reached unless the server fails to start
  }
  while (1) # wait until the server is listening for requests
  {
    sleep 5;
    my $str = `grep "Listening on port $serverport" $results/run.stderr`;
    last if($str =~ /Listening/);
    # A non-zero result means the child is gone (reaped or unknown), i.e. the
    # server terminated without ever listening; polling further would hang.
    if (waitpid($pid, WNOHANG) != 0) {
      warn "Moses server exited before listening on port $serverport\n";
      $ec = 1;
      return ($o, time - $start_time, $ec, $sig);
    }
  }
  my $proxy = XMLRPC::Lite->proxy($url);
  warn "Opening file $input to write to $results\n";
  open(TEXTIN, "$input") or die "Can not open the input file to translate with Moses server\n";
  binmode TEXTIN, ':utf8';
  open(TEXTOUT, ">$results/run.stdout") or die "Can not open $results/run.stdout for writing\n";
  binmode TEXTOUT, ':utf8';
  while(<TEXTIN>)
  {
    # chomp removes only a trailing newline; chop would also eat the last
    # character of a final line that is not newline-terminated.
    chomp;
    my $encoded = SOAP::Data->type(string => $_); # NOTE: assuming properly encoded UTF-8 input: check tests before adding them!
    my %param = ("text" => $encoded);
    my $result = $proxy->call("translate",\%param)->result;
    print TEXTOUT $result->{'text'} . "\n";
  }
  close(TEXTIN);
  close(TEXTOUT);
  my $elapsed = time - $start_time;
  print STDERR "Finished translating file $input\n";
  # The server does not stop on its own: if the child has not exited yet,
  # kill its whole process group (negative pid).
  if(waitpid($pid, WNOHANG) <= 0)
  {
    warn "Killing process group $pid of the $decoder --server ... \n";
    kill 9, -$pid;
  }
  return ($o, $elapsed, $ec, $sig);
}
sub run_command {
my ($cmd) = @_;
my $o = `$cmd`;

@ -1 +1 @@
Subproject commit e07a00c9733e0fecb8433f1c9d5805d3f0b35c6f
Subproject commit 37a595fd7bf41226933c0fdb6fb792bdc877c3fd

View File

@ -2,13 +2,21 @@
# this script assumes that all 3rd-party dependencies are installed under ./opt
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
set -e -o pipefail
set -e -o pipefail -x
opt=$(pwd)/opt
git submodule init
git submodule update regtest
if [ "$RECOMPILE" == "NO" ] ; then
RECOMPILE=
else
RECOMPILE="-a"
fi
# test compilation without xmlrpc-c
./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --no-xmlrpc-c --with-regtest=./regtest -a -q $@ || exit $?
./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=$(pwd)/regtest -a -q $@ || exit $?
# test compilation with xmlrpc-c
./bjam -j$(nproc) --with-irstlm=./opt --with-boost=./opt --with-cmph=./opt --with-xmlrpc-c=./opt --with-regtest=./regtest -a -q $@
if ./regression-testing/run-single-test.perl --server --startuptest ; then
./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=$(pwd)/regtest $RECOMPILE -q $@
fi

View File

@ -90,9 +90,10 @@ def run_instance(cmd_base, threads, tasks, n_best=False):
cmd.append('--threads')
cmd.append(str(threads))
try:
# Queue of tasks instance is currently working on, limited to the number of
# threads. The queue should be kept full for optimal CPU usage.
work = Queue.Queue(maxsize=threads)
# Queue of tasks instance is currently working on, limited to the number
# of threads * 2 (minimal buffering). The queue should be kept full for
# optimal CPU usage.
work = Queue.Queue(maxsize=(threads * 2))
# Multi-threaded instance
moses = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)