Merge ../mosesdecoder into perf_moses2

Hieu Hoang 2015-12-04 23:41:52 +00:00
commit 042f304afc
50 changed files with 317 additions and 212 deletions

Jamroot
View File

@ -113,6 +113,10 @@ external-lib z ;
#requirements += <library>dl ;
#requirements += <cxxflags>-std=c++0x ;
# Allow moses to report the git commit hash of the version used for compilation
moses_githash = [ _shell "git describe --dirty" ] ;
requirements += <define>MOSES_VERSION_ID=\\\"$(moses_githash)\\\" ;
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
if [ option.get "full-tcmalloc" : : "yes" ] {
external-lib unwind ;
@ -168,6 +172,8 @@ mmt = [ option.get "mmt" ] ;
if $(mmt) {
requirements += <define>MMT ;
requirements += <include>$(mmt) ;
mmt_githash = [ _shell "cd $(mmt) && git describe --dirty" ] ;
requirements += <define>MMT_VERSION_ID=\\\"$(mmt_githash)\\\" ;
}
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
@ -238,27 +244,36 @@ project : requirements
build-projects lm util phrase-extract phrase-extract/syntax-common search moses moses/LM mert moses-cmd scripts regression-testing ;
# contrib/mira
if [ option.get "with-mm" : : "yes" ]
if [ option.get "with-mm-extras" : : "yes" ]
{
alias mm :
alias mm-extras :
moses/TranslationModel/UG//bitext-find
moses/TranslationModel/UG//ptable-describe-features
moses/TranslationModel/UG//count-ptable-features
moses/TranslationModel/UG//ptable-lookup
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG/mm//mtt-demo1
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
moses/TranslationModel/UG/mm//symal2mam
moses/TranslationModel/UG/mm//mam2symal
moses/TranslationModel/UG/mm//mam_verify
moses/TranslationModel/UG/mm//mmlex-build
moses/TranslationModel/UG/mm//mmlex-lookup
moses/TranslationModel/UG/mm//mtt-count-words
moses/TranslationModel/UG/mm//calc-coverage
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG//try-align
;
}
else
{
alias mm-extras ;
}
if [ option.get "with-mm" : : "yes" ]
{
alias mm :
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//symal2mam
moses/TranslationModel/UG/mm//mmlex-build
;
}
else
{
@ -297,6 +312,7 @@ biconcor
# contrib/mira//mira
contrib/server//mosesserver
mm
mm-extras
rephraser
contrib/c++tokenizer//tokenizer
contrib/expected-bleu-training//train-expected-bleu
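The Jamroot additions above shell out to "git describe --dirty" and inject the result into the build as quoted string macros (MOSES_VERSION_ID, and MMT_VERSION_ID when --mmt is given); the triple-escaped quotes are what survive bjam and the shell to leave a proper C string literal. A minimal sketch of the consuming side, using a hypothetical MY_VERSION_ID macro rather than the real defines:

#include <iostream>

// Fallback when the build system did not pass -DMY_VERSION_ID="..."
#ifndef MY_VERSION_ID
#define MY_VERSION_ID "unknown"
#endif

int main() {
  // With e.g. -DMY_VERSION_ID=\"v1.0-3-gabc123-dirty\" on the compiler
  // command line, this prints that tag instead of "unknown".
  std::cout << "code version (git tag or commit hash): " << MY_VERSION_ID << "\n";
}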

View File

@ -94,31 +94,38 @@ OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision)
/***
* print surface factor only for the given phrase
*/
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const
void
BaseManager::
OutputSurface(std::ostream &out, Phrase const& phrase) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
std::vector<FactorType> const& factor_order = options().output.factor_order;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
bool markUnknown = options().unk.mark;
std::string const& fd = options().output.FactorDelimiter;
out << "|" << *factor;
}
out << " ";
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, factor_order[0]);
UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix;
}
out << *factor;
for (size_t i = 1 ; i < factor_order.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, factor_order[i]);
UTIL_THROW_IF2(!factor, "Empty factor " << i << " at position " << pos);
out << fd << *factor;
}
if(markUnknown && word.IsOOV()) {
out << options().unk.suffix;
}
out << " ";
}
}
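The rewritten BaseManager::OutputSurface above drops the outputFactorOrder and reportAllFactors arguments and instead reads the factor order, factor delimiter, and unknown-word markers from options(). A self-contained sketch of the same formatting logic, with simplified stand-in types rather than the real Phrase/Factor classes:

#include <iostream>
#include <string>
#include <vector>

// Stand-in for a Moses word: one string per factor, plus an OOV flag.
struct SimpleWord {
  std::vector<std::string> factors;
  bool oov;
};

// Factor 0 first, remaining factors joined by the configured delimiter,
// OOV words wrapped in the unknown-word prefix/suffix markers.
void OutputSurfaceSketch(std::ostream& out, const std::vector<SimpleWord>& phrase,
                         const std::vector<size_t>& factor_order,
                         const std::string& delim, bool mark_unknown,
                         const std::string& unk_prefix, const std::string& unk_suffix) {
  for (const SimpleWord& w : phrase) {
    if (mark_unknown && w.oov) out << unk_prefix;
    out << w.factors.at(factor_order.at(0));
    for (size_t i = 1; i < factor_order.size(); ++i)
      out << delim << w.factors.at(factor_order.at(i));
    if (mark_unknown && w.oov) out << unk_suffix;
    out << ' ';
  }
}

int main() {
  std::vector<SimpleWord> phrase = {{{"house", "NN"}, false}, {{"xyzzy", "UNK"}, true}};
  // Prints "house|NN UNK_xyzzy|UNK " (one trailing space per word, as in the decoder output).
  OutputSurfaceSketch(std::cout, phrase, {0, 1}, "|", true, "UNK_", "");
  std::cout << '\n';
}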

View File

@ -25,10 +25,8 @@ protected:
typedef std::vector<std::pair<Moses::Word, Moses::Range> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputSurface(std::ostream &out,
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
void OutputSurface(std::ostream &out, Phrase const& phrase) const;
void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const;

View File

@ -57,8 +57,8 @@ private:
public:
HypothesisScoreOrdererWithDistortion(const Range* transOptRange,
const bool deterministic = false)
: m_transOptRange(transOptRange)
, m_deterministic(deterministic) {
: m_deterministic(deterministic)
, m_transOptRange(transOptRange) {
m_totalWeightDistortion = 0;
const StaticData &staticData = StaticData::Instance();
@ -128,8 +128,8 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
, m_parent(parent)
, m_translations(translations)
, m_estimatedScores(estimatedScores)
, m_seenPosition()
, m_deterministic(deterministic)
, m_seenPosition()
{
// If either dimension is empty, we haven't got anything to do.
@ -294,8 +294,8 @@ BitmapContainer::BitmapContainer(const Bitmap &bitmap
, bool deterministic)
: m_bitmap(bitmap)
, m_stack(stack)
, m_deterministic(deterministic)
, m_numStackInsertions(0)
, m_deterministic(deterministic)
{
m_hypotheses = HypothesisSet();
m_edges = BackwardsEdgeSet();

View File

@ -257,7 +257,6 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of the arc list if we need a distinct n-best list
*/
AllOptions const& opts = StaticData::Instance().options();
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = opts.nbest.nbest_size;
bool distinctNBest = (opts.nbest.only_distinct
|| opts.mbr.enabled

View File

@ -29,7 +29,7 @@ namespace Moses
{
class ChartSearchGraphWriter;
class AllOptions;
struct AllOptions;
//! functor to compare (chart) hypotheses by (descending) score
class ChartHypothesisScoreOrderer

View File

@ -315,15 +315,15 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.options().nbest.nbest_size;
// const StaticData &staticData = StaticData::Instance();
size_t nBestSize = options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
<< options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
CalcNBest(nBestSize, nBestList, options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -336,8 +336,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const
{
const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
// const StaticData &staticData = StaticData::Instance();
// const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
std::ostringstream out;
@ -347,7 +347,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
NBestOptions const& nbo = StaticData::Instance().options().nbest;
NBestOptions const& nbo = options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
@ -366,7 +366,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
bool with_labels = options().nbest.include_feature_labels;

View File

@ -195,7 +195,7 @@ batch_run()
// ... or the surrounding context (--context-window ...)
size_t size_t_max = std::numeric_limits<size_t>::max();
bool use_context_window = ioWrapper->GetLookAhead() || ioWrapper->GetLookBack();
bool use_context = use_context_window || context_string.size();
// bool use_context = use_context_window || context_string.size();
bool use_sliding_context_window = (use_context_window
&& ioWrapper->GetLookAhead() != size_t_max);

View File

@ -10,7 +10,7 @@ class FFState;
namespace Syntax
{
class SHyperedge;
struct SHyperedge;
}
/** base class for all stateful feature functions.

View File

@ -8,7 +8,7 @@ namespace Moses
namespace Syntax
{
class SHyperedge;
struct SHyperedge;
}
/** base class for all stateless feature functions.

View File

@ -50,7 +50,7 @@ class FFState;
class StatelessFeatureFunction;
class StatefulFeatureFunction;
class Manager;
class ReportingOptions;
struct ReportingOptions;
typedef std::vector<Hypothesis*> ArcList;

View File

@ -349,7 +349,7 @@ OutputNBestList(OutputCollector *collector,
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
bool with_labels = options().nbest.include_feature_labels;
features.OutputAllFeatureScores(out, with_labels);

View File

@ -248,8 +248,6 @@ void BilingualLM::getAllAlignments(const ChartHypothesis& cur_hypo, size_t featu
int source_word_mid_idx; //The word alignment
//Get source sent
const ChartManager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
// get absolute position in source sentence for each source word in rule

View File

@ -131,7 +131,7 @@ void Manager::Decode()
TRACE_ERR("Line "<< m_source.GetTranslationId()
<< ": Collecting options took "
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
<< __FILE__ << ":" << __LINE__ << endl);
<< __FILE__ << " Line " << __LINE__ << endl);
}
// search for best translation with the specified algorithm
@ -1666,7 +1666,7 @@ OutputNBest(std::ostream& out,
const std::vector<Moses::FactorType>& outputFactorOrder,
long translationId, char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
// const StaticData &staticData = StaticData::Instance();
NBestOptions const& nbo = options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
@ -1681,8 +1681,7 @@ OutputNBest(std::ostream& out,
out << translationId << " ||| ";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, outputFactorOrder, reportSegmentation,
reportAllFactors);
OutputSurface(out, edge); //, outputFactorOrder, reportSegmentation, reportAllFactors);
}
out << " |||";
@ -1743,57 +1742,59 @@ OutputNBest(std::ostream& out,
*/
void
Manager::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
OutputSurface(std::ostream &out, const Hypothesis &edge) const
{
std::vector<FactorType> outputFactorOrder = options().output.factor_order;
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
FactorType placeholderFactor = options().input.placeholder_factor;
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
}
bool markUnknown = options().unk.mark;
if (reportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor = options().input.placeholder_factor;
std::string const& fd = options().output.FactorDelimiter;
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
}
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
}
UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix << *factor << options().unk.suffix;
} else {
out << *factor;
}
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos);
out << "|" << *factor;
}
out << " ";
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix;
}
out << *factor;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos);
out << fd << *factor;
}
if(markUnknown && word.IsOOV()) {
out << options().unk.suffix;
}
out << " ";
}
// trace ("report segmentation") option "-t" / "-tt"
int reportSegmentation = options().output.ReportSegmentation;
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
const Range &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
@ -2080,7 +2081,8 @@ OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
OutputSurface(out, edge);
// , StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
}
out << endl;
}

View File

@ -137,8 +137,9 @@ public:
, const std::vector<Moses::FactorType>& outputFactorOrder
, long translationId
, char reportSegmentation) const;
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const;
void OutputSurface(std::ostream &out, const Hypothesis &edge) const;
void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const;
void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;

View File

@ -35,6 +35,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "util/random.hh"
#include <boost/program_options.hpp>
#ifdef HAVE_XMLRPC_C
#include <xmlrpc_server.h>
#endif
using namespace std;
using namespace boost::algorithm;
@ -53,6 +56,7 @@ Parameter::Parameter()
AddParam(main_opts,"input-file", "i", "location of the input file to be translated");
AddParam(main_opts,"verbose", "v", "verbosity level of the logging");
AddParam(main_opts,"version", "show version of Moses and libraries used");
AddParam(main_opts,"show-weights", "print feature weights and exit");
AddParam(main_opts,"time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
@ -447,6 +451,35 @@ LoadParam(const string &filePath)
return LoadParam(3, (char const**) argv);
}
/// Print out version information about the things that went into this
/// executable.
void show_version()
{
std::cout << "\nMoses code version (git tag or commit hash):\n "
<< MOSES_VERSION_ID << std::endl
<< "Libraries used:" << std::endl
<< " Boost version "
<< BOOST_VERSION / 100000 << "." // major version
<< BOOST_VERSION / 100 % 1000 << "." // minor version
<< BOOST_VERSION % 100 // patch level
<< std::endl;
#ifdef HAVE_XMLRPC_C
unsigned int major, minor, point;
xmlrpc_server_version(&major, &minor, &point);
std::cout << " Xmlrpc-c version "
<< major << "." << minor << "." << point << std::endl;
#endif
#ifdef HAVE_CMPH
// there's no easy way to determine the cmph version at compile time
std::cout << " CMPH (version unknown)" << std::endl;
#endif
#ifdef MMT_VERSION_ID
std::cout << string(20,'-')
<< "\nMMT extras version: " << MMT_VERSION_ID << std::endl;
#endif
}
/** load all parameters from the configuration file and the command line switches */
bool
Parameter::
@ -459,8 +492,14 @@ LoadParam(int argc, char const* xargv[])
argv[i] = xargv[i];
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
++argv[i];
if (!strcmp(argv[i],"-version")) {
show_version();
exit(0);
}
}
// config file (-f) arg mandatory
string configPath;
if ( (configPath = FindParam("-f", argc, argv)) == ""
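show_version() above decodes the single BOOST_VERSION integer into major.minor.patch with integer arithmetic (value / 100000, value / 100 % 1000, value % 100). A small stand-alone check of that decomposition, using an illustrative value rather than a real build's BOOST_VERSION:

#include <cassert>
#include <iostream>

int main() {
  // Boost encodes its version as major*100000 + minor*100 + patch,
  // e.g. 105800 for Boost 1.58.0 (illustrative value only).
  const unsigned version = 105800;
  const unsigned major = version / 100000;
  const unsigned minor = version / 100 % 1000;
  const unsigned patch = version % 100;
  assert(major == 1 && minor == 58 && patch == 0);
  std::cout << major << "." << minor << "." << patch << "\n";  // 1.58.0
}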

View File

@ -10,11 +10,11 @@ namespace Moses
Search::Search(Manager& manager, const InputType &source)
: m_manager(manager)
, m_source(source)
, m_options(manager.options())
, m_inputPath()
, m_initialTransOpt()
, m_options(manager.options())
, interrupted_flag(0)
, m_bitmaps(source.GetSize(), source.m_sourceCompleted)
, interrupted_flag(0)
{
m_initialTransOpt.SetInputPath(m_inputPath);
}

View File

@ -128,8 +128,6 @@ void SearchCubePruning::Decode()
BCQueue.push(bmIter->second);
m_manager.GetSentenceStats().StopTimeManageCubes();
// old algorithm
// bmIter->second->EnsureMinStackHyps(PopLimit);
}
// main search loop, pop k best hyps

View File

@ -24,15 +24,6 @@ SearchNormal(Manager& manager, const InputType &source,
{
VERBOSE(1, "Translating: " << m_source << endl);
// m_beam_width = manager.options().search.beam_width;
// m_stack_size = manager.options().search.stack_size;
// m_stack_diversity = manager.options().search.stack_diversity;
// m_timeout = manager.options().search.timeout;
// m_max_distortion = manager.options().reordering.max_distortion;
// only if constraint decoding (having to match a specified output)
// long sentenceID = source.GetTranslationId();
// initialize the stacks: create data structure and set limits
std::vector < HypothesisStackNormal >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
@ -82,8 +73,6 @@ ProcessOneStack(HypothesisStack* hstack)
*/
void SearchNormal::Decode()
{
// SentenceStats &stats = m_manager.GetSentenceStats();
// initial seed hypothesis: nothing translated, no words produced
const Bitmap &initBitmap = m_bitmaps.GetInitialBitmap();
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap, m_manager.GetNextHypoId());
@ -109,7 +98,6 @@ SearchNormal::
ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, it's good to have that limit handy
// int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = m_source.GetType() == WordLatticeInput;
const Bitmap &hypoBitmap = hypothesis.GetWordsBitmap();
@ -297,7 +285,6 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
float estimatedScore,
const Bitmap &bitmap)
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = m_manager.GetSentenceStats();
Hypothesis *newHypo;

View File

@ -206,7 +206,7 @@ init(string line, std::vector<FactorType> const& factorOrder,
// only fill the vector if we are parsing XML
if (opts.input.xml_policy != XmlPassThrough) {
m_xmlCoverageMap.assign(GetSize(), false);
BOOST_FOREACH(XmlOption* o, m_xmlOptions) {
BOOST_FOREACH(XmlOption const* o, m_xmlOptions) {
Range const& r = o->range;
for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
m_xmlCoverageMap[j]=true;
@ -291,7 +291,7 @@ bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const
void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list) const
{
for (std::vector<XmlOption*>::const_iterator iterXMLOpts = m_xmlOptions.begin();
for (std::vector<XmlOption const*>::const_iterator iterXMLOpts = m_xmlOptions.begin();
iterXMLOpts != m_xmlOptions.end(); ++iterXMLOpts) {
const XmlOption &xmlOption = **iterXMLOpts;
const Range &range = xmlOption.range;
@ -305,7 +305,7 @@ void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list,
{
//iterate over XmlOptions list, find exact source/target matches
for (std::vector<XmlOption*>::const_iterator iterXMLOpts = m_xmlOptions.begin();
for (std::vector<XmlOption const*>::const_iterator iterXMLOpts = m_xmlOptions.begin();
iterXMLOpts != m_xmlOptions.end(); ++iterXMLOpts) {
const XmlOption &xmlOption = **iterXMLOpts;
const Range &range = xmlOption.range;
@ -324,7 +324,6 @@ std::vector <ChartTranslationOptions*>
Sentence::
GetXmlChartTranslationOptions(AllOptions const& opts) const
{
const StaticData &staticData = StaticData::Instance();
std::vector <ChartTranslationOptions*> ret;
// XML Options
@ -339,7 +338,7 @@ GetXmlChartTranslationOptions(AllOptions const& opts) const
//iterXMLOpts will be empty for XmlIgnore
//look at each column
for(std::vector<XmlOption*>::const_iterator iterXmlOpts = m_xmlOptions.begin();
for(std::vector<XmlOption const*>::const_iterator iterXmlOpts = m_xmlOptions.begin();
iterXmlOpts != m_xmlOptions.end(); iterXmlOpts++) {
const XmlOption &xmlOption = **iterXmlOpts;

View File

@ -52,7 +52,7 @@ protected:
* Utility method that takes in a string representing an XML tag and the name of the attribute,
* and returns the value of that tag if present, empty string otherwise
*/
std::vector<XmlOption*> m_xmlOptions;
std::vector<XmlOption const*> m_xmlOptions;
std::vector <bool> m_xmlCoverageMap;
NonTerminalSet m_defaultLabelSet;

View File

@ -9,13 +9,6 @@ namespace Syntax
namespace F2S
{
HyperPathLoader::HyperPathLoader(FactorDirection direction,
const std::vector<FactorType> &factorOrder)
: m_direction(direction)
, m_factorOrder(factorOrder)
{
}
void HyperPathLoader::Load(const StringPiece &s, HyperPath &path)
{
path.nodeSeqs.clear();

View File

@ -30,8 +30,6 @@ namespace F2S
class HyperPathLoader
{
public:
HyperPathLoader(FactorDirection, const std::vector<FactorType> &);
void Load(const StringPiece &, HyperPath &);
private:
@ -58,8 +56,6 @@ private:
return FactorCollection::Instance().AddFactor(s, true);
}
FactorDirection m_direction;
const std::vector<FactorType> &m_factorOrder;
std::vector<TreeFragmentToken> m_tokenSeq;
std::vector<NodeTuple> m_nodeTupleSeq;
std::stack<int> m_parentStack;

View File

@ -60,7 +60,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
HyperPathLoader hyperPathLoader(Input, input);
HyperPathLoader hyperPathLoader;
Phrase dummySourcePhrase;
{
@ -87,9 +87,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
alignString = temp;
}
if (++pipes) {
StringPiece str(*pipes); //counts
}
++pipes; // counts
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {

View File

@ -107,7 +107,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
bool with_labels = options().nbest.include_feature_labels;
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);

View File

@ -77,9 +77,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
alignString = temp;
}
if (++pipes) {
StringPiece str(*pipes); //counts
}
++pipes; // counts
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {

View File

@ -135,8 +135,8 @@ TargetPhrase::TargetPhrase(const TargetPhrase &copy)
, m_scoreBreakdown(copy.m_scoreBreakdown)
, m_alignTerm(copy.m_alignTerm)
, m_alignNonTerm(copy.m_alignNonTerm)
, m_properties(copy.m_properties)
, m_scope(copy.m_scope)
, m_properties(copy.m_properties)
, m_container(copy.m_container)
{
if (copy.m_lhsTarget) {

View File

@ -19,8 +19,8 @@ namespace Moses
using namespace boost;
unique_lock<shared_mutex> lock(m_lock);
size_t ctr=0;
#if 0
size_t ctr=0;
std::cerr << "BEFORE" << std::endl;
for (cache_t::iterator m = m_qfirst; m != m_cache.end(); m = m->second->next)
{

View File

@ -32,8 +32,8 @@ namespace sapt
indoc = other.indoc;
for (int i = 0; i <= LRModel::NONE; i++)
{
ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i];
ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i];
}
}
@ -53,7 +53,7 @@ namespace sapt
return obwd[idx];
}
void
size_t
jstats::
add(float w, float b, std::vector<unsigned char> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
@ -65,24 +65,25 @@ namespace sapt
my_bcnt += b;
if (a.size())
{
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
if (i == my_aln.size())
my_aln.push_back(std::pair<size_t,std::vector<unsigned char> >(1,a));
else
my_aln[i].first++;
if (my_aln[i].first > my_aln[i/2].first)
push_heap(my_aln.begin(),my_aln.begin()+i+1);
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
if (i == my_aln.size())
my_aln.push_back(std::pair<size_t,std::vector<unsigned char> >(1,a));
else
my_aln[i].first++;
if (my_aln[i].first > my_aln[i/2].first)
push_heap(my_aln.begin(),my_aln.begin()+i+1);
}
++ofwd[fwd_orient];
++obwd[bwd_orient];
if (docid >= 0)
{
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
}
return my_rcnt;
}
std::vector<std::pair<size_t, std::vector<unsigned char> > > const&
jstats::
aln() const

View File

@ -39,7 +39,7 @@ namespace sapt
std::vector<std::pair<size_t, std::vector<unsigned char> > > const & aln() const;
void
size_t
add(float w, float b, std::vector<unsigned char> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient, int const docid);

View File

@ -63,7 +63,7 @@ namespace sapt
}
}
bool
size_t
pstats::
add(uint64_t pid, float const w, float const b,
std::vector<unsigned char> const& a,
@ -73,13 +73,13 @@ namespace sapt
{
boost::lock_guard<boost::mutex> guard(this->lock);
jstats& entry = this->trg[pid];
entry.add(w, b, a, cnt2, fwd_o, bwd_o, docid);
size_t ret = entry.add(w, b, a, cnt2, fwd_o, bwd_o, docid);
if (this->good < entry.rcnt())
{
UTIL_THROW(util::Exception, "more joint counts than good counts:"
<< entry.rcnt() << "/" << this->good << "!");
UTIL_THROW(util::Exception, "more joint counts than good counts:"
<< entry.rcnt() << "/" << this->good << "!");
}
return true;
return ret;
}
void

View File

@ -41,7 +41,7 @@ namespace sapt
void register_worker();
size_t count_workers() { return in_progress; }
bool
size_t
add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score

View File

@ -45,7 +45,7 @@ BitextSampler : public Moses::reference_counter
{
typedef Bitext<Token> bitext;
typedef TSA<Token> tsa;
typedef SamplingBias bias;
typedef SamplingBias bias_t;
typedef typename Bitext<Token>::iter tsa_iter;
mutable boost::condition_variable m_ready;
mutable boost::mutex m_lock;
@ -59,7 +59,7 @@ BitextSampler : public Moses::reference_counter
char const* m_next; // current position
char const* m_stop; // end of search range
sampling_method const m_method; // look at all/random/ranked samples
SPTR<bias const> const m_bias; // bias over candidates
SPTR<bias_t const> const m_bias; // bias over candidates
size_t const m_samples; // how many samples at most
size_t const m_min_samples;
// non-const members
@ -67,20 +67,20 @@ BitextSampler : public Moses::reference_counter
size_t m_ctr; // number of samples considered
float m_total_bias; // for random sampling with bias
bool m_finished;
size_t m_num_occurrences; // estimated number of phrase occurrences in corpus
boost::taus88 m_rnd; // every job has its own pseudo random generator
// double m_rnd_denom; // denominator for scaling random sampling
double m_bias_total;
bool consider_sample(TokenPosition const& p);
size_t consider_sample(TokenPosition const& p);
size_t perform_random_sampling();
size_t perform_full_phrase_extraction();
int check_sample_distribution(uint64_t const& sid, uint64_t const& offset);
bool flip_coin(id_type & sid, ushort & offset);
bool flip_coin(id_type const& sid, ushort const& offset, SamplingBias const* bias);
public:
BitextSampler(BitextSampler const& other);
BitextSampler const& operator=(BitextSampler const& other);
// BitextSampler const& operator=(BitextSampler const& other);
BitextSampler(SPTR<bitext const> const& bitext,
typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias,
@ -159,9 +159,9 @@ check_sample_distribution(uint64_t const& sid, uint64_t const& offset)
template<typename Token>
bool
BitextSampler<Token>::
flip_coin(id_type & sid, ushort & offset)
flip_coin(id_type const& sid, ushort const& offset, bias_t const* bias)
{
int no_maybe_yes = m_bias ? check_sample_distribution(sid, offset) : 1;
int no_maybe_yes = bias ? check_sample_distribution(sid, offset) : 1;
if (no_maybe_yes == 0) return false; // no
if (no_maybe_yes > 1) return true; // yes
// ... maybe: flip a coin
@ -170,8 +170,8 @@ flip_coin(id_type & sid, ushort & offset)
size_t options_left = (options_total - m_ctr);
size_t random_number = options_left * (m_rnd()/(m_rnd.max()+1.));
size_t threshold;
if (m_bias_total) // we have a bias and there are candidates with non-zero prob
threshold = ((*m_bias)[sid]/m_bias_total * options_total * m_samples);
if (bias && m_bias_total > 0) // we have a bias and there are candidates with non-zero prob
threshold = ((*bias)[sid]/m_bias_total * options_total * m_samples);
else // no bias, or all have prob 0 (can happen with a very opinionated bias)
threshold = m_samples;
return random_number + options_chosen < threshold;
@ -199,13 +199,12 @@ BitextSampler(SPTR<Bitext<Token> const> const& bitext,
, m_ctr(0)
, m_total_bias(0)
, m_finished(false)
, m_num_occurrences(phrase.ca())
, m_rnd(0)
// , m_rnd_denom(m_rnd.max() + 1)
{
m_stats.reset(new pstats);
m_stats->raw_cnt = phrase.ca();
m_stats->register_worker();
// cerr << phrase.str(bitext->V1.get()) << " [" << HERE << "]" << endl;
}
template<typename Token>
@ -221,8 +220,8 @@ BitextSampler(BitextSampler const& other)
, m_bias(other.m_bias)
, m_samples(other.m_samples)
, m_min_samples(other.m_min_samples)
, m_num_occurrences(other.m_num_occurrences)
, m_rnd(0)
// , m_rnd_denom(m_rnd.max() + 1)
{
// lock both instances
boost::unique_lock<boost::mutex> mylock(m_lock);
@ -235,6 +234,23 @@ BitextSampler(BitextSampler const& other)
m_finished = other.m_finished;
}
// Uniform sampling
template<typename Token>
size_t
BitextSampler<Token>::
perform_full_phrase_extraction()
{
if (m_next == m_stop) return m_ctr;
for (sapt::tsa::ArrayEntry I(m_next); I.next < m_stop; ++m_ctr)
{
++m_ctr;
m_root->readEntry(I.next, I);
consider_sample(I);
}
return m_ctr;
}
// Uniform sampling
template<typename Token>
size_t
@ -260,14 +276,14 @@ perform_random_sampling()
{
++m_ctr;
m_root->readEntry(I.next,I);
if (!flip_coin(I.sid, I.offset)) continue;
if (!flip_coin(I.sid, I.offset, m_bias.get())) continue;
consider_sample(I);
}
return m_ctr;
}
template<typename Token>
bool
size_t
BitextSampler<Token>::
consider_sample(TokenPosition const& p)
{
@ -279,7 +295,7 @@ consider_sample(TokenPosition const& p)
if (!m_bitext->find_trg_phr_bounds(rec))
{ // no good, probably because phrase is not coherent
m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd);
return false;
return 0;
}
// all good: register this sample as valid
@ -300,6 +316,7 @@ consider_sample(TokenPosition const& p)
// pair once per source phrase occurrence, or else run the risk of
// having more joint counts than marginal counts.
size_t max_evidence = 0;
for (size_t s = rec.s1; s <= rec.s2; ++s)
{
TSA<Token> const& I = m_fwd ? *m_bitext->I2 : *m_bitext->I1;
@ -313,8 +330,10 @@ consider_sample(TokenPosition const& p)
continue; // don't over-count
seen.push_back(tpid);
size_t raw2 = b->approxOccurrenceCount();
m_stats->add(tpid, sample_weight, m_bias ? (*m_bias)[p.sid] : 1,
aln, raw2, rec.po_fwd, rec.po_bwd, docid);
size_t evid = m_stats->add(tpid, sample_weight,
m_bias ? (*m_bias)[p.sid] : 1,
aln, raw2, rec.po_fwd, rec.po_bwd, docid);
max_evidence = std::max(max_evidence, evid);
bool ok = (i == rec.e2) || b->extend(o[i].id());
UTIL_THROW_IF2(!ok, "Could not extend target phrase.");
}
@ -322,7 +341,7 @@ consider_sample(TokenPosition const& p)
for (size_t k = 1; k < aln.size(); k += 2)
--aln[k];
}
return true;
return max_evidence;
}
#ifndef MMT
@ -333,7 +352,9 @@ operator()()
{
if (m_finished) return true;
boost::unique_lock<boost::mutex> lock(m_lock);
if (m_method == random_sampling)
if (m_method == full_coverage)
perform_full_phrase_extraction(); // consider all occurrences
else if (m_method == random_sampling)
perform_random_sampling();
else UTIL_THROW2("Unsupported sampling method.");
m_finished = true;
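The sampler changes above make jstats::add(), pstats::add(), and consider_sample() report how much joint evidence was accumulated instead of returning void/bool, and consider_sample() keeps the maximum over all target phrases extracted from one sample; pstats::add() still enforces that no target phrase collects more joint counts than there are good (marginal) samples. A toy model of that invariant and of the max_evidence bookkeeping, with illustrative names rather than the sapt API:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>

struct ToyStats {
  std::size_t good = 0;                      // marginal count: usable samples seen
  std::map<std::uint64_t, std::size_t> trg;  // joint count per target phrase id

  // Returns the updated joint count, mirroring the new size_t return of add().
  std::size_t add(std::uint64_t pid) {
    std::size_t joint = ++trg[pid];
    assert(joint <= good && "more joint counts than good counts");
    return joint;
  }
};

int main() {
  ToyStats stats;
  std::size_t max_evidence = 0;
  // Each source occurrence contributes at most one count per target phrase,
  // so every per-phrase joint count stays bounded by the marginal count.
  for (int occurrence = 0; occurrence < 3; ++occurrence) {
    ++stats.good;
    for (std::uint64_t pid : {101u, 102u})
      max_evidence = std::max(max_evidence, stats.add(pid));
  }
  assert(max_evidence == 3);
  return 0;
}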

View File

@ -245,6 +245,10 @@ namespace Moses
if ((m = param.find("lr-func")) != param.end())
m_lr_func_name = m->second;
// accommodate typo in Germann, 2015: Sampling Phrase Tables for
// the Moses SMT System (PBML):
if ((m = param.find("lrfunc")) != param.end())
m_lr_func_name = m->second;
if ((m = param.find("extra")) != param.end())
m_extra_data = m->second;
@ -295,6 +299,7 @@ namespace Moses
// known_parameters.push_back("limit"); // replaced by "table-limit"
known_parameters.push_back("logcnt");
known_parameters.push_back("lr-func"); // associated lexical reordering function
known_parameters.push_back("lrfunc"); // associated lexical reordering function
known_parameters.push_back("method");
known_parameters.push_back("name");
known_parameters.push_back("num-features");

View File

@ -26,7 +26,7 @@ namespace sapt
BOOST_FOREACH(char const& x, denom)
{
if (x == '+') { --checksum; continue; }
if (x != 'g' && x != 's' && x != 'r') continue;
if (x != 'g' && x != 's' && x != 'r' && x != 'b') continue;
std::string s = (boost::format("pbwd-%c%.3f") % x % c).str();
this->m_feature_names.push_back(s);
}
@ -48,9 +48,12 @@ namespace sapt
BOOST_FOREACH(char const& x, denom)
{
uint32_t m2 = pp.raw2;
if (x == 'g') m2 = round(m2 * float(pp.good1) / pp.raw1);
if (x == 'g' || x == 'b') m2 = round(m2 * float(pp.good1) / pp.raw1);
else if (x == 's') m2 = round(m2 * float(pp.sample1) / pp.raw1);
(*dest)[i++] = log(lbop(std::max(m2, pp.joint), pp.joint,conf));
(*dest)[i] = log(lbop(std::max(m2, pp.joint), pp.joint,conf));
if (x == 'b') (*dest)[i] += log(pp.cum_bias) - log(pp.joint);
++i;
}
}
};

View File

@ -27,7 +27,7 @@ namespace sapt
BOOST_FOREACH(char const& x, denom)
{
if (x == '+') { --checksum; continue; }
if (x != 'g' && x != 's' && x != 'r') continue;
if (x != 'g' && x != 's' && x != 'r' && x != 'b') continue;
std::string s = (boost::format("pfwd-%c%.3f") % x % c).str();
this->m_feature_names.push_back(s);
}
@ -49,12 +49,16 @@ namespace sapt
// cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
}
size_t i = this->m_index;
float g = log(lbop(pp.good1, pp.joint, conf));
BOOST_FOREACH(char const& c, this->denom)
{
switch (c)
{
case 'b':
(*dest)[i++] = g + log(pp.cum_bias) - log(pp.joint);
break;
case 'g':
(*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
(*dest)[i++] = g;
break;
case 's':
(*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));

View File

@ -173,7 +173,6 @@ TranslationOptionCollection::
ProcessOneUnknownWord(const InputPath &inputPath, size_t sourcePos,
size_t length, const ScorePair *inputScores)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer&
unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
float unknownScore = FloorScore(TransformScore(0));

View File

@ -26,7 +26,7 @@ bool
TreeInput::
ProcessAndStripXMLTags(AllOptions const& opts, string &line,
std::vector<XMLParseOutput> &sourceLabels,
std::vector<XmlOption*> &xmlOptions)
std::vector<XmlOption const*> &xmlOptions)
{
//parse XML markup in translation line

View File

@ -45,7 +45,7 @@ protected:
bool ProcessAndStripXMLTags(AllOptions const& opts, std::string &line,
std::vector<XMLParseOutput> &sourceLabels,
std::vector<XmlOption*> &res);
std::vector<XmlOption const*> &res);
public:
TreeInput() : Sentence() { }

View File

@ -159,7 +159,7 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
* \param rbrackStr xml tag's right bracket string, typically ">"
*/
bool
ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption*> &res,
ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption const*> &res,
ReorderingConstraint &reorderingConstraint,
vector< size_t > &walls,
std::vector< std::pair<size_t, std::string> > &placeholders,

View File

@ -31,7 +31,7 @@ bool isXmlTag(const std::string& tag, const std::string& lbrackStr="<", const st
std::vector<std::string> TokenizeXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">");
bool ProcessAndStripXMLTags(AllOptions const& opts,
std::string &line, std::vector<XmlOption*> &res,
std::string &line, std::vector<XmlOption const*> &res,
ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
std::vector< std::pair<size_t, std::string> > &placeholders,
int offset,

View File

@ -67,6 +67,14 @@ namespace Moses {
if (params) factor_order = Scan<FactorType>(*params);
if (factor_order.empty()) factor_order.assign(1,0);
if (ReportAllFactors) {
for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
param.SetParameter(FactorDelimiter, "factor-delimiter", std::string("|"));
param.SetParameter(FactorDelimiter, "output-factor-delimiter", FactorDelimiter);
return true;
}
@ -76,6 +84,24 @@ namespace Moses {
update(std::map<std::string, xmlrpc_c::value>const& param)
{
ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
std::map<std::string, xmlrpc_c::value>::const_iterator m;
m = param.find("output-factors");
if (m != param.end())
factor_order = Tokenize<FactorType>(xmlrpc_c::value_string(m->second), ",");
if (ReportAllFactors) {
factor_order.clear();
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
m = param.find("factor-delimiter");
if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
m = param.find("output-factor-delimiter");
if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
return true;
}
#endif

View File

@ -19,7 +19,8 @@ namespace Moses
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
std::string AlignmentOutputFile;
std::string FactorDelimiter;
bool WordGraph;
std::string SearchGraph;

View File

@ -378,8 +378,9 @@ pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const&
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
manager.OutputSurface(target, *e, m_options.output.factor_order,
m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
manager.OutputSurface(target, *e);
// , m_options.output.factor_order,
// m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
}
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);

View File

@ -12,8 +12,8 @@ using namespace Moses;
Translator::
Translator(Server& server)
: m_threadPool(server.options().numThreads),
m_server(server)
: m_server(server),
m_threadPool(server.options().numThreads)
{
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and

View File

@ -1,7 +1,8 @@
import option path ;
with-regtest = [ option.get "with-regtest" ] ;
with-xmlrpc = [ option.get "with-xmlrpc-c" ] ;
skip-compact = [ option.get "regtest-skip-compactpt" : : "yes" ] ;
with-xmlrpc = [ option.get "with-xmlrpc-c" ] ;
if $(with-regtest) {
with-regtest = [ path.root $(with-regtest) [ path.pwd ] ] ;
@ -32,8 +33,15 @@ if $(with-regtest) {
}
reg_test phrase-server : [ glob $(test-dir)/phrase-server.* ] : ../moses-cmd//moses : @reg_test_decode_server ;
}
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
if $(skip-compact) {
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM $(test-dir)/*compactptable ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM $(test-dir)/*compactptable ] : ../moses-cmd//moses : @reg_test_decode ;
} else {
reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
reg_test chart : [ glob $(test-dir)/chart.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
}
if [ option.get "with-dalm" : : "yes" ] {
reg_test dalm : [ glob $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
} else {

View File

@ -182,9 +182,11 @@ sub exec_moses_server {
}
while( 1==1 ) # wait until the server is listening for requests
{
sleep 5;
my $str = `grep "Listening on port $serverport" $results/run.stderr`;
last if($str =~ /Listening/);
sleep 5;
my $res = waitpid($pid, WNOHANG);
die "Moses crashed or aborted! Check $results/run.stderr for error messages.\n" if ($res);
my $str = `grep "Listening on port $serverport" $results/run.stderr`;
last if($str =~ /Listening/);
}
my $proxy = XMLRPC::Lite->proxy($url);
warn "Opening file $input to write to $results\n";

@ -1 +1 @@
Subproject commit f69e79f5fc92d993354fa775de197b029d321175
Subproject commit bbea49d71c5b9835d9a777a82085e57a33a0bcf6

View File

@ -1145,7 +1145,7 @@ filter
out: filtered-dir
default-name: tuning/filtered
rerun-on-change: filter-settings ttable-binarizer TRAINING:no-glue-grammar TRAINING:dont-tune-glue-grammar TRAINING:use-syntax-input-weight-feature TRAINING:config
ignore-if: TRAINING:binarize-all
ignore-if: TRAINING:binarize-all TRAINING:mmsapt
error: already exists. Please delete
filter-devtest
in: input-devtest TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table
@ -1159,7 +1159,7 @@ apply-filter
in: TRAINING:bin-config filtered-dir
out: filtered-config
default-name: tuning/moses.filtered.ini
ignore-if: TRAINING:binarize-all
ignore-if: TRAINING:binarize-all TRAINING:mmsapt
template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT
apply-filter-devtest
in: TRAINING:bin-config filtered-dir-devtest
@ -1288,14 +1288,14 @@ filter
out: filtered-dir
default-name: evaluation/filtered
rerun-on-change: filter-settings report-precision-by-coverage ttable-binarizer TRAINING:no-glue-grammar TRAINING:dont-tune-glue-grammar TRAINING:use-syntax-input-weight-feature TRAINING:config
pass-if: TRAINING:binarize-all
pass-if: TRAINING:binarize-all TRAINING:mmsapt
ignore-if: use-hiero
error: already exists. Please delete
apply-filter
in: filtered-dir TRAINING:config TUNING:config-with-reused-weights
out: filtered-config
default-name: evaluation/filtered.ini
ignore-if: TRAINING:binarize-all thot
ignore-if: TRAINING:binarize-all TRAINING:mmsapt thot
template: $moses-script-dir/ems/support/substitute-filtered-tables-and-weights.perl IN/moses.ini IN1 IN2 OUT
decode
in: TUNING:config-with-reused-weights input filtered-config

View File

@ -9,7 +9,7 @@ from argparse import ArgumentParser
import math
import os
from random import randint
import sys
import sys, gzip
def count_ngrams(snt, max_n):
@ -92,8 +92,13 @@ class Document:
def __init__(self, fname=None):
self.fname = fname
if fname:
self.snt = [line.strip().split() for line in open(fname)]
if fname[-3:] == ".gz":
self.snt = [line.strip().split() for line in gzip.open(fname).readlines()]
else:
self.snt = [line.strip().split() for line in open(fname)]
pass
self.ngrams = [count_ngrams(snt, 4) for snt in self.snt]
# print self.snt
else:
self.snt = None
self.ngrams = None