Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Barry Haddow 2015-05-13 12:13:09 +01:00
commit e61c905687
19 changed files with 157 additions and 168 deletions

View File

@ -36,7 +36,7 @@ OutputSearchGraphAsHypergraph(std::ostream& out) const
UTIL_THROW2("Not implemented.");
}
void
void
BaseManager::
OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
{
@ -44,12 +44,12 @@ OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision)
if (! boost::filesystem::exists(odir))
boost::filesystem::create_directory(odir);
UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
"Cannot output hypergraphs to " << odir
<< " because that path exists but is not a directory.");
"Cannot output hypergraphs to " << odir
<< " because that path exists but is not a directory.");
// not clear why we need to output the weights every time we dump a search
// not clear why we need to output the weights every time we dump a search
// graph into a file again, but that's what the old code did.
string weightsFile = odir + "/weights";
TRACE_ERR("The weights file is " << weightsFile << "\n");
ofstream weightsOut;
@ -60,28 +60,25 @@ OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision)
// (or the translation task)
StaticData::Instance().GetAllWeights().Save(weightsOut);
weightsOut.close();
boost::iostreams::filtering_ostream file;
if (boost::ends_with(fname, ".gz"))
file.push(boost::iostreams::gzip_compressor());
else if (boost::ends_with(fname, ".bz2"))
file.push( boost::iostreams::bzip2_compressor() );
file.push( boost::iostreams::file_sink(fname, ios_base::out) );
if (file.is_complete() && file.good())
{
file.setf(std::ios::fixed);
file.precision(precision);
this->OutputSearchGraphAsHypergraph(file);
file.flush();
}
else
{
TRACE_ERR("Cannot output hypergraph for line "
<< this->GetSource().GetTranslationId()
<< " because the output file " << fname
<< " is not open or not ready for writing"
<< std::endl);
}
if (file.is_complete() && file.good()) {
file.setf(std::ios::fixed);
file.precision(precision);
this->OutputSearchGraphAsHypergraph(file);
file.flush();
} else {
TRACE_ERR("Cannot output hypergraph for line "
<< this->GetSource().GetTranslationId()
<< " because the output file " << fname
<< " is not open or not ready for writing"
<< std::endl);
}
file.pop();
}

View File

@ -66,13 +66,13 @@ public:
// virtual void OutputSearchGraphHypergraph() const = 0;
virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const;
virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
size_t const precision) const;
virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
size_t const precision) const;
/***
* to be called after processing a sentence
*/
virtual void CalcDecoderStatistics() const = 0;
};
}

View File

@ -291,7 +291,7 @@ void ChartManager::FindReachableHypotheses(
}
}
void
void
ChartManager::
OutputSearchGraphAsHypergraph(std::ostream& out) const
{

View File

@ -228,7 +228,7 @@ batch_run()
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
if (source->GetContext())
if (source->GetContext())
task->SetContextString(*source->GetContext());
else task->SetContextString(context_string);

View File

@ -47,10 +47,10 @@ namespace Moses
template class HypergraphOutput<Manager>;
template class HypergraphOutput<ChartManager>;
void
void
ChartSearchGraphWriterMoses::
WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@ -62,20 +62,18 @@ WriteHypos(const ChartHypothesisCollection& hypos,
}
const ChartArcList *arcList = mainHypo.GetArcList();
if (arcList)
{
ChartArcList::const_iterator iterArc;
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc)
{
const ChartHypothesis &arc = **iterArc;
if (reachable.find(arc.GetId()) != reachable.end())
(*m_out) << m_lineNumber << " " << arc << endl;
}
if (arcList) {
ChartArcList::const_iterator iterArc;
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
const ChartHypothesis &arc = **iterArc;
if (reachable.find(arc.GetId()) != reachable.end())
(*m_out) << m_lineNumber << " " << arc << endl;
}
}
}
}
void
void
ChartSearchGraphWriterHypergraph::
WriteHeader(size_t winners, size_t losers) const
{
@ -83,10 +81,10 @@ WriteHeader(size_t winners, size_t losers) const
(*m_out) << winners << " " << (winners+losers) << endl;
}
void
void
ChartSearchGraphWriterHypergraph::
WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@ -113,8 +111,8 @@ WriteHypos(const ChartHypothesisCollection& hypos,
}
}
(*m_out) << edges.size() << endl;
for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin();
ei != edges.end(); ++ei) {
for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin();
ei != edges.end(); ++ei) {
const ChartHypothesis* hypo = *ei;
const TargetPhrase& target = hypo->GetCurrTargetPhrase();
size_t ntIndex = 0;

View File

@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore {
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
pool.freeObject(hypo); \
} \
#else
#define FREEHYPO(hypo) delete hypo
#endif

View File

@ -97,12 +97,12 @@ IOWrapper::IOWrapper()
// context buffering for context-sensitive decoding
m_look_ahead = staticData.GetContextParameters().look_ahead;
m_look_back = staticData.GetContextParameters().look_back;
m_inputType = staticData.GetInputType();
UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
"Context-sensitive decoding currently works only with sentence input.");
"Context-sensitive decoding currently works only with sentence input.");
m_currentLine = staticData.GetStartTranslationId();
m_inputFactorOrder = &staticData.GetInputFactorOrder();
@ -223,18 +223,16 @@ IOWrapper::IOWrapper()
std::string& fmt = m_hypergraph_output_filepattern;
// first, determine the output directory
if (p && p->size() > 2) fmt = p->at(2);
else if (nBestFilePath.size() && nBestFilePath != "-" &&
! boost::starts_with(nBestFilePath, "/dev/stdout"))
{
fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
if (fmt.empty()) fmt = ".";
}
else fmt = boost::filesystem::current_path().string() + "/hypergraph";
else if (nBestFilePath.size() && nBestFilePath != "-" &&
! boost::starts_with(nBestFilePath, "/dev/stdout")) {
fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
if (fmt.empty()) fmt = ".";
} else fmt = boost::filesystem::current_path().string() + "/hypergraph";
if (*fmt.rbegin() != '/') fmt += "/";
std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
"Unknown compression type '" << extension
<< "' for hypergraph output!");
"Unknown compression type '" << extension
<< "' for hypergraph output!");
fmt += string("%d.") + extension;
if (staticData.GetParameter().GetParam("spe-src")) {
@ -278,10 +276,10 @@ IOWrapper::
GetBufferedInput()
{
switch(m_inputType) {
case SentenceInput:
return BufferInput<Sentence>();
case ConfusionNetworkInput:
return BufferInput<ConfusionNet>();
case SentenceInput:
return BufferInput<Sentence>();
case ConfusionNetworkInput:
return BufferInput<ConfusionNet>();
case WordLatticeInput:
return BufferInput<WordLattice>();
case TreeInputType:
@ -294,7 +292,7 @@ GetBufferedInput()
TRACE_ERR("Unknown input type: " << m_inputType << "\n");
return boost::shared_ptr<InputType>();
}
}
boost::shared_ptr<InputType>
@ -304,53 +302,47 @@ IOWrapper::ReadInput()
boost::lock_guard<boost::mutex> lock(m_lock);
#endif
boost::shared_ptr<InputType> source = GetBufferedInput();
if (source)
{
source->SetTranslationId(m_currentLine++);
this->set_context_for(*source);
}
if (source) {
source->SetTranslationId(m_currentLine++);
this->set_context_for(*source);
}
m_past_input.push_back(source);
return source;
}
/**
 * Build the context string for one input and attach it via
 * source.SetContext().
 *
 * Steps, in order:
 *  1. Trim m_past_input from the front. Starting at end(), the iterator is
 *     stepped backwards, summing GetSize() of each entry passed, until the
 *     sum reaches m_look_back or the iterator hits begin(); every entry
 *     in front of that position is then popped. When m_look_back is 0 the
 *     iterator never moves off end(), so the whole buffer is emptied.
 *  2. Append ToString() of each remaining m_past_input entry, separated by
 *     single spaces.
 *  3. Append ToString() of each m_future_input entry, preceded by a space
 *     whenever the context string is already non-empty.
 *
 * The text of source itself is not added to the context; source is only
 * the recipient of the resulting shared string.
 */
void
void
IOWrapper::
set_context_for(InputType& source)
{
boost::shared_ptr<string> context(new string);
list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
// remove obsolete past input from buffer:
if (m_past_input.end() != m_past_input.begin())
{
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();
cnt += (*m)->GetSize());
while (m_past_input.begin() != m) m_past_input.pop_front();
}
if (m_past_input.end() != m_past_input.begin()) {
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();
cnt += (*m)->GetSize());
while (m_past_input.begin() != m) m_past_input.pop_front();
}
// cerr << string(80,'=') << endl;
if (m_past_input.size())
{
m = m_past_input.begin();
*context += (*m)->ToString();
// cerr << (*m)->ToString() << endl;
for (++m; m != m_past_input.end(); ++m)
{
// cerr << "\n" << (*m)->ToString() << endl;
*context += string(" ") + (*m)->ToString();
}
// cerr << string(80,'-') << endl;
if (m_past_input.size()) {
m = m_past_input.begin();
*context += (*m)->ToString();
// cerr << (*m)->ToString() << endl;
for (++m; m != m_past_input.end(); ++m) {
// cerr << "\n" << (*m)->ToString() << endl;
*context += string(" ") + (*m)->ToString();
}
// cerr << string(80,'-') << endl;
}
// cerr << source.ToString() << endl;
if (m_future_input.size())
{
// cerr << string(80,'-') << endl;
for (m = m_future_input.begin(); m != m_future_input.end(); ++m)
{
// if (m != m_future_input.begin()) cerr << "\n";
// cerr << (*m)->ToString() << endl;
if (context->size()) *context += " ";
*context += (*m)->ToString();
}
if (m_future_input.size()) {
// cerr << string(80,'-') << endl;
for (m = m_future_input.begin(); m != m_future_input.end(); ++m) {
// if (m != m_future_input.begin()) cerr << "\n";
// cerr << (*m)->ToString() << endl;
if (context->size()) *context += " ";
*context += (*m)->ToString();
}
}
// cerr << string(80,'=') << endl;
source.SetContext(context);
}

View File

@ -119,7 +119,7 @@ protected:
size_t m_look_ahead; /// for context-sensitive decoding: # of words to look ahead
size_t m_look_back; /// for context-sensitive decoding: # of words to look back
size_t m_buffered_ahead; /// number of words buffered ahead
// For context-sensitive decoding:
// For context-sensitive decoding:
// Number of context words ahead and before the current sentence.
std::string m_hypergraph_output_filepattern;
@ -184,14 +184,14 @@ public:
std::ifstream *spe_src, *spe_trg, *spe_aln;
private:
template<class itype>
template<class itype>
boost::shared_ptr<InputType>
BufferInput();
boost::shared_ptr<InputType>
GetBufferedInput();
void
void
set_context_for(InputType& source);
};
@ -202,27 +202,23 @@ BufferInput()
{
boost::shared_ptr<itype> source;
boost::shared_ptr<InputType> ret;
if (m_future_input.size())
{
ret = m_future_input.front();
m_future_input.pop_front();
m_buffered_ahead -= ret->GetSize();
}
else
{
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
return ret;
ret = source;
}
if (m_future_input.size()) {
ret = m_future_input.front();
m_future_input.pop_front();
m_buffered_ahead -= ret->GetSize();
} else {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
return ret;
ret = source;
}
while (m_buffered_ahead < m_look_ahead)
{
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder)) break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();
}
while (m_buffered_ahead < m_look_ahead) {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder)) break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();
}
return ret;
}

View File

@ -57,9 +57,9 @@ public:
void OutputSearchGraphSLF() const {
}
void
void
OutputSearchGraphAsHypergraph
( std::string const& fname, size_t const precision ) const
( std::string const& fname, size_t const precision ) const
{ }

View File

@ -173,13 +173,16 @@ public:
//! number of words in this sentence/confusion network
virtual size_t GetSize() const =0;
virtual boost::shared_ptr<std::string> const&
GetContext() const { return m_context; }
virtual void
SetContext(boost::shared_ptr<std::string> const& ctx)
{ m_context = ctx; }
virtual boost::shared_ptr<std::string> const&
GetContext() const {
return m_context;
}
virtual void
SetContext(boost::shared_ptr<std::string> const& ctx) {
m_context = ctx;
}
//! populate this InputType with data from in stream
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder) =0;

View File

@ -1982,10 +1982,10 @@ void Manager::OutputSearchGraphSLF() const
// static char const* key = "output-search-graph-hypergraph";
// PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
// ScoreComponentCollection const& weights = staticData.GetAllWeights();
// string const& nBestFile = staticData.GetNBestFilePath();
// string const& nBestFile = staticData.GetNBestFilePath();
// HypergraphOutput<Manager> hypergraphOutput(PRECISION, p, nBestFile, weights);
// hypergraphOutput.Write(*this);
// }
void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const

View File

@ -240,10 +240,10 @@ Parameter::Parameter()
AddParam(misc_opts,"feature-name-overwrite", "Override feature name (NOT arguments). Eg. SRILM-->KENLM, PhraseDictionaryMemory-->PhraseDictionaryScope3");
AddParam(misc_opts,"feature", "All the feature functions should be here");
AddParam(misc_opts,"context-string",
"A (tokenized) string containing context words for context-sensitive translation.");
AddParam(misc_opts,"context-window",
"Context window (in words) for context-sensitive translation: {+|-|+-}<number>.");
AddParam(misc_opts,"context-string",
"A (tokenized) string containing context words for context-sensitive translation.");
AddParam(misc_opts,"context-window",
"Context window (in words) for context-sensitive translation: {+|-|+-}<number>.");
// Compact phrase table and reordering table.
po::options_description cpt_opts("Options when using compact phrase and reordering tables.");

View File

@ -301,7 +301,9 @@ public:
}
const ContextParameters&
GetContextParameters() const { return m_context_parameters; }
GetContextParameters() const {
return m_context_parameters;
}
const std::vector<FactorType> &GetInputFactorOrder() const {
return m_inputFactorOrder;
@ -449,15 +451,15 @@ public:
}
bool IsNBestEnabled() const {
return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
m_outputSearchGraph || m_outputSearchGraphSLF ||
m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
m_outputSearchGraph || m_outputSearchGraphSLF ||
m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
#ifdef HAVE_PROTOBUF
m_outputSearchGraphPB ||
m_outputSearchGraphPB ||
#endif
!m_latticeSamplesFilePath.empty());
!m_latticeSamplesFilePath.empty());
}
size_t GetLatticeSamplesSize() const {
return m_latticeSamplesSize;
}

View File

@ -31,9 +31,9 @@ public:
void OutputSearchGraph(OutputCollector *collector) const {}
// void OutputSearchGraphHypergraph() const {}
void
void
OutputSearchGraphAsHypergraph
( std::string const& fname, size_t const precision ) const
( std::string const& fname, size_t const precision ) const
{ }
void OutputSearchGraphSLF() const {}

View File

@ -217,7 +217,7 @@ namespace Moses
m_bias_log = &std::cerr;
else if (m_bias_logfile == "/dev/stdout")
m_bias_log = &std::cout;
else
else
{
m_bias_logger.reset(new ofstream(m_bias_logfile.c_str()));
m_bias_log = m_bias_logger.get();

View File

@ -178,12 +178,11 @@ void TranslationTask::Run()
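// Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes to the file whose name is returned by
// io->GetHypergraphOutputFileName() for this sentence's translation id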
if (StaticData::Instance().GetOutputSearchGraphHypergraph())
{
size_t transId = manager->GetSource().GetTranslationId();
string fname = io->GetHypergraphOutputFileName(transId);
manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
}
if (StaticData::Instance().GetOutputSearchGraphHypergraph()) {
size_t transId = manager->GetSource().GetTranslationId();
string fname = io->GetHypergraphOutputFileName(transId);
manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
}
additionalReportingTime.stop();

View File

@ -427,7 +427,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
out << *this; \
return out.str(); \
} \
//! delete and remove every element of a collection object such as set, list etc
template<class COLL>
void RemoveAllInColl(COLL &coll)

View File

@ -17,28 +17,26 @@ init(Parameter& params)
std::string context_window;
params.SetParameter(context_window, "context-window", std::string(""));
if (context_window == "")
if (context_window == "")
return;
size_t p = context_window.find_first_of("0123456789");
if (p == 0)
if (p == 0)
look_back = look_ahead = atoi(context_window.c_str());
if (p == 1)
{
if (context_window[0] == '-')
look_back = atoi(context_window.substr(1).c_str());
else if (context_window[0] == '+')
look_ahead = atoi(context_window.substr(1).c_str());
else
UTIL_THROW2("Invalid specification of context window.");
}
if (p == 2)
{
if (context_window.substr(0,2) == "+-" ||
context_window.substr(0,2) == "-+")
look_back = look_ahead = atoi(context_window.substr(p).c_str());
else
UTIL_THROW2("Invalid specification of context window.");
}
if (p == 1) {
if (context_window[0] == '-')
look_back = atoi(context_window.substr(1).c_str());
else if (context_window[0] == '+')
look_ahead = atoi(context_window.substr(1).c_str());
else
UTIL_THROW2("Invalid specification of context window.");
}
if (p == 2) {
if (context_window.substr(0,2) == "+-" ||
context_window.substr(0,2) == "-+")
look_back = look_ahead = atoi(context_window.substr(p).c_str());
else
UTIL_THROW2("Invalid specification of context window.");
}
}
}

View File

@ -56,6 +56,10 @@ sub Beautify($)
my $cmd = "astyle --style='k&r' -s2 -v $name";
#print STDERR "Executing: $cmd \n";
`$cmd`;
$cmd = "sed -i 's\/[ \\t]*\$\/\/' \"$name\"";
#print STDERR "Executing: $cmd \n";
`$cmd`;
}
}
}