Code reorganization with respect to hypergraph output.

This commit is contained in:
Ulrich Germann 2015-05-12 02:28:58 +01:00
parent f087fce65e
commit d122605c0d
12 changed files with 193 additions and 178 deletions

View File

@ -1,11 +1,17 @@
#include <vector>
#include "StaticData.h"
#include "BaseManager.h"
#include "StaticData.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/TranslationTask.h"
#include <vector>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/filesystem.hpp>
using namespace std;
namespace Moses
@ -21,6 +27,63 @@ BaseManager::GetSource() const
return m_source;
}
// Base-class fallback for the stream-based hypergraph dump. It writes nothing
// to `out` and only reports "Not implemented." through UTIL_THROW2; derived
// managers that support hypergraph output are expected to override it.
void BaseManager::OutputSearchGraphAsHypergraph(std::ostream& out) const
{
  UTIL_THROW2("Not implemented.");
}
void
BaseManager::
OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
{
std::string odir = boost::filesystem::path(fname).parent_path().string();
if (! boost::filesystem::exists(odir))
boost::filesystem::create_directory(odir);
UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
"Cannot output hypergraphs to " << odir
<< " because that path exists but is not a directory.");
// not clear why we need to output the weights every time we dump a search
// graph into a file again, but that's what the old code did.
string weightsFile = odir + "/weights";
TRACE_ERR("The weights file is " << weightsFile << "\n");
ofstream weightsOut;
weightsOut.open(weightsFile.c_str());
weightsOut.setf(std::ios::fixed);
weightsOut.precision(6);
// just temporarily, till we've implemented weight scoring in the manager
// (or the translation task)
StaticData::Instance().GetAllWeights().Save(weightsOut);
weightsOut.close();
boost::iostreams::filtering_ostream file;
if (boost::ends_with(fname, ".gz"))
file.push(boost::iostreams::gzip_compressor());
else if (boost::ends_with(fname, ".bz2"))
file.push( boost::iostreams::bzip2_compressor() );
file.push( boost::iostreams::file_sink(fname, ios_base::out) );
if (file.is_complete() && file.good())
{
file.setf(std::ios::fixed);
file.precision(precision);
this->OutputSearchGraphAsHypergraph(file);
file.flush();
}
else
{
TRACE_ERR("Cannot output hypergraph for line "
<< this->GetSource().GetTranslationId()
<< " because the output file " << fname
<< " is not open or not ready for writing"
<< std::endl);
}
file.pop();
}

View File

@ -63,13 +63,16 @@ public:
virtual void OutputSearchGraph(OutputCollector *collector) const = 0;
virtual void OutputUnknowns(OutputCollector *collector) const = 0;
virtual void OutputSearchGraphSLF() const = 0;
virtual void OutputSearchGraphHypergraph() const = 0;
// virtual void OutputSearchGraphHypergraph() const = 0;
virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const;
virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
size_t const precision) const;
/***
* to be called after processing a sentence
*/
virtual void CalcDecoderStatistics() const = 0;
};
}

View File

@ -291,9 +291,11 @@ void ChartManager::FindReachableHypotheses(
}
}
void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
void
ChartManager::
OutputSearchGraphAsHypergraph(std::ostream& out) const
{
ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
ChartSearchGraphWriterHypergraph writer(&out);
WriteSearchGraph(writer);
}
@ -812,14 +814,14 @@ void ChartManager::OutputDetailedAllTranslationReport(
collector->Write(translationId, out.str());
}
void ChartManager::OutputSearchGraphHypergraph() const
{
const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) {
HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
hypergraphOutputChart.Write(*this);
}
}
// void ChartManager::OutputSearchGraphHypergraph() const
// {
// const StaticData &staticData = StaticData::Instance();
// if (staticData.GetOutputSearchGraphHypergraph()) {
// HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
// hypergraphOutputChart.Write(*this);
// }
// }
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{

View File

@ -154,7 +154,7 @@ public:
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const {
}
void OutputSearchGraphHypergraph() const;
// void OutputSearchGraphHypergraph() const;
};

View File

@ -44,128 +44,13 @@ using namespace std;
namespace Moses
{
template<class M>
HypergraphOutput<M>::HypergraphOutput(size_t precision) :
m_precision(precision)
{
const StaticData& staticData = StaticData::Instance();
vector<string> hypergraphParameters;
const PARAM_VEC *params = staticData.GetParameter().GetParam("output-search-graph-hypergraph");
if (params) {
hypergraphParameters = *params;
}
if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
m_appendSuffix = true;
} else {
m_appendSuffix = false;
}
string compression;
if (hypergraphParameters.size() > 1) {
m_compression = hypergraphParameters[1];
} else {
m_compression = "txt";
}
UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2",
util::Exception, "Unknown compression type: " << m_compression);
if ( hypergraphParameters.size() > 2 ) {
m_hypergraphDir = hypergraphParameters[2];
} else {
string nbestFile = staticData.GetNBestFilePath();
if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
boost::filesystem::path nbestPath(nbestFile);
// In the Boost filesystem API version 2,
// which was the default prior to Boost 1.46,
// the filename() method returned a string.
//
// In the Boost filesystem API version 3,
// which is the default starting with Boost 1.46,
// the filename() method returns a path object.
//
// To get a string from the path object,
// the native() method must be called.
// hypergraphDir = nbestPath.parent_path().filename()
//#if BOOST_VERSION >= 104600
// .native()
//#endif
//;
// Hopefully the following compiles under all versions of Boost.
//
// If this line gives you compile errors,
// contact Lane Schwartz on the Moses mailing list
m_hypergraphDir = nbestPath.parent_path().string();
if (m_hypergraphDir.empty()) m_hypergraphDir=".";
} else {
stringstream hypergraphDirName;
hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
m_hypergraphDir = hypergraphDirName.str();
}
}
if ( ! boost::filesystem::exists(m_hypergraphDir) ) {
boost::filesystem::create_directory(m_hypergraphDir);
}
UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir),
util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
ofstream weightsOut;
stringstream weightsFilename;
weightsFilename << m_hypergraphDir << "/weights";
TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
weightsOut.open(weightsFilename.str().c_str());
weightsOut.setf(std::ios::fixed);
weightsOut.precision(6);
staticData.GetAllWeights().Save(weightsOut);
weightsOut.close();
}
template<class M>
void HypergraphOutput<M>::Write(const M& manager) const
{
stringstream fileName;
fileName << m_hypergraphDir << "/" << manager.GetSource().GetTranslationId();
if ( m_appendSuffix ) {
fileName << "." << m_compression;
}
boost::iostreams::filtering_ostream file;
if ( m_compression == "gz" ) {
file.push( boost::iostreams::gzip_compressor() );
} else if ( m_compression == "bz2" ) {
file.push( boost::iostreams::bzip2_compressor() );
}
file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
if (file.is_complete() && file.good()) {
file.setf(std::ios::fixed);
file.precision(m_precision);
manager.OutputSearchGraphAsHypergraph(file);
file.flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << manager.GetSource().GetTranslationId()
<< " because the output file " << fileName.str()
<< " is not open or not ready for writing"
<< std::endl);
}
file.pop();
}
template class HypergraphOutput<Manager>;
template class HypergraphOutput<ChartManager>;
void ChartSearchGraphWriterMoses::WriteHypos
(const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const
void
ChartSearchGraphWriterMoses::
WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@ -177,28 +62,31 @@ void ChartSearchGraphWriterMoses::WriteHypos
}
const ChartArcList *arcList = mainHypo.GetArcList();
if (arcList) {
ChartArcList::const_iterator iterArc;
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
const ChartHypothesis &arc = **iterArc;
if (reachable.find(arc.GetId()) != reachable.end()) {
(*m_out) << m_lineNumber << " " << arc << endl;
}
if (arcList)
{
ChartArcList::const_iterator iterArc;
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc)
{
const ChartHypothesis &arc = **iterArc;
if (reachable.find(arc.GetId()) != reachable.end())
(*m_out) << m_lineNumber << " " << arc << endl;
}
}
}
}
}
void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const
{
void
ChartSearchGraphWriterHypergraph::
WriteHeader(size_t winners, size_t losers) const
{
(*m_out) << "# target ||| features ||| source-covered" << endl;
(*m_out) << winners << " " << (winners+losers) << endl;
}
void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
void
ChartSearchGraphWriterHypergraph::
WriteHypos(const ChartHypothesisCollection& hypos,
const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@ -225,7 +113,8 @@ void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollectio
}
}
(*m_out) << edges.size() << endl;
for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin(); ei != edges.end(); ++ei) {
for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin();
ei != edges.end(); ++ei) {
const ChartHypothesis* hypo = *ei;
const TargetPhrase& target = hypo->GetCurrTargetPhrase();
size_t ntIndex = 0;

View File

@ -64,30 +64,33 @@ POSSIBILITY OF SUCH DAMAGE.
#include "IOWrapper.h"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
using namespace std;
namespace Moses
{
IOWrapper::IOWrapper()
:m_nBestStream(NULL)
,m_outputWordGraphStream(NULL)
,m_outputSearchGraphStream(NULL)
,m_detailedTranslationReportingStream(NULL)
,m_unknownsStream(NULL)
,m_alignmentInfoStream(NULL)
,m_latticeSamplesStream(NULL)
,m_surpressSingleBestOutput(false)
: m_nBestStream(NULL)
, m_outputWordGraphStream(NULL)
, m_outputSearchGraphStream(NULL)
, m_detailedTranslationReportingStream(NULL)
, m_unknownsStream(NULL)
, m_alignmentInfoStream(NULL)
, m_latticeSamplesStream(NULL)
, m_surpressSingleBestOutput(false)
, m_look_ahead(0)
, m_look_back(0)
, m_buffered_ahead(0)
,spe_src(NULL)
,spe_trg(NULL)
,spe_aln(NULL)
, spe_src(NULL)
, spe_trg(NULL)
, spe_aln(NULL)
{
const StaticData &staticData = StaticData::Instance();
@ -214,6 +217,26 @@ IOWrapper::IOWrapper()
m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
}
// setup file pattern for hypergraph output
char const* key = "output-search-graph-hypergraph";
PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
std::string& fmt = m_hypergraph_output_filepattern;
// first, determine the output directory
if (p && p->size() > 2) fmt = p->at(2);
else if (nBestFilePath.size() && nBestFilePath != "-" &&
! boost::starts_with(nBestFilePath, "/dev/stdout"))
{
fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
if (fmt.empty()) fmt = ".";
}
else fmt = boost::filesystem::current_path().string() + "/hypergraph";
if (*fmt.rbegin() != '/') fmt += "/";
std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
"Unknown compression type '" << extension
<< "' for hypergraph output!");
fmt += string("%d.") + extension;
if (staticData.GetParameter().GetParam("spe-src")) {
spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
@ -333,5 +356,14 @@ set_context_for(InputType& source)
}
/// Return the hypergraph output file name for the sentence with number \a id.
///
/// m_hypergraph_output_filepattern is built in the constructor as
/// "<directory>/%d.<extension>", where the directory may come straight from
/// the command line. Handing the whole pattern to boost::format would make a
/// '%' inside the directory name be parsed as a format directive, so only
/// the last "%d" (the file name placeholder) is substituted here.
///
/// @param id translation id of the sentence
/// @return   the pattern with the placeholder replaced by \a id
std::string
IOWrapper::
GetHypergraphOutputFileName(size_t const id) const
{
  static char const placeholder[] = "%d";
  std::string fname = m_hypergraph_output_filepattern;
  std::string::size_type const pos = fname.rfind(placeholder);

  // No placeholder found: keep the previous behaviour and let boost::format
  // deal with (and complain about) the pattern as a whole.
  if (pos == std::string::npos)
    return str(boost::format(fname) % id);

  fname.replace(pos, sizeof(placeholder) - 1,
                str(boost::format(placeholder) % id));
  return fname;
}
} // namespace

View File

@ -63,6 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "search/applied.hh"
#include <boost/format.hpp>
namespace Moses
{
class ScoreComponentCollection;
@ -119,6 +121,9 @@ protected:
size_t m_buffered_ahead; /// number of words buffered ahead
// For context-sensitive decoding:
// Number of context words ahead and before the current sentence.
std::string m_hypergraph_output_filepattern;
public:
IOWrapper();
~IOWrapper();
@ -173,6 +178,8 @@ public:
m_inputStream = &input;
}
std::string GetHypergraphOutputFileName(size_t const id) const;
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;

View File

@ -56,8 +56,11 @@ public:
}
void OutputSearchGraphSLF() const {
}
void OutputSearchGraphHypergraph() const {
}
// Intentionally empty: this class writes nothing to `fname`.
void OutputSearchGraphAsHypergraph(std::string const& fname,
                                   size_t const precision) const
{
}
private:

View File

@ -1974,14 +1974,19 @@ void Manager::OutputSearchGraphSLF() const
}
void Manager::OutputSearchGraphHypergraph() const
{
const StaticData &staticData = StaticData::Instance();
if (staticData.GetOutputSearchGraphHypergraph()) {
HypergraphOutput<Manager> hypergraphOutput(PRECISION);
hypergraphOutput.Write(*this);
}
}
// void Manager::OutputSearchGraphHypergraph() const
// {
// const StaticData &staticData = StaticData::Instance();
// if (!staticData.GetOutputSearchGraphHypergraph()) return;
// static char const* key = "output-search-graph-hypergraph";
// PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
// ScoreComponentCollection const& weights = staticData.GetAllWeights();
// string const& nBestFile = staticData.GetNBestFilePath();
// HypergraphOutput<Manager> hypergraphOutput(PRECISION, p, nBestFile, weights);
// hypergraphOutput.Write(*this);
// }
void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const
{

View File

@ -209,7 +209,7 @@ public:
void OutputWordGraph(OutputCollector *collector) const;
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const;
void OutputSearchGraphHypergraph() const;
// void OutputSearchGraphHypergraph() const;
};

View File

@ -29,7 +29,13 @@ public:
OutputCollector *collector) const {}
void OutputLatticeSamples(OutputCollector *collector) const {}
void OutputSearchGraph(OutputCollector *collector) const {}
void OutputSearchGraphHypergraph() const {}
// void OutputSearchGraphHypergraph() const {}
// Deliberate no-op, like the neighbouring Output* members: no hypergraph
// file is produced for `fname`.
void OutputSearchGraphAsHypergraph(std::string const& fname,
                                   size_t const precision) const
{
}
void OutputSearchGraphSLF() const {}
void OutputWordGraph(OutputCollector *collector) const {}
void OutputDetailedTranslationReport(OutputCollector *collector) const {}

View File

@ -178,7 +178,12 @@ void TranslationTask::Run()
// Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes one file per sentence, named by
// io->GetHypergraphOutputFileName()
manager->OutputSearchGraphHypergraph();
if (StaticData::Instance().GetOutputSearchGraphHypergraph())
{
size_t transId = manager->GetSource().GetTranslationId();
string fname = io->GetHypergraphOutputFileName(transId);
manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
}
additionalReportingTime.stop();