Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2014-04-23 12:11:50 +01:00
commit 3f32e48f97
13 changed files with 435 additions and 120 deletions

View File

@ -35,7 +35,7 @@ if $(build-moses-server) = true
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
exe mosesserver : mosesserver.cpp ../../moses//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
exe mosesserver : mosesserver.cpp ../../moses//moses ../../moses-cmd/IOWrapper.cpp ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
} else {
alias mosesserver ;
}

View File

@ -12,6 +12,7 @@
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TreeInput.h"
#include "moses/LM/ORLM.h"
#include "moses-cmd/IOWrapper.h"
#ifdef WITH_THREADS
#include <boost/thread.hpp>
@ -22,6 +23,7 @@
#include <xmlrpc-c/server_abyss.hpp>
using namespace Moses;
using namespace MosesCmd;
using namespace std;
typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -215,6 +217,8 @@ public:
cerr << "Input: " << source << endl;
si = params.find("align");
bool addAlignInfo = (si != params.end());
si = params.find("word-align");
bool addWordAlignInfo = (si != params.end());
si = params.find("sg");
bool addGraphInfo = (si != params.end());
si = params.find("topt");
@ -278,6 +282,20 @@ public:
if (addAlignInfo) {
retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
}
if (addWordAlignInfo) {
stringstream wordAlignment;
OutputAlignment(wordAlignment, hypo);
vector<xmlrpc_c::value> alignments;
string alignmentPair;
while (wordAlignment >> alignmentPair) {
int pos = alignmentPair.find('-');
map<string, xmlrpc_c::value> wordAlignInfo;
wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
}
retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
}
if(addGraphInfo) {
insertGraphInfo(manager,retData);
@ -415,9 +433,25 @@ public:
}
nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str());
if (addAlignmentInfo)
if (addAlignmentInfo) {
nBestXMLItem["align"] = xmlrpc_c::value_array(alignInfo);
if ((int)edges.size() > 0) {
stringstream wordAlignment;
OutputAlignment(wordAlignment, edges[0]);
vector<xmlrpc_c::value> alignments;
string alignmentPair;
while (wordAlignment >> alignmentPair) {
int pos = alignmentPair.find('-');
map<string, xmlrpc_c::value> wordAlignInfo;
wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str()));
wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
}
nBestXMLItem["word-align"] = xmlrpc_c::value_array(alignments);
}
}
// weighted score
nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetTotalScore());
nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem));

View File

@ -553,7 +553,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
//DIMw
void IOWrapper::OutputDetailedAllTranslationReport(
const ChartTrellisPathList &nBestList,
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
const ChartManager &manager,
const Sentence &sentence,
long translationId)
@ -793,6 +793,58 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
m_nBestOutputCollector->Write(translationId, out.str());
}
// Write the n-best list extracted by ChartKBestExtractor for one sentence.
//
// Each derivation becomes one line of the form
//   translationId ||| surface ||| feature scores ||| total score [||| alignments]
// and the whole list is handed to m_nBestOutputCollector in a single Write().
//
// \param nBestList      derivations to print, in the order they should appear
// \param translationId  id printed at the start of every line and passed to
//                       the output collector
// Throws (via UTIL_THROW_IF2) if a derivation's output phrase has fewer than
// two words, since the sentence boundary markers are expected to be present.
void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
                                long translationId)
{
  // The collector is dereferenced immediately below, so it has to be checked
  // before first use rather than just before the final Write().
  assert(m_nBestOutputCollector);

  std::ostringstream out;

  if (m_nBestOutputCollector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapper::FixPrecision(out);
  }

  bool includeWordAlignment =
    StaticData::Instance().PrintAlignmentInfoInNbest();

  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const ChartKBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    out << std::endl;
  }

  m_nBestOutputCollector->Write(translationId, out.str());
}
void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
{
std::ostringstream out;
@ -927,6 +979,85 @@ size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartT
return totalTargetSize;
}
// Collect the word alignment points of a k-best derivation.
//
// Walks the target side of the derivation's rule.  Non-terminals are expanded
// by recursing into the corresponding subderivation; the terminal alignments
// of this rule are then added to retAlign after being shifted to sentence
// positions.
//
// \param retAlign     output set of (source position, target position) pairs;
//                     points are added, nothing is removed
// \param derivation   derivation whose alignments are collected
// \param startTarget  target position at which this derivation's yield starts
// \return number of target words covered by this derivation
// Throws (via UTIL_THROW_IF2) if the non-terminal bookkeeping is inconsistent
// or if an alignment point is produced twice.
size_t IOWrapper::OutputAlignmentNBest(
  Alignments &retAlign,
  const Moses::ChartKBestExtractor::Derivation &derivation,
  size_t startTarget)
{
  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;

  size_t totalTargetSize = 0;
  size_t startSource = hypo.GetCurrSourceRange().GetStartPos();

  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();

  size_t thisSourceSize = CalcSourceSize(&hypo);

  // Per-symbol sizes within the translation rule: non-terminal slots are
  // filled with the size of their subderivation below, everything else stays
  // 0 until ShiftOffsets() is applied.
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = tp.GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  // One subderivation is expected per source-side non-terminal.
  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      size_t sourceInd = targetPos2SourceInd[targetPos];
      size_t sourcePos = sourceInd2pos[sourceInd];

      const Moses::ChartKBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      size_t currStartTarget = startTarget + totalTargetSize;
      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                          currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // A terminal contributes exactly one target word.
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = tp.GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    size_t relSource = align.first;
    size_t relTarget = align.second;
    size_t absSource = sourceOffsets[relSource];
    size_t absTarget = targetOffsets[relTarget];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // The same alignment point must not be produced twice.
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
{
ostringstream out;

View File

@ -40,6 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/TypeDef.h"
#include "moses/Sentence.h"
#include "moses/FactorTypeSet.h"
#include "moses/ChartKBestExtractor.h"
#include "moses/ChartTrellisPathList.h"
#include "moses/OutputCollector.h"
#include "moses/ChartHypothesis.h"
@ -90,6 +91,7 @@ protected:
typedef std::set< std::pair<size_t, size_t> > Alignments;
size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartTrellisNode &node, size_t startTarget);
std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartKBestExtractor::Derivation &derivation, std::size_t startTarget);
size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@ -129,12 +131,13 @@ public:
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId);
void OutputBestNone(long translationId);
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId);
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputDetailedAllTranslationReport(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
void Backtrack(const Moses::ChartHypothesis *hypo);
void ResetTranslationId();

View File

@ -151,7 +151,7 @@ public:
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
size_t nBestSize = staticData.GetNBestSize();
ChartTrellisPathList nBestList;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
manager.CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
m_ioWrapper.OutputDetailedAllTranslationReport(nBestList, manager, sentence, translationId);
}
@ -160,7 +160,7 @@ public:
size_t nBestSize = staticData.GetNBestSize();
if (nBestSize > 0) {
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
ChartTrellisPathList nBestList;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
m_ioWrapper.OutputNBestList(nBestList, translationId);
IFVERBOSE(2) {

View File

@ -180,6 +180,7 @@ public:
} else {
TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
}
delete file;
}
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
@ -233,7 +234,7 @@ public:
} else {
stringstream hypergraphDirName;
hypergraphDirName << boost::filesystem::current_path() << "/hypergraph";
hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
hypergraphDir = hypergraphDirName.str();
}
}
@ -530,9 +531,7 @@ size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff
}
return index+numScoreComps;
} else {
cerr << "Sparse features are not yet supported when outputting hypergraph format" << endl;
assert(false);
return 0;
UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
}
}
@ -644,7 +643,7 @@ int main(int argc, char** argv)
boost::filesystem::path nbestPath(nbestFile);
weightsFilename << nbestPath.parent_path().filename() << "/weights";
} else {
weightsFilename << boost::filesystem::current_path() << "/hypergraph/weights";
weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
}
}
boost::filesystem::path weightsFilePath(weightsFilename.str());

View File

@ -45,6 +45,7 @@ typedef std::vector<ChartHypothesis*> ChartArcList;
class ChartHypothesis
{
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
friend class ChartKBestExtractor;
protected:
#ifdef USE_HYPO_POOL
@ -75,6 +76,9 @@ protected:
//! not implemented
ChartHypothesis(const ChartHypothesis &copy);
//! only used by ChartKBestExtractor
ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
public:
#ifdef USE_HYPO_POOL
void *operator new(size_t /* num_bytes */) {
@ -93,9 +97,6 @@ public:
}
#endif
//! only used by ChartKBestExtractor
ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
ChartManager &manager);

View File

@ -32,52 +32,48 @@ namespace Moses
// Extract the k-best list from the search graph.
void ChartKBestExtractor::Extract(
const std::vector<const ChartHypothesis*> &topHypos, std::size_t k,
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
KBestVec &kBestList)
{
typedef std::vector<const ChartHypothesis*> HypoVec;
kBestList.clear();
if (topHypos.empty()) {
if (topLevelHypos.empty()) {
return;
}
// Create a new top-level ChartHypothesis that has the best hypothesis as its
// predecessor. This is the search hypergraph's target vertex.
HypoVec::const_iterator iter = topHypos.begin();
// Create a new ChartHypothesis object, supremeHypo, that has the best
// top-level hypothesis as its predecessor and has the same score.
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
const ChartHypothesis &bestTopLevelHypo = **p;
boost::scoped_ptr<ChartHypothesis> supremeHypo(
new ChartHypothesis(**iter, *this));
new ChartHypothesis(bestTopLevelHypo, *this));
// Do the same for each alternative top-level hypothesis, but add the new
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
// recombined.
float prevScore = (*iter)->GetTotalScore();
for (++iter; iter != topHypos.end(); ++iter) {
// Check that the first item in topHypos really was the best.
UTIL_THROW_IF2((*iter)->GetTotalScore() <= prevScore,
"top-level vertices are not correctly sorted");
for (++p; p != topLevelHypos.end(); ++p) {
// Check that the first item in topLevelHypos really was the best.
UTIL_THROW_IF2((*p)->GetTotalScore() <= bestTopLevelHypo.GetTotalScore(),
"top-level hypotheses are not correctly sorted");
// Note: there's no need for a smart pointer here: supremeHypo will take
// ownership of altHypo.
ChartHypothesis *altHypo = new ChartHypothesis(**iter, *this);
ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
supremeHypo->AddArc(altHypo);
}
// Create the target vertex corresponding to supremeHypo then generate
// it's k-best list.
boost::shared_ptr<Vertex> top = FindOrCreateVertex(*supremeHypo);
LazyKthBest(*top, k, k);
// Create the target vertex then lazily fill its k-best list.
boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
LazyKthBest(*targetVertex, k, k);
// Copy the k-best list from the target vertex, but drop the top edge from
// each derivation.
kBestList.reserve(top->kBestList.size());
for (KBestVec::const_iterator p = top->kBestList.begin();
p != top->kBestList.end(); ++p) {
const Derivation &d = **p;
assert(d.edge.tail.size() == 1); // d should have exactly one predecessor.
assert(d.backPointers.size() == 1);
std::size_t i = d.backPointers[0];
boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
kBestList.push_back(pred);
kBestList.reserve(targetVertex->kBestList.size());
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
q = targetVertex->kBestList.begin();
q != targetVertex->kBestList.end(); ++q) {
const boost::shared_ptr<Derivation> d(*q);
assert(d);
assert(d->subderivations.size() == 1);
kBestList.push_back(d->subderivations[0]);
}
}
@ -96,8 +92,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
const Word &word = phrase.GetWord(pos);
if (word.IsNonTerminal()) {
std::size_t nonTermInd = nonTermIndexMap[pos];
const Derivation &subderivation =
*d.edge.tail[nonTermInd]->kBestList[d.backPointers[nonTermInd]];
const Derivation &subderivation = *d.subderivations[nonTermInd];
Phrase subPhrase = GetOutputPhrase(subderivation);
ret.Append(subPhrase);
} else {
@ -142,26 +137,6 @@ ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
return edge;
}
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
{
// Create a derivation for v's best incoming edge.
UnweightedHyperarc bestEdge = CreateEdge(v.hypothesis);
boost::shared_ptr<Derivation> d(new Derivation(bestEdge));
v.candidates.push(d);
v.seen.insert(d);
// Create derivations for the rest of v's incoming edges.
const ChartArcList *arcList = v.hypothesis.GetArcList();
if (arcList) {
for (std::size_t i = 0; i < arcList->size(); ++i) {
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
UnweightedHyperarc edge = CreateEdge(recombinedHypo);
boost::shared_ptr<Derivation> d(new Derivation(edge));
v.candidates.push(d);
v.seen.insert(d);
}
}
}
// Look for the vertex corresponding to a given ChartHypothesis, creating
// a new one if necessary.
boost::shared_ptr<ChartKBestExtractor::Vertex>
@ -174,66 +149,110 @@ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
return sp; // Vertex was already in m_vertexMap.
}
sp.reset(new Vertex(h));
// Create the 1-best derivation and add it to the vertex's kBestList.
UnweightedHyperarc bestEdge;
bestEdge.head = sp;
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
bestEdge.tail.resize(prevHypos.size());
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
const ChartHypothesis *prevHypo = prevHypos[i];
bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
}
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
std::pair<DerivationSet::iterator, bool> q =
m_derivations.insert(bestDerivation);
assert(q.second);
sp->kBestList.push_back(bestDerivation);
return sp;
}
// Seed v's candidate queue with the 1-best derivation of every incoming edge
// of v other than the best one.
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
{
  // The best incoming edge is the one defined by v.hypothesis itself, and its
  // 1-best derivation has already been created.  The remaining incoming edges
  // are the hypotheses in v.hypothesis.GetArcList(), if there is such a list.
  const ChartArcList *recombined = v.hypothesis.GetArcList();
  if (!recombined) {
    return;
  }

  for (ChartArcList::const_iterator it = recombined->begin();
       it != recombined->end(); ++it) {
    boost::shared_ptr<Vertex> alt = FindOrCreateVertex(**it);
    // A vertex that has only been looked up holds just its 1-best derivation.
    assert(alt->kBestList.size() == 1);
    v.candidates.push(alt->kBestList[0]);
  }
}
// Lazily fill v's k-best list.
void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
std::size_t globalK)
{
// If this is the first visit to vertex v then initialize the priority queue.
if (v.visited == false) {
// The 1-best derivation should already be in v's k-best list.
assert(v.kBestList.size() == 1);
// Initialize v's priority queue.
GetCandidates(v, globalK);
v.visited = true;
}
// Add derivations to the k-best list until it contains k or there are none
// left to add.
while (v.kBestList.size() < k) {
if (!v.kBestList.empty()) {
// Update the priority queue by adding the successors of the last
// derivation (unless they've been seen before).
const Derivation &d = *v.kBestList.back();
LazyNext(v, d, globalK);
}
assert(!v.kBestList.empty());
// Update the priority queue by adding the successors of the last
// derivation (unless they've been seen before).
boost::shared_ptr<Derivation> d(v.kBestList.back());
LazyNext(v, *d, globalK);
// Check if there are any derivations left in the queue.
if (v.candidates.empty()) {
break;
}
// Get the next best derivation and delete it from the queue.
boost::shared_ptr<Derivation> d = v.candidates.top();
boost::weak_ptr<Derivation> next = v.candidates.top();
v.candidates.pop();
// Add it to the k-best list.
v.kBestList.push_back(d);
v.kBestList.push_back(next);
}
}
// Create the neighbours of Derivation d and add them to v's candidate queue.
void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
std::size_t globalK)
{
// Create the neighbours of Derivation d.
for (std::size_t i = 0; i < d.backPointers.size(); ++i) {
Vertex &predVertex = *d.edge.tail[i];
// Ensure that predVertex's k-best list contains enough derivations.
for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
Vertex &pred = *d.edge.tail[i];
// Ensure that pred's k-best list contains enough derivations.
std::size_t k = d.backPointers[i] + 2;
LazyKthBest(predVertex, k, globalK);
if (predVertex.kBestList.size() < k) {
// predVertex's derivations have been exhausted.
LazyKthBest(pred, k, globalK);
if (pred.kBestList.size() < k) {
// pred's derivations have been exhausted.
continue;
}
// Create the neighbour.
boost::shared_ptr<Derivation> next(new Derivation(d, i));
// Check if it has been created before.
std::pair<Vertex::DerivationSet::iterator, bool> p = v.seen.insert(next);
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
if (p.second) {
v.candidates.push(next); // Haven't previously seen it.
}
}
}
// Construct a Derivation corresponding to a ChartHypothesis.
// Construct the 1-best Derivation that ends at edge e.
ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
{
edge = e;
backPointers.resize(edge.tail.size(), 0);
std::size_t arity = edge.tail.size();
backPointers.resize(arity, 0);
subderivations.reserve(arity);
for (std::size_t i = 0; i < arity; ++i) {
const Vertex &pred = *edge.tail[i];
assert(pred.kBestList.size() >= 1);
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
subderivations.push_back(sub);
}
scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
score = edge.head->hypothesis.GetTotalScore();
}
@ -244,14 +263,16 @@ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
edge.head = d.edge.head;
edge.tail = d.edge.tail;
backPointers = d.backPointers;
subderivations = d.subderivations;
std::size_t j = ++backPointers[i];
scoreBreakdown = d.scoreBreakdown;
// Deduct the score of the old subderivation.
const Derivation &oldSubderivation = *(edge.tail[i]->kBestList[j-1]);
scoreBreakdown.MinusEquals(oldSubderivation.scoreBreakdown);
scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
// Update the subderivation pointer.
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
subderivations[i] = newSub;
// Add the score of the new subderivation.
const Derivation &newSubderivation = *(edge.tail[i]->kBestList[j]);
scoreBreakdown.PlusEquals(newSubderivation.scoreBreakdown);
scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
score = scoreBreakdown.GetWeightedScore();
}

View File

@ -24,6 +24,7 @@
#include "ScoreComponentCollection.h"
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>
#include <queue>
#include <vector>
@ -53,17 +54,46 @@ public:
UnweightedHyperarc edge;
std::vector<std::size_t> backPointers;
std::vector<boost::shared_ptr<Derivation> > subderivations;
ScoreComponentCollection scoreBreakdown;
float score;
};
struct DerivationOrderer {
bool operator()(const boost::shared_ptr<Derivation> &d1,
const boost::shared_ptr<Derivation> &d2) const {
return d1->score < d2->score;
bool operator()(const boost::weak_ptr<Derivation> &d1,
const boost::weak_ptr<Derivation> &d2) const {
boost::shared_ptr<Derivation> s1(d1);
boost::shared_ptr<Derivation> s2(d2);
return s1->score < s2->score;
}
};
// A vertex of the hypergraph searched by the k-best extractor.  It wraps one
// ChartHypothesis and carries the per-vertex state of the lazy k-best search.
struct Vertex {
// Queue of candidate derivations, ordered by DerivationOrderer (which
// compares the derivations' scores).
typedef std::priority_queue<boost::weak_ptr<Derivation>,
std::vector<boost::weak_ptr<Derivation> >,
DerivationOrderer> DerivationQueue;
// Starts out unvisited, with an empty k-best list and candidate queue.
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
// The wrapped hypothesis; held by reference, so it must outlive the Vertex.
const ChartHypothesis &hypothesis;
// Derivations selected so far for this vertex.  Only weak pointers are
// stored here.
// NOTE(review): ownership presumably lives in the extractor's m_derivations
// set - confirm.
std::vector<boost::weak_ptr<Derivation> > kBestList;
// Derivations waiting to be moved into kBestList.
DerivationQueue candidates;
// False until the candidate queue has been initialised for this vertex.
bool visited;
};
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
// Extract the k-best list from the search hypergraph given the full, sorted
// list of top-level vertices.
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
std::size_t k, KBestVec &);
static Phrase GetOutputPhrase(const Derivation &);
private:
typedef boost::unordered_map<const ChartHypothesis *,
boost::shared_ptr<Vertex> > VertexMap;
struct DerivationHasher {
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
std::size_t seed = 0;
@ -83,36 +113,8 @@ public:
}
};
struct Vertex {
typedef std::priority_queue<boost::shared_ptr<Derivation>,
std::vector<boost::shared_ptr<Derivation> >,
DerivationOrderer> DerivationQueue;
typedef boost::unordered_set<boost::shared_ptr<Derivation>,
DerivationHasher,
DerivationEqualityPred> DerivationSet;
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
const ChartHypothesis &hypothesis;
std::vector<boost::shared_ptr<Derivation> > kBestList;
DerivationQueue candidates;
DerivationSet seen;
bool visited;
};
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
// Extract the k-best list from the search hypergraph given the full, sorted
// list of top-level vertices.
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
std::size_t k, KBestVec &);
static Phrase GetOutputPhrase(const Derivation &);
private:
typedef boost::unordered_map<const ChartHypothesis *,
boost::shared_ptr<Vertex> > VertexMap;
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
DerivationEqualityPred> DerivationSet;
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
@ -121,6 +123,7 @@ private:
void LazyNext(Vertex &, const Derivation &, std::size_t);
VertexMap m_vertexMap;
DerivationSet m_derivations;
};
} // namespace Moses

View File

@ -23,6 +23,7 @@
#include "ChartManager.h"
#include "ChartCell.h"
#include "ChartHypothesis.h"
#include "ChartKBestExtractor.h"
#include "ChartTranslationOptions.h"
#include "ChartTrellisDetourQueue.h"
#include "ChartTrellisNode.h"
@ -261,6 +262,65 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
}
}
/** Calculate the n-best paths through the output hypergraph.
* Return the list of paths with the variable ret
* \param n how may paths to return
* \param ret return argument
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
*/
void ChartManager::CalcNBest(
std::size_t n,
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
bool onlyDistinct) const
{
nBestList.clear();
if (n == 0 || m_source.GetSize() == 0) {
return;
}
// Get the list of top-level hypotheses, sorted by score.
WordsRange range(0, m_source.GetSize()-1);
const ChartCell &lastCell = m_hypoStackColl.Get(range);
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
lastCell.GetAllSortedHypotheses());
if (!topLevelHypos) {
return;
}
ChartKBestExtractor extractor;
if (!onlyDistinct) {
// Return the n-best list as is, including duplicate translations.
extractor.Extract(*topLevelHypos, n, nBestList);
return;
}
// Determine how many derivations to extract. If the n-best list is
// restricted to distinct translations then this limit should be bigger
// than n. The n-best factor determines how much bigger the limit should be,
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
// Extract the derivations.
ChartKBestExtractor::KBestVec bigList;
bigList.reserve(numDerivations);
extractor.Extract(*topLevelHypos, numDerivations, bigList);
// Copy derivations into nBestList, skipping ones with repeated translations.
std::set<Phrase> distinct;
for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
nBestList.size() < n && p != bigList.end(); ++p) {
boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
if (distinct.insert(translation).second) {
nBestList.push_back(derivation);
}
}
}
void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
{
size_t size = m_source.GetSize();

View File

@ -30,6 +30,7 @@
#include "SentenceStats.h"
#include "ChartTranslationOptionList.h"
#include "ChartParser.h"
#include "ChartKBestExtractor.h"
#include <boost/shared_ptr.hpp>
@ -71,6 +72,7 @@ public:
void AddXmlChartOptions();
const ChartHypothesis *GetBestHypothesis() const;
void CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct=0) const;
void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
void FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const; /* auxilliary function for GetSearchGraph */

View File

@ -102,8 +102,8 @@ Parameter::Parameter()
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF)");
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder)");
AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed byy a directory name, which must exist");
AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
#ifdef HAVE_PROTOBUF
AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");

View File

@ -0,0 +1,61 @@
#!/usr/bin/perl
use strict;
use Getopt::Long "GetOptions";
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
sub trim($);
sub DeleteScore;
# Comma-separated list of zero-based score indices to keep (--keep-scores).
my $keepScoresStr;
GetOptions(
  "keep-scores=s" => \$keepScoresStr
) or exit(1);

# Without --keep-scores no index is selected, so every score is dropped.
my @keepScores = defined($keepScoresStr) ? split(/,/, $keepScoresStr) : ();

#MAIN LOOP
# Reads lines from STDIN, filters the scores field and echoes everything else.
while (my $line = <STDIN>) {
  chomp($line);
  #print STDERR "line=$line\n";

  # Splitting on a single '|' turns every "|||" separator into two empty
  # tokens, so token 6 is the third "|||"-delimited field of the line.
  my @toks = split(/\|/, $line);

  $toks[6] = DeleteScore($toks[6], \@keepScores);

  # output: re-join with the same single '|' that was used for splitting
  print join("|", @toks), "\n";
}
######################
# Return a copy of the argument with leading and trailing whitespace removed.
sub trim($) {
  my ($text) = @_;
  # Alias the copy as $_ so both substitutions act on it directly.
  for ($text) {
    s/^\s+//;
    s/\s+$//;
  }
  return $text;
}
# Keep only selected scores from a space-separated score string.
#   $_[0]  score string (surrounding whitespace is ignored)
#   $_[1]  reference to an array of zero-based indices of the scores to keep
# Returns the kept scores in the order the indices are listed, each followed
# by one space, with one extra space in front of the whole string.
sub DeleteScore
{
  my ($scoreStr, $keepRef) = @_;

  my @allScores = split(/ /, trim($scoreStr));

  my $kept = join("", map { $allScores[$_] ." " } @{$keepRef});

  return " " .$kept;
}