mosesdecoder/moses/Syntax/S2T/PChart.h
Phil Williams 5240c430ce Merge s2t branch
This adds a new string-to-tree decoder, which can be enabled with the -s2t
option.  It's intended to be faster and simpler than the generic chart
decoder, and is designed to support lattice input (still WIP).  For an en-de
system trained on WMT14 data, it's approximately 40% faster in practice.

For background information, see the decoding section of the EMNLP tutorial
on syntax-based MT:

  http://www.emnlp2014.org/tutorials/5_notes.pdf

Some features are not implemented yet, including support for internal tree
structure and soft source-syntactic constraints.
2014-11-04 13:13:56 +00:00

90 lines
2.4 KiB
C++

#pragma once
#include <vector>
#include <boost/unordered_map.hpp>
#include "moses/Syntax/NonTerminalMap.h"
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SymbolEqualityPred.h"
#include "moses/Syntax/SymbolHasher.h"
#include "moses/Word.h"
namespace Moses
{
namespace Syntax
{
namespace S2T
{
// Chart of PVertex objects addressed by (start, end) span positions.
//
// Every cell keeps its terminal and non-terminal vertices in separate maps.
// When m_compressedChart is non-null, each newly inserted non-terminal vertex
// is additionally recorded in a per-start-position index (see AddVertex).
//
// NOTE(review): m_compressedChart is a raw pointer and the class declares a
// destructor, but the constructor and destructor are defined elsewhere.
// Presumably the chart owns that object, in which case copying a PChart would
// be unsafe -- confirm against the implementation file.
class PChart
{
public:
  // The contents of a single chart cell.
  struct Cell
  {
    typedef boost::unordered_map<Word, PVertex, SymbolHasher,
                                 SymbolEqualityPred> TMap;
    typedef NonTerminalMap<PVertex> NMap;

    // Terminal vertices, looked up by their terminal symbol.
    TMap terminalVertices;
    // Non-terminal vertices, looked up by their non-terminal symbol.
    NMap nonTerminalVertices;
  };

  // One entry of the compressed chart: a pointer to a non-terminal vertex
  // stored in a cell, together with the end position of that vertex's span.
  struct CompressedItem {
    std::size_t end;
    const PVertex *vertex;
  };

  // Lists of CompressedItems; AddVertex indexes the outer vector by the id of
  // factor 0 of the vertex's symbol.
  typedef std::vector<std::vector<CompressedItem> > CompressedMatrix;

  // Defined out of line.  Presumably 'width' sizes the cell table and
  // 'maintainCompressedChart' decides whether m_compressedChart is set up --
  // TODO confirm against the implementation file.
  PChart(std::size_t width, bool maintainCompressedChart);

  ~PChart();

  // Number of rows in the cell table.
  std::size_t GetWidth() const {
    return m_cells.size();
  }

  // Read-only access to the cell at [start][end].  No bounds checking is
  // performed.
  const Cell &GetCell(std::size_t start, std::size_t end) const {
    const std::vector<Cell> &row = m_cells[start];
    return row[end];
  }

  // Store a copy of v in the cell selected by v's span and return a reference
  // to the stored object.  If the cell's map already has an entry for
  // v.symbol, the reference returned is to that existing entry.
  PVertex &AddVertex(const PVertex &v) {
    const std::size_t spanStart = v.span.GetStartPos();
    const std::size_t spanEnd = v.span.GetEndPos();
    Cell &target = m_cells[spanStart][spanEnd];

    if (v.symbol.IsNonTerminal()) {
      // Non-terminal: goes into the cell's nonTerminalVertices map.
      std::pair<Cell::NMap::Iterator, bool> outcome =
        target.nonTerminalVertices.Insert(v.symbol, v);
      PVertex &stored = outcome.first->second;
      // Only a genuinely new vertex is recorded in the compressed chart, and
      // only when a compressed chart is being maintained.
      if (m_compressedChart && outcome.second) {
        CompressedItem entry;
        entry.end = spanEnd;
        // Presumably the map keeps its elements at stable addresses -- the
        // stored pointer relies on that; verify against NonTerminalMap.
        entry.vertex = &stored;
        CompressedMatrix &matrix = (*m_compressedChart)[spanStart];
        matrix[v.symbol[0]->GetId()].push_back(entry);
      }
      return stored;
    }

    // Terminal: goes into the cell's terminalVertices map.
    Cell::TMap::value_type keyed(v.symbol, v);
    std::pair<Cell::TMap::iterator, bool> outcome =
      target.terminalVertices.insert(keyed);
    return outcome.first->second;
  }

  // Compressed-chart entries for vertices whose span begins at 'start'.
  // Dereferences m_compressedChart without a null check, so this must only be
  // called when the compressed chart exists.
  const CompressedMatrix &GetCompressedMatrix(std::size_t start) const {
    const CompressedChart &chart = *m_compressedChart;
    return chart[start];
  }

private:
  // One CompressedMatrix per start position.
  typedef std::vector<CompressedMatrix> CompressedChart;

  // Cell table, indexed as m_cells[start][end].
  std::vector<std::vector<Cell> > m_cells;
  // Null when no compressed chart is maintained (AddVertex tests for this).
  CompressedChart *m_compressedChart;
};
} // S2T
} // Syntax
} // Moses