mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
5240c430ce
This adds a new string-to-tree decoder, which can be enabled with the -s2t option. It's intended to be faster and simpler than the generic chart decoder, and is designed to support lattice input (still WIP). For an en-de system trained on WMT14 data, it's approximately 40% faster in practice. For background information, see the decoding section of the EMNLP tutorial on syntax-based MT: http://www.emnlp2014.org/tutorials/5_notes.pdf Some features are not implemented yet, including support for internal tree structure and soft source-syntactic constraints.
90 lines
2.4 KiB
C++
90 lines
2.4 KiB
C++
#pragma once
|
|
|
|
#include <vector>
|
|
|
|
#include <boost/unordered_map.hpp>
|
|
|
|
#include "moses/Syntax/NonTerminalMap.h"
|
|
#include "moses/Syntax/PVertex.h"
|
|
#include "moses/Syntax/SymbolEqualityPred.h"
|
|
#include "moses/Syntax/SymbolHasher.h"
|
|
#include "moses/Word.h"
|
|
|
|
namespace Moses
|
|
{
|
|
namespace Syntax
|
|
{
|
|
namespace S2T
|
|
{
|
|
|
|
class PChart
|
|
{
|
|
public:
|
|
struct Cell
|
|
{
|
|
typedef boost::unordered_map<Word, PVertex, SymbolHasher,
|
|
SymbolEqualityPred> TMap;
|
|
typedef NonTerminalMap<PVertex> NMap;
|
|
// Collection of terminal vertices (keyed by terminal symbol).
|
|
TMap terminalVertices;
|
|
// Collection of non-terminal vertices (keyed by non-terminal symbol).
|
|
NMap nonTerminalVertices;
|
|
};
|
|
|
|
struct CompressedItem {
|
|
std::size_t end;
|
|
const PVertex *vertex;
|
|
};
|
|
|
|
typedef std::vector<std::vector<CompressedItem> > CompressedMatrix;
|
|
|
|
PChart(std::size_t width, bool maintainCompressedChart);
|
|
|
|
~PChart();
|
|
|
|
std::size_t GetWidth() const { return m_cells.size(); }
|
|
|
|
const Cell &GetCell(std::size_t start, std::size_t end) const {
|
|
return m_cells[start][end];
|
|
}
|
|
|
|
// Insert the given PVertex and return a reference to the inserted object.
|
|
PVertex &AddVertex(const PVertex &v) {
|
|
const std::size_t start = v.span.GetStartPos();
|
|
const std::size_t end = v.span.GetEndPos();
|
|
Cell &cell = m_cells[start][end];
|
|
// If v is a terminal vertex add it to the cell's terminalVertices map.
|
|
if (!v.symbol.IsNonTerminal()) {
|
|
Cell::TMap::value_type x(v.symbol, v);
|
|
std::pair<Cell::TMap::iterator, bool> ret =
|
|
cell.terminalVertices.insert(x);
|
|
return ret.first->second;
|
|
}
|
|
// If v is a non-terminal vertex add it to the cell's nonTerminalVertices
|
|
// map and update the compressed chart (if enabled).
|
|
std::pair<Cell::NMap::Iterator, bool> result =
|
|
cell.nonTerminalVertices.Insert(v.symbol, v);
|
|
if (result.second && m_compressedChart) {
|
|
CompressedItem item;
|
|
item.end = end;
|
|
item.vertex = &(result.first->second);
|
|
(*m_compressedChart)[start][v.symbol[0]->GetId()].push_back(item);
|
|
}
|
|
return result.first->second;
|
|
}
|
|
|
|
const CompressedMatrix &GetCompressedMatrix(std::size_t start) const {
|
|
return (*m_compressedChart)[start];
|
|
}
|
|
|
|
private:
|
|
typedef std::vector<CompressedMatrix> CompressedChart;
|
|
|
|
std::vector<std::vector<Cell> > m_cells;
|
|
CompressedChart *m_compressedChart;
|
|
};
|
|
|
|
} // S2T
|
|
} // Syntax
|
|
} // Moses
|