mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-06 03:33:37 +03:00
9e88f794e6
This performs some minor transformations to Egret forests: escaping of Moses special characters; removal of "^g" suffixes from constituent labels; and marking of slash/hyphen split points (using @ characters).
47 lines
959 B
C++
47 lines
959 B
C++
#pragma once
|
|
|
|
#include <istream>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "SplitPoint.h"
|
|
|
|
namespace MosesTraining
|
|
{
|
|
namespace Syntax
|
|
{
|
|
namespace PostprocessEgretForests
|
|
{
|
|
|
|
class SplitPointFileParser {
|
|
public:
|
|
struct Entry {
|
|
std::vector<SplitPoint> splitPoints;
|
|
};
|
|
|
|
SplitPointFileParser();
|
|
SplitPointFileParser(std::istream &);
|
|
|
|
const Entry &operator*() const { return m_entry; }
|
|
const Entry *operator->() const { return &m_entry; }
|
|
|
|
SplitPointFileParser &operator++();
|
|
|
|
friend bool operator==(const SplitPointFileParser &,
|
|
const SplitPointFileParser &);
|
|
|
|
friend bool operator!=(const SplitPointFileParser &,
|
|
const SplitPointFileParser &);
|
|
|
|
private:
|
|
void ParseLine(const std::string &, std::vector<SplitPoint> &);
|
|
|
|
Entry m_entry;
|
|
std::istream *m_input;
|
|
std::string m_tmpLine;
|
|
};
|
|
|
|
} // namespace PostprocessEgretForests
|
|
} // namespace Syntax
|
|
} // namespace MosesTraining
|