mosesdecoder/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h
Phil Williams 9e88f794e6 Add phrase-extract/postprocess-egret-forests
This performs some minor transformations to Egret forests: escaping of
Moses special characters; removal of "^g" suffixes from constituent labels;
and marking of slash/hyphen split points (using @ characters).
2015-03-10 13:51:30 +00:00

47 lines
959 B
C++

#pragma once
#include <istream>
#include <string>
#include <vector>
#include "SplitPoint.h"
namespace MosesTraining
{
namespace Syntax
{
namespace PostprocessEgretForests
{
// Parser for a split point file, exposed through iterator-style operators:
// dereference (* and ->), pre-increment, and equality / inequality comparison.
//
// NOTE(review): every member function that is only declared here is defined
// in another translation unit. Remarks below about what those functions do
// are assumptions drawn from their signatures -- confirm against the
// implementation before relying on them.
class SplitPointFileParser
{
public:
  // Value obtained by dereferencing the parser: a sequence of split points.
  struct Entry {
    std::vector<SplitPoint> splitPoints;
  };

  // Default construction takes no input stream.
  // NOTE(review): presumably this yields the "end" value that an active
  // parser is compared against -- TODO confirm.
  SplitPointFileParser();

  // Construct from an input stream.
  // NOTE(review): presumably the stream's address is kept in m_input, in
  // which case the stream must outlive the parser -- TODO confirm.
  SplitPointFileParser(std::istream &input);

  // Read-only access to the current entry.
  const Entry &operator*() const {
    return m_entry;
  }

  // Pointer-style access to the same entry that operator* refers to.
  const Entry *operator->() const {
    return &(operator*());
  }

  // Pre-increment.
  // NOTE(review): presumably reads the next line of input and refreshes the
  // current entry -- TODO confirm.
  SplitPointFileParser &operator++();

  // Comparison operators (defined out of line).
  friend bool operator==(const SplitPointFileParser &lhs,
                         const SplitPointFileParser &rhs);
  friend bool operator!=(const SplitPointFileParser &lhs,
                         const SplitPointFileParser &rhs);

private:
  // Takes one line of text and a vector of split points to work on.
  // NOTE(review): presumably the vector is an output parameter filled from
  // the line -- TODO confirm.
  void ParseLine(const std::string &line,
                 std::vector<SplitPoint> &splitPoints);

  Entry m_entry;          // entry returned by operator* / operator->
  std::istream *m_input;  // input stream pointer (non-owning, as far as this header shows)
  std::string m_tmpLine;  // presumably a reusable line buffer -- TODO confirm
};
} // namespace PostprocessEgretForests
} // namespace Syntax
} // namespace MosesTraining