mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-27 11:59:52 +03:00
9e88f794e6
This performs some minor transformations to Egret forests: escaping of Moses special characters; removal of "^g" suffixes from constituent labels; and marking of slash/hyphen split points (using @ characters).
28 lines
464 B
C++
28 lines
464 B
C++
#pragma once
|
|
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
#include "Forest.h"
|
|
|
|
namespace MosesTraining
|
|
{
|
|
namespace Syntax
|
|
{
|
|
namespace PostprocessEgretForests
|
|
{
|
|
|
|
/// One split-point record; element type of the vectors taken by both
/// MarkSplitPoints() overloads declared in this header.
///
/// This is a plain aggregate with no constructors, so the two int members hold
/// indeterminate values unless the caller assigns them or value-/aggregate-
/// initialises the object.
struct SplitPoint {
  // NOTE(review): presumably the index of the token that contains the split
  // point -- confirm against the MarkSplitPoints() definitions.
  int tokenPos;

  // NOTE(review): presumably the character offset of the split point within
  // that token -- confirm against the MarkSplitPoints() definitions.
  int charPos;

  // NOTE(review): presumably the connecting text at the split point (the
  // file description mentions slash/hyphen split points) -- confirm.
  std::string connector;
};
|
|
|
|
// Overload operating on a Forest. Takes the split points by const reference
// and the forest by non-const reference; returns nothing. Only the
// declaration is visible in this header.
// NOTE(review): presumably marks the given split points in the forest in
// place (the file description mentions '@' characters) -- confirm against
// the definition.
void MarkSplitPoints(const std::vector<SplitPoint> &, Forest &);
|
|
|
|
// Overload operating on a std::string. Takes the split points by const
// reference and the string by non-const reference; returns nothing. Only the
// declaration is visible in this header.
// NOTE(review): presumably marks the given split points in the string in
// place (the file description mentions '@' characters) -- confirm against
// the definition.
void MarkSplitPoints(const std::vector<SplitPoint> &, std::string &);
|
|
|
|
} // namespace PostprocessEgretForests
|
|
} // namespace Syntax
|
|
} // namespace MosesTraining
|