mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-17 23:40:50 +03:00
49 lines
1.4 KiB
C++
49 lines
1.4 KiB
C++
#pragma once
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "syntax-common/tree_fragment_tokenizer.h"
|
|
|
|
#include "RuleSymbol.h"
|
|
|
|
namespace MosesTraining
|
|
{
|
|
namespace Syntax
|
|
{
|
|
namespace ScoreStsg
|
|
{
|
|
|
|
// Stores one half of an STSG rule, as represented in the extract file. The
|
|
// original string is stored as the member 'string', along with its token
|
|
// sequence ('tokens') and frontier symbol sequence ('frontierSymbols'). Note
|
|
// that 'tokens' and 'frontierSymbols' use StringPiece objects that depend on
|
|
// the original string. Therefore changing the value of 'string' invalidates
|
|
// both 'tokens' and 'frontierSymbols'.
|
|
struct TokenizedRuleHalf {
|
|
bool IsFullyLexical() const;
|
|
bool IsString() const;
|
|
bool IsTree() const;
|
|
|
|
// The rule half as it appears in the extract file, except with any trailing
|
|
// or leading spaces removed (here a space is defined as a blank or a tab).
|
|
std::string string;
|
|
|
|
// The token sequence for the string.
|
|
std::vector<TreeFragmentToken> tokens;
|
|
|
|
// The frontier symbols of the rule half. For example:
|
|
//
|
|
// string: "[VP [VBN] [PP [IN] [NP [DT] [JJ positive] [NN light]]]]"
|
|
// frontier: ("VBN",t), ("IN",t), ("DT",t), ("positive",f), ("light",f)
|
|
//
|
|
// string: "[X] [X] Sinne [X]"
|
|
// frontier: ("X",t), ("X",t), ("Sinne",f), ("X",t)
|
|
//
|
|
std::vector<RuleSymbol> frontierSymbols;
|
|
};
|
|
|
|
} // namespace ScoreStsg
|
|
} // namespace Syntax
|
|
} // namespace MosesTraining
|