mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
extract-ghkm: add --SentenceOffset option
This should behave the same as the --SentenceOffset option for extract-rules. The extract-parallel.perl script expects the rule extractor to have this option.
This commit is contained in:
parent
289a9ea54f
commit
0851a4d113
@@ -90,7 +90,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
std::string alignmentLine;
|
||||
XmlTreeParser xmlTreeParser(labelSet, topLabelSet);
|
||||
ScfgRuleWriter writer(fwdExtractStream, invExtractStream, options);
|
||||
- size_t lineNum = 0;
|
||||
+ size_t lineNum = options.sentenceOffset;
|
||||
while (true) {
|
||||
std::getline(targetStream, targetLine);
|
||||
std::getline(sourceStream, sourceLine);
|
||||
@@ -289,6 +289,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
|
||||
"extract minimal rules only")
|
||||
("PCFG",
|
||||
"include score based on PCFG scores in target corpus")
|
||||
+ ("SentenceOffset",
|
||||
+ po::value(&options.sentenceOffset)->default_value(options.sentenceOffset),
|
||||
+ "set sentence number offset if processing split corpus")
|
||||
("UnknownWordLabel",
|
||||
po::value(&options.unknownWordFile),
|
||||
"write unknown word labels to named file")
|
||||
|
@@ -38,6 +38,7 @@ struct Options {
|
||||
, maxScope(3)
|
||||
, minimal(false)
|
||||
, pcfg(false)
|
||||
+ , sentenceOffset(0)
|
||||
, unpairedExtractFormat(false) {}
|
||||
|
||||
// Positional options
|
||||
@@ -57,6 +58,7 @@ struct Options {
|
||||
int maxScope;
|
||||
bool minimal;
|
||||
bool pcfg;
|
||||
+ int sentenceOffset;
|
||||
bool unpairedExtractFormat;
|
||||
std::string unknownWordFile;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user