extract-ghkm: add --SentenceOffset option

This should behave the same as the --SentenceOffset option for
extract-rules.  The extract-parallel.perl script expects the rule
extractor to have this option.
This commit is contained in:
Phil Williams 2012-10-03 20:04:09 +01:00
parent 289a9ea54f
commit 0851a4d113
2 changed files with 6 additions and 1 deletion

View File

@@ -90,7 +90,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::string alignmentLine;
XmlTreeParser xmlTreeParser(labelSet, topLabelSet);
ScfgRuleWriter writer(fwdExtractStream, invExtractStream, options);
-size_t lineNum = 0;
+size_t lineNum = options.sentenceOffset;
while (true) {
std::getline(targetStream, targetLine);
std::getline(sourceStream, sourceLine);
@@ -289,6 +289,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
"extract minimal rules only")
("PCFG",
"include score based on PCFG scores in target corpus")
+("SentenceOffset",
+po::value(&options.sentenceOffset)->default_value(options.sentenceOffset),
+"set sentence number offset if processing split corpus")
("UnknownWordLabel",
po::value(&options.unknownWordFile),
"write unknown word labels to named file")

View File

@@ -38,6 +38,7 @@ struct Options {
, maxScope(3)
, minimal(false)
, pcfg(false)
+, sentenceOffset(0)
, unpairedExtractFormat(false) {}
// Positional options
@@ -57,6 +58,7 @@ struct Options {
int maxScope;
bool minimal;
bool pcfg;
+int sentenceOffset;
bool unpairedExtractFormat;
std::string unknownWordFile;
};