Add the '--eval-sacrebleu-args' parameter to the translation task

This commit is contained in:
Bing Han 2022-09-02 03:44:26 +08:00
parent b4001184f4
commit 2e57d1f8c2

View File

@ -259,6 +259,13 @@ class TranslationConfig(FairseqDataclass):
"argparse_const": "@@ ",
},
)
# Extra keyword arguments for sacrebleu.corpus_bleu, given as a JSON object
# string. The task parses this value with json.loads and unpacks the result
# into the corpus_bleu call; the default "{}" contributes no extra arguments.
# NOTE(review): a "tokenize" entry appears to be overwritten with "none" when
# eval_tokenized_bleu is set — confirm against the task code.
# NOTE(review): the annotation is Optional[str], but json.loads raises
# TypeError on None — confirm that None is never assigned to this option.
eval_sacrebleu_args: Optional[str] = field(
default="{}",
metadata={
"help": "args for sacrebleu.corpus_bleu, as JSON string; "
'e.g., \'{"tokenize": "zh", "lowercase": true}\''
},
)
# When True, the task logs the first hypothesis and the first reference
# (hyps[0] and refs[0]) via logger.info while computing BLEU; off by default.
eval_bleu_print_samples: bool = field(
default=False, metadata={"help": "print sample generations during validation"}
)
@ -491,7 +498,9 @@ class TranslationTask(FairseqTask):
if self.cfg.eval_bleu_print_samples:
logger.info("example hypothesis: " + hyps[0])
logger.info("example reference: " + refs[0])
sacrebleu_args = json.loads(self.cfg.eval_sacrebleu_args)
if self.cfg.eval_tokenized_bleu:
return sacrebleu.corpus_bleu(hyps, [refs], tokenize="none")
else:
return sacrebleu.corpus_bleu(hyps, [refs])
# force set the sacrebleu tokenizer to "none" to avoid double-tokenization
sacrebleu_args["tokenize"] = "none"
return sacrebleu.corpus_bleu(hyps, [refs], **sacrebleu_args)