mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
use square brackets with output format '--brackets' (for cleaner escaping and consistency with decoder tree output)
This commit is contained in:
parent
7e359a8892
commit
0861b464c5
@ -57,7 +57,7 @@ Parameter::Parameter()
|
||||
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
|
||||
AddParam("max-phrase-length", "maximum phrase length (default 20)");
|
||||
AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
|
||||
AddParam("n-best-trees", "Write n-best target-side trees (additional to normal n-best-list) to file {n-best-list-file}.trees");
|
||||
AddParam("n-best-trees", "Write n-best target-side trees to n-best-list");
|
||||
AddParam("lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
|
||||
AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
|
||||
AddParam("print-all-derivations", "to print all derivations in search graph");
|
||||
|
@ -55,6 +55,8 @@ def escape_special_chars(line):
|
||||
|
||||
line = line.replace('\'','&apos;') # xml
|
||||
line = line.replace('"','&quot;') # xml
|
||||
line = line.replace('[','&#91;') # syntax non-terminal
|
||||
line = line.replace(']','&#93;') # syntax non-terminal
|
||||
|
||||
return line
|
||||
|
||||
@ -91,11 +93,11 @@ def write(sentence, output_format='xml'):
|
||||
out = create_brackets(0,sentence)
|
||||
|
||||
out = out.replace('|','&#124;') # factor separator
|
||||
out = out.replace('[','&#91;') # syntax non-terminal
|
||||
out = out.replace(']','&#93;') # syntax non-terminal
|
||||
|
||||
out = out.replace('&amp;apos;','&apos;') # lxml is buggy if input is escaped
|
||||
out = out.replace('&amp;quot;','&quot;') # lxml is buggy if input is escaped
|
||||
out = out.replace('&amp;#91;','&#91;') # lxml is buggy if input is escaped
|
||||
out = out.replace('&amp;#93;','&#93;') # lxml is buggy if input is escaped
|
||||
|
||||
print(out)
|
||||
|
||||
@ -138,9 +140,9 @@ def create_subtree(position, sentence):
|
||||
def create_brackets(position, sentence):
|
||||
|
||||
if position:
|
||||
element = "( " + sentence[position].proj_func + ' '
|
||||
element = "[ " + sentence[position].proj_func + ' '
|
||||
else:
|
||||
element = "( sent "
|
||||
element = "[ sent "
|
||||
|
||||
for i in range(1,position):
|
||||
if sentence[i].proj_head == position:
|
||||
@ -148,26 +150,19 @@ def create_brackets(position, sentence):
|
||||
|
||||
if position:
|
||||
word = sentence[position].word
|
||||
if word == ')':
|
||||
word = 'RBR'
|
||||
elif word == '(':
|
||||
word = 'LBR'
|
||||
|
||||
tag = sentence[position].tag
|
||||
if tag == '$(':
|
||||
tag = '$BR'
|
||||
|
||||
if preterminals:
|
||||
element += '( ' + tag + ' ' + word + ' ) '
|
||||
element += '[ ' + tag + ' ' + word + ' ] '
|
||||
else:
|
||||
element += word + ' ) '
|
||||
element += word + ' ] '
|
||||
|
||||
for i in range(position, len(sentence)):
|
||||
if i and sentence[i].proj_head == position:
|
||||
element += create_brackets(i, sentence)
|
||||
|
||||
if preterminals or not position:
|
||||
element += ') '
|
||||
element += '] '
|
||||
|
||||
return element
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user