use square brackets with output format '--brackets' (for cleaner escaping and consistency with decoder tree output)

This commit is contained in:
Rico Sennrich 2014-09-15 14:37:52 +01:00
parent 7e359a8892
commit 0861b464c5
2 changed files with 10 additions and 15 deletions

View File

@@ -57,7 +57,7 @@ Parameter::Parameter()
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
AddParam("max-phrase-length", "maximum phrase length (default 20)");
AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
AddParam("n-best-trees", "Write n-best target-side trees (additional to normal n-best-list) to file {n-best-list-file}.trees");
AddParam("n-best-trees", "Write n-best target-side trees to n-best-list");
AddParam("lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
AddParam("print-all-derivations", "to print all derivations in search graph");

View File

@@ -55,6 +55,8 @@ def escape_special_chars(line):
line = line.replace('\'','&apos;') # xml
line = line.replace('"','&quot;') # xml
line = line.replace('[','&#91;') # syntax non-terminal
line = line.replace(']','&#93;') # syntax non-terminal
return line
@@ -91,11 +93,11 @@ def write(sentence, output_format='xml'):
out = create_brackets(0,sentence)
out = out.replace('|','&#124;') # factor separator
out = out.replace('[','&#91;') # syntax non-terminal
out = out.replace(']','&#93;') # syntax non-terminal
out = out.replace('&amp;apos;','&apos;') # lxml is buggy if input is escaped
out = out.replace('&amp;quot;','&quot;') # lxml is buggy if input is escaped
out = out.replace('&amp;#91;','&#91;') # lxml is buggy if input is escaped
out = out.replace('&amp;#93;','&#93;') # lxml is buggy if input is escaped
print(out)
@@ -138,9 +140,9 @@ def create_subtree(position, sentence):
def create_brackets(position, sentence):
if position:
element = "( " + sentence[position].proj_func + ' '
element = "[ " + sentence[position].proj_func + ' '
else:
element = "( sent "
element = "[ sent "
for i in range(1,position):
if sentence[i].proj_head == position:
@@ -148,26 +150,19 @@ def create_brackets(position, sentence):
if position:
word = sentence[position].word
if word == ')':
word = 'RBR'
elif word == '(':
word = 'LBR'
tag = sentence[position].tag
if tag == '$(':
tag = '$BR'
if preterminals:
element += '( ' + tag + ' ' + word + ' ) '
element += '[ ' + tag + ' ' + word + ' ] '
else:
element += word + ' ) '
element += word + ' ] '
for i in range(position, len(sentence)):
if i and sentence[i].proj_head == position:
element += create_brackets(i, sentence)
if preterminals or not position:
element += ') '
element += '] '
return element