Merge github.com:moses-smt/mosesdecoder

Hieu Hoang 2012-05-31 17:02:24 +01:00
commit c3d4615fc0
14 changed files with 365 additions and 202 deletions

View File

@ -0,0 +1,8 @@
ad af 500 1000
bd bf 5 10
der le 20285 102586
der NULL 12926 704917
gipfel sommet 3485 7322
pass col 419 2911
pass passeport 7 28
sitzung séance 14 59

View File

@ -0,0 +1,8 @@
af ad 500 1000
bf bd 5 10
col pass 419 615
le der 20285 113635
passeport pass 7 615
retrouvé NULL 34 1016136
séance sitzung 14 33
sommet gipfel 3485 5700

View File

@ -0,0 +1,8 @@
ad af 0.5
bd bf 0.5
der le 0.1977365
der NULL 0.0183369
gipfel sommet 0.4759629
pass col 0.1439368
pass passeport 0.2500000
sitzung séance 0.2372881
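Each probability in the table above is simply the joint count from the corresponding count file earlier in this commit divided by the marginal count in its last column (e.g. 3485/7322 ≈ 0.4759629 for gipfel/sommet, 14/59 ≈ 0.2372881 for sitzung/séance). A minimal sketch of that relation; the file path is an assumption for illustration, not part of the commit:

# derive lexical probabilities from a lexical count file
# (columns per line: word_a word_b joint_count marginal_count)
with open('test/model1/model/lex.counts.e2f') as counts:  # path assumed
    for line in counts:
        a, b, joint, marginal = line.split()
        print(a, b, float(joint) / float(marginal))       # e.g. gipfel sommet 0.47596...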

View File

@ -0,0 +1,8 @@
af ad 0.5
bf bd 0.5
col pass 0.6813008
le der 0.1785101
passeport pass 0.0113821
retrouvé NULL 0.0000335
séance sitzung 0.4242424
sommet gipfel 0.6114035

View File

@ -0,0 +1,8 @@
ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000
bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10
der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518
der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749 45
pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582
pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.25 0.00171821 3.813e-07 2.718 ||| 0-0 ||| 2 582
pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15 582
sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 ||| 22 17
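The rows above follow the usual Moses phrase-table layout that the script in this commit parses: source ||| target ||| feature scores ||| word alignment ||| counts (target count first, then source count), with the trailing 2.718 (≈ e) acting as the constant phrase penalty. A minimal parsing sketch using one of the lines above:

line = b'pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15 582'
src, target, scores, alignment, counts = line.rstrip().split(b' ||| ')
features = [float(x) for x in scores.split()]         # the last feature is the constant phrase penalty
target_count, src_count = map(float, counts.split())  # 15.0, 582.0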

View File

@ -0,0 +1,8 @@
ad af 100 1000
bd bf 1 10
der le 150181 944391
der NULL 54483 3595140
gipfel sommet 3421 9342
pass col 2 70
pass passeport 73 379
sitzung séance 3441 5753

View File

@ -0,0 +1,8 @@
af ad 100 1000
bf bd 1 10
col pass 2 108
le der 150181 1356104
passeport pass 73 108
retrouvé NULL 43 6276240
séance sitzung 3441 6142
sommet gipfel 3421 4908

View File

@ -0,0 +1,8 @@
ad af 0.1
bd bf 0.1
der le 0.1590242
der NULL 0.0151546
gipfel sommet 0.366195
pass col 0.0285714
pass passeport 0.1926121
sitzung séance 0.5981227

View File

@ -0,0 +1,8 @@
af ad 0.1
bf bd 0.1
col pass 0.0185185
le der 0.1107445
passeport pass 0.6759259
retrouvé NULL 0.0000069
séance sitzung 0.5602410
sommet gipfel 0.6970253

View File

@ -0,0 +1,5 @@
ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000
bd ||| bf ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 10 10
der pass ||| le passeport ||| 0.6 0.6 0.6 0.6 0.16 0.03063 0.4 0.0748551 2.718 ||| 0-0 1-1 ||| 25 10
pass ||| passeport ||| 0.6 0.6 0.6 0.6 0.28022 0.192612 0.607143 0.675926 2.718 ||| 0-0 ||| 182 84
sitzung ||| séance ||| 0.6 0.6 0.6 0.6 0.784521 0.598123 0.516654 0.560241 2.718 ||| 0-0 ||| 4251 6455

View File

@ -0,0 +1,9 @@
ad ||| af ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673
bd ||| bf ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673
der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0
der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673
der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673
pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885
pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885
pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885
sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589

View File

@ -1,9 +1,9 @@
ad ||| af ||| 0.5 0.398085 0.5 0.482814 2.718 ||| 0-0 ||| 1000.000001 1000.000001
bd ||| bf ||| 0.5 0.111367 0.5 0.172867 2.718 ||| 0-0 ||| 10.00000001 10.00000001
ad ||| af ||| 0.242966 0.398085 0.483231 0.482814 2.718 ||| 0-0 ||| 2797.86490081 1043.7557397
bd ||| bf ||| 0.102213 0.111367 0.174411 0.172867 2.718 ||| 0-0 ||| 1807.86490081 53.7557396976
der gipfel ||| sommet ||| 0.00327135 0.00863717 0.0366795 0.612073 2.718 ||| 1-0 ||| 5808.0 518.0
der pass ||| le col ||| 0.0173565 0.0260469 0.288889 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 45.00000001
der pass ||| le passeport ||| 0.0064 0.0389201 8.88889e-12 0.0101009 2.718 ||| 0-0 1-1 ||| 2.5e-08 45.00000001
pass ||| col ||| 0.1952 0.131811 0.628866 0.63621 2.718 ||| 0-0 ||| 1875.0 582.000000084
pass ||| passeport retrouvé ||| 0.5 0.196956 0.00171821 1.89355e-06 2.718 ||| 0-0 ||| 2.0 582.000000084
pass ||| passeport ||| 0.266667 0.196956 0.00687285 0.0565932 2.718 ||| 0-0 ||| 15.000000182 582.000000084
sitzung ||| séance ||| 0.272727 0.545019 0.352941 0.502625 2.718 ||| 0-0 ||| 22.000004251 17.000006455
der pass ||| le col ||| 0.0173565 0.0260469 0.146469 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 88.7557396976
der pass ||| le passeport ||| 0.16 0.0389201 0.197196 0.0101009 2.718 ||| 0-0 1-1 ||| 1797.86490081 88.7557396976
pass ||| col ||| 0.1952 0.131811 0.584893 0.63621 2.718 ||| 0-0 ||| 1875.0 625.755739698
pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89355e-06 2.718 ||| 0-0 ||| 2.0 625.755739698
pass ||| passeport ||| 0.280108 0.196956 0.0488465 0.0565932 2.718 ||| 0-0 ||| 1812.86490081 625.755739698
sitzung ||| séance ||| 0.778334 0.545019 0.470846 0.502625 2.718 ||| 0-0 ||| 1819.86490081 60.7557396976

View File

@ -0,0 +1,9 @@
ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0
bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0
der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0
der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0
der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0
pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0
pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0
pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0
sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0

View File

@ -36,7 +36,10 @@ from collections import defaultdict
from operator import mul
from tempfile import NamedTemporaryFile
from subprocess import Popen
from itertools import izip
try:
from itertools import izip
except ImportError:
izip = zip
try:
from lxml import etree as ET
@ -85,76 +88,9 @@ class Moses():
filename = os.path.join(model,'model',table)
fileobj = handle_file(filename,'open',mode)
return fileobj
def load_phrase_counts(self,line,priority,i,store='all',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False):
"""take single phrase table line and store counts in internal data structure"""
src = line[0]
target = line[1]
if inverted:
src,target = target,src
target_count = 0
src_count = 0
if (store == 'all' or store == 'pairs') and not (filter_by and not (src in filter_by and target in filter_by[src])):
if priority < 10 or (src in self.phrase_pairs and target in self.phrase_pairs[src]):
try:
target_count,src_count = map(float,line[-1].split())
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
return
scores = line[2].split()
pst = float(scores[0])
pts = float(scores[2])
if priority == 2: #MAP
self.phrase_pairs[src][target][0][0][i] = pst
self.phrase_pairs[src][target][0][1][i] = pts
else:
self.phrase_pairs[src][target][0][0][i] = pst*target_count
self.phrase_pairs[src][target][0][1][i] = pts*src_count
self.store_info(src,target,line)
if (store == 'all' or store == 'source') and not (filter_by_src and not filter_by_src.get(src)):
if not src_count:
try:
if priority == 2: #MAP
src_count = 1
else:
src_count = float(line[-1].split()[1])
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
return
self.phrase_source[src][i] = src_count
if (store == 'all' or store == 'target') and not (filter_by_target and not filter_by_target.get(target)):
if not target_count:
try:
if priority == 2: #MAP
target_count = 1
else:
target_count = float(line[-1].split()[0])
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
return
self.phrase_target[target][i] = target_count
def load_phrase_probabilities(self,line,priority,i,store='pairs',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False):
def load_phrase_features(self,line,priority,i,mode='interpolate',store='pairs',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False,flags=None):
"""take single phrase table line and store probabilities in internal data structure"""
src = line[0]
@ -167,18 +103,54 @@ class Moses():
self.store_info(src,target,line)
model_probabilities = map(float,line[2].split()[:-1])
scores = line[2].split()
if len(scores) < self.number_of_features:
sys.stderr.write('Error: model only has {0} features. Expected {1}.\n'.format(len(scores),self.number_of_features))
exit()
scores = scores[:self.number_of_features]
model_probabilities = map(float,scores)
phrase_probabilities = self.phrase_pairs[src][target][0]
if mode == 'counts' and not priority == 2: #priority 2 is MAP
try:
target_count,src_count = map(float,line[-1].split())
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
return
i_e2f = flags['i_e2f']
i_f2e = flags['i_f2e']
model_probabilities[i_e2f] *= target_count
model_probabilities[i_f2e] *= src_count
for j,p in enumerate(model_probabilities):
phrase_probabilities[j][i] = p
# mark that the src/target phrase has been seen.
# needed for re-normalization during linear interpolation
if (store == 'all' or store == 'source') and not (filter_by_src and not src in filter_by_src):
self.phrase_source[src][i] = 1
if mode == 'counts' and not priority == 2: #priority 2 is MAP
try:
self.phrase_source[src][i] = float(line[-1].split()[1])
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
return
else:
self.phrase_source[src][i] = 1
if (store == 'all' or store == 'target') and not (filter_by_target and not target in filter_by_target):
self.phrase_target[target][i] = 1
if mode == 'counts' and not priority == 2: #priority 2 is MAP
try:
self.phrase_target[target][i] = float(line[-1].split()[0])
except:
sys.stderr.write(str(line)+'\n')
sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
return
else:
self.phrase_target[target][i] = 1
def load_reordering_probabilities(self,line,priority,i,store='pairs'):
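For orientation: in mode 'counts' the branch above turns the stored conditional probabilities back into (weighted) joint counts by multiplying them with the marginal counts from the last field of the phrase-table line. A hedged numeric check with the pass/passeport line from the test table earlier in this commit:

p_s_given_t, p_t_given_s = 0.266667, 0.00687285  # phrase-table features for "pass ||| passeport"
target_count, src_count = 15.0, 582.0            # counts field of the same line
print(p_s_given_t * target_count)                # ~4.0 joint occurrences
print(p_t_given_s * src_count)                   # ~4.0, the same joint count recovered from either direction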
@ -194,8 +166,7 @@ class Moses():
reordering_probabilities[j][i] = p
def traverse_incrementally(self,table,models,load_lines,store_flag,inverted=False,lowmem=False):
def traverse_incrementally(self,table,models,load_lines,store_flag,mode='interpolate',inverted=False,lowmem=False,flags=None):
"""hack-ish way to find common phrase pairs in multiple models in one traversal without storing it all in memory
relies on alphabetical sorting of phrase table.
"""
@ -218,7 +189,7 @@ class Moses():
if increment != stack[i][0]:
continue
else:
load_lines(stack[i],priority,i,store=store_flag,inverted=inverted)
load_lines(stack[i],priority,i,mode=mode,store=store_flag,inverted=inverted,flags=flags)
stack[i] = ''
for line in model:
@ -229,7 +200,7 @@ class Moses():
stack[i] = line
break
load_lines(line,priority,i,store=store_flag,inverted=inverted)
load_lines(line,priority,i,mode=mode,store=store_flag,inverted=inverted,flags=flags)
yield 1
@ -255,7 +226,7 @@ class Moses():
self.word_pairs_f2e[a][b][i] = float(prob)
def load_word_counts(self,line,side,i,priority,e2f_filter=None,f2e_filter=None):
def load_word_counts(self,line,side,i,priority,e2f_filter=None,f2e_filter=None,flags=None):
"""process single line of lexical table"""
a, b, ab_count, b_count = line.split(b' ')
@ -389,8 +360,10 @@ class Moses():
# information specific to Moses model: alignment info and comment section with target and source counts
alignment,comments = self.phrase_pairs[src][target][1]
if mode == 'counts':
srccount = dot_product(self.phrase_source[src],weights[2])
targetcount = dot_product(self.phrase_target[target],weights[0])
i_e2f = flags['i_e2f']
i_f2e = flags['i_f2e']
srccount = dot_product(self.phrase_source[src],weights[i_f2e])
targetcount = dot_product(self.phrase_target[target],weights[i_e2f])
comments = b"%s %s" %(targetcount,srccount)
features = b' '.join([b'%.6g' %(f) for f in features])
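A hedged illustration of the weighted marginal counts written into the comment field above, using the pass/passeport counts from the two test tables and the weight vectors of regression test 9 below (normalized so the first model has weight 1, i.e. 1,9 for e2f and 1,4 for f2e):

srccount    = 1 * 582.0 + 4 * 84.0   # 918.0
targetcount = 1 * 15.0  + 9 * 182.0  # 1653.0
# the counts field written out is then "1653.0 918.0", matching the expected combined table earlier in this commit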
@ -471,7 +444,8 @@ class Moses():
def merge(self,pt_normal, pt_inverse, pt_out, mode='interpolate'):
"""merge two phrasetables (the latter having been inverted to calculate p(s|t) and lex(s|t) in sorted order)"""
"""merge two phrasetables (the latter having been inverted to calculate p(s|t) and lex(s|t) in sorted order)
Assumes that p(s|t) and lex(s|t) are in the first half of the feature vector, p(t|s) and lex(t|s) in the second"""
for line,line2 in izip(pt_normal,pt_inverse):
@ -479,9 +453,10 @@ class Moses():
line2 = line2.split(b' ||| ')
#scores
mid = int(self.number_of_features/2)
scores1 = line[2].split()
scores2 = line2[2].split()
line[2] = b' '.join(scores2[:2]+scores1[2:])
line[2] = b' '.join(scores2[:mid]+scores1[mid:])
# marginal counts
if mode == 'counts':
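An illustrative sketch of the generalized score merge above (values made up): with four features the midpoint is two, so the merged line takes p(s|t) and lex(s|t) from the inverted table and p(t|s) and lex(t|s) from the normal one.

scores1 = b'0.11 0.12 0.13 0.14'.split()            # from pt_normal (second half is valid here)
scores2 = b'0.21 0.22 0.23 0.24'.split()            # from pt_inverse (first half is valid here)
mid = 4 // 2
merged = b' '.join(scores2[:mid] + scores1[mid:])   # b'0.21 0.22 0.13 0.14'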
@ -722,17 +697,14 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
don't call this directly, but use one of the Combine_TMs methods.
"""
weights = normalize_weights(weights,mode)
weights = normalize_weights(weights,mode,flags)
if 'compare_cross-entropies' in flags and flags['compare_cross-entropies']:
num_results = len(model_interface.models)
else:
num_results = 1
cross_entropy_ts = [0]*num_results
cross_entropy_st = [0]*num_results
cross_entropy_lex_ts = [0]*num_results
cross_entropy_lex_st = [0]*num_results
cross_entropies = [[0]*num_results for i in range(model_interface.number_of_features)]
oov = [0]*num_results
oov2 = 0
other_translations = [0]*num_results
@ -780,11 +752,8 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
continue
n[i] += c
cross_entropy_ts[i] += -log(features[2],2)*c
cross_entropy_st[i] += -log(features[0],2)*c
cross_entropy_lex_ts[i] += -log(features[3],2)*c
cross_entropy_lex_st[i] += -log(features[1],2)*c
for j in range(model_interface.number_of_features):
cross_entropies[j][i] -= log(features[j],2)*c
elif src in model_interface.phrase_source and not ('compare_cross-entropies' in flags and flags['compare_cross-entropies']):
other_translations[i] += c
@ -799,22 +768,18 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
for i in range(num_results):
try:
cross_entropy_ts[i] /= n[i]
cross_entropy_st[i] /= n[i]
cross_entropy_lex_ts[i] /= n[i]
cross_entropy_lex_st[i] /= n[i]
for j in range(model_interface.number_of_features):
cross_entropies[j][i] /= n[i]
except ZeroDivisionError:
sys.stderr.write('Warning: no matching phrase pairs between reference set and model\n')
cross_entropy_ts[i] = 0
cross_entropy_st[i] = 0
cross_entropy_lex_ts[i] = 0
cross_entropy_lex_st[i] = 0
for j in range(model_interface.number_of_features):
cross_entropies[j][i] = 0
if 'compare_cross-entropies' in flags and flags['compare_cross-entropies']:
return [(cross_entropy_st[i],cross_entropy_lex_st[i],cross_entropy_ts[i],cross_entropy_lex_ts[i],other_translations[i],oov[i],ignored[i],n[i],total_pairs) for i in range(num_results)], (n[0],total_pairs,oov2)
return [tuple([ce[i] for ce in cross_entropies]) + (other_translations[i],oov[i],ignored[i],n[i],total_pairs) for i in range(num_results)], (n[0],total_pairs,oov2)
else:
return cross_entropy_st[0],cross_entropy_lex_st[0],cross_entropy_ts[0],cross_entropy_lex_ts[0],other_translations[0],oov2,total_pairs
return tuple([ce[0] for ce in cross_entropies]) + (other_translations[0],oov2,total_pairs)
def cross_entropy_light(model_interface,reference_interface,weights,score,mode,flags,cache):
@ -823,22 +788,16 @@ def cross_entropy_light(model_interface,reference_interface,weights,score,mode,f
Same as cross_entropy, but optimized for speed: it doesn't generate all of the statistics,
doesn't normalize, and uses caching.
"""
weights = normalize_weights(weights,mode)
cross_entropy_ts = 0
cross_entropy_st = 0
cross_entropy_lex_ts = 0
cross_entropy_lex_st = 0
weights = normalize_weights(weights,mode,flags)
cross_entropies = [0]*model_interface.number_of_features
for (src,target,c) in cache:
features = score(weights,src,target,model_interface,flags,cache=True)
cross_entropy_ts -= log(features[2],2)*c
cross_entropy_st -= log(features[0],2)*c
cross_entropy_lex_ts -= log(features[3],2)*c
cross_entropy_lex_st -= log(features[1],2)*c
for i in range(model_interface.number_of_features):
cross_entropies[i] -= log(features[i],2)*c
return cross_entropy_st,cross_entropy_lex_st,cross_entropy_ts,cross_entropy_lex_ts
return cross_entropies
def _get_reference_cache(reference_interface,model_interface):
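In other words, the light variant now accumulates one unnormalized cross-entropy sum per feature instead of four hardcoded ones. A standalone sketch of that accumulation (score_fn is a hypothetical stand-in for the score function used above):

from math import log

def per_feature_cross_entropy(cache, score_fn, number_of_features):
    totals = [0.0] * number_of_features
    for src, target, c in cache:
        features = score_fn(src, target)
        for i in range(number_of_features):
            totals[i] -= log(features[i], 2) * c
    return totals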
@ -881,16 +840,16 @@ def _get_lexical_filter(reference_interface,model_interface):
return e2f_filter,f2e_filter
def _hillclimb_move(weights,stepsize,mode):
def _hillclimb_move(weights,stepsize,mode,flags):
"""Move function for hillclimb algorithm. Updates each weight by stepsize."""
for i,w in enumerate(weights):
yield normalize_weights(weights[:i]+[w+stepsize]+weights[i+1:],mode)
yield normalize_weights(weights[:i]+[w+stepsize]+weights[i+1:],mode,flags)
for i,w in enumerate(weights):
new = w-stepsize
if new >= 1e-10:
yield normalize_weights(weights[:i]+[new]+weights[i+1:],mode)
yield normalize_weights(weights[:i]+[new]+weights[i+1:],mode,flags)
def _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,mode,flags,precision,cache,n):
"""first (deprecated) implementation of iterative weight optimization."""
@ -912,7 +871,7 @@ def _hillclimb(scores,best_weights,objective,model_interface,reference_interface
move = 0
for w in _hillclimb_move(list(best_weights),stepsize,mode):
for w in _hillclimb_move(list(best_weights),stepsize,mode,flags):
weights_tuple = tuple(w)
if weights_tuple in scores:
@ -948,22 +907,18 @@ def optimize_cross_entropy_hillclimb(model_interface,reference_interface,initial
cache,n = _get_reference_cache(reference_interface,model_interface)
# each objective is a triple: which score to minimize from cross_entropy(), which weights to update accordingly, and a comment that is printed
objectives = [(lambda x: scores[x][0]/n,[0],'minimize cross-entropy for p(s|t)'), #optimize cross-entropy for p(s|t)
(lambda x: scores[x][1]/n,[1],'minimize cross-entropy for lex(s|t)'),
(lambda x: scores[x][2]/n,[2],'minimize cross-entropy for p(t|s)'), #optimize for p(t|s)
(lambda x: scores[x][3]/n,[3],'minimize cross-entropy for lex(t|s)')]
objectives = [(lambda x: scores[x][i]/n,[i],'minimize cross-entropy for feature {0}'.format(i)) for i in range(model_interface.number_of_features)]
scores[best_weights] = cross_entropy_light(model_interface,reference_interface,initial_weights,score_function,mode,flags,cache)
final_weights = initial_weights[:]
final_cross_entropy = [0]*model_interface.number_of_features
for objective, features, comment in objectives:
for i,(objective, features, comment) in enumerate(objectives):
best_weights = min(scores,key=objective)
sys.stderr.write('Optimizing objective "' + comment +'"\n')
best_weights = _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,mode,flags,precision,cache,n)
best_weights = _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,feature_specific_mode(mode,i,flags),flags,precision,cache,n)
sys.stderr.write('\nCross-entropy:' + str(objective(best_weights)) + '- weights: ' + str(best_weights)+'\n\n')
sys.stderr.write('\nCross-entropy:' + str(objective(best_weights)) + ' - weights: ' + str(best_weights)+'\n\n')
for j in features:
final_weights[j] = list(best_weights)
@ -984,11 +939,8 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
cache,n = _get_reference_cache(reference_interface,model_interface)
# each objective is a triple: which score to minimize from cross_entropy(), which weights to update accordingly, and a comment that is printed
objectives = [(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[0],[0],'minimize cross-entropy for p(s|t)'), #optimize cross-entropy for p(s|t)
(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[1],[1],'minimize cross-entropy for lex(s|t)'),
(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[2],[2],'minimize cross-entropy for p(t|s)'), #optimize for p(t|s)
(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[3],[3],'minimize cross-entropy for lex(t|s)')]
objectives = [(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,feature_specific_mode(mode,i,flags),flags,cache)[i],[i],'minimize cross-entropy for feature {0}'.format(i)) for i in range(model_interface.number_of_features)] # one objective per feature
final_weights = initial_weights[:]
final_cross_entropy = [0]*model_interface.number_of_features
@ -996,7 +948,7 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
sys.stderr.write('Optimizing objective "' + comment +'"\n')
initial_values = [1]*(len(model_interface.models)-1) # we leave value of first model at 1 and optimize all others (normalized of course)
best_weights, best_point, data = fmin_l_bfgs_b(objective,initial_values,approx_grad=True,bounds=[(0.000000001,None)]*len(initial_values))
best_weights = normalize_weights([1]+list(best_weights),mode)
best_weights = normalize_weights([1]+list(best_weights),feature_specific_mode(mode,i,flags),flags)
sys.stderr.write('Cross-entropy after L-BFGS optimization: ' + str(best_point/n) + ' - weights: ' + str(best_weights)+'\n')
for j in features:
@ -1006,41 +958,59 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
return final_weights,final_cross_entropy
def feature_specific_mode(mode,i,flags):
"""in mode 'counts', only the default Moses features can be recomputed from raw frequencies;
all other features are interpolated by default.
This function mostly serves cosmetic purposes (i.e. normalizing a single weight vector for logging),
since normalize_weights also handles a mix of interpolated and recomputed features.
"""
if mode == 'counts' and i not in [flags['i_e2f'],flags['i_e2f_lex'],flags['i_f2e'],flags['i_f2e_lex']]:
return 'interpolate'
else:
return mode
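A quick sketch of what the helper above returns with the default feature indices (illustrative calls only):

flags = {'i_e2f': 0, 'i_e2f_lex': 1, 'i_f2e': 2, 'i_f2e_lex': 3}
feature_specific_mode('counts', 2, flags)   # 'counts'      (a standard feature, recomputed from frequencies)
feature_specific_mode('counts', 4, flags)   # 'interpolate' (any additional feature)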
def redistribute_probability_mass(weights,src,target,interface,flags,mode='interpolate'):
"""the conditional probability p(x|y) is undefined for cases where p(y) = 0
this function redistributes the probability mass to only consider models for which p(y) > 0
"""
i_e2f = flags['i_e2f']
i_e2f_lex = flags['i_e2f_lex']
i_f2e = flags['i_f2e']
i_f2e_lex = flags['i_f2e_lex']
new_weights = weights[:]
if flags['normalize_s_given_t'] == 's':
# set weight to 0 for all models where the source phrase is unseen (p(s|t))
new_weights[0] = map(mul,interface.phrase_source[src],weights[0])
new_weights[i_e2f] = map(mul,interface.phrase_source[src],weights[i_e2f])
if flags['normalize-lexical_weights']:
new_weights[1] = map(mul,interface.phrase_source[src],weights[1])
new_weights[i_e2f_lex] = map(mul,interface.phrase_source[src],weights[i_e2f_lex])
elif flags['normalize_s_given_t'] == 't':
# set weight to 0 for all models where the target phrase is unseen (p(s|t))
new_weights[0] = map(mul,interface.phrase_target[target],weights[0])
new_weights[i_e2f] = map(mul,interface.phrase_target[target],weights[i_e2f])
if flags['normalize-lexical_weights']:
new_weights[1] = map(mul,interface.phrase_target[target],weights[1])
new_weights[i_e2f_lex] = map(mul,interface.phrase_target[target],weights[i_e2f_lex])
# set weight to 0 for all models where the source phrase is unseen (p(t|s))
new_weights[2] = map(mul,interface.phrase_source[src],weights[2])
new_weights[i_f2e] = map(mul,interface.phrase_source[src],weights[i_f2e])
if flags['normalize-lexical_weights']:
new_weights[3] = map(mul,interface.phrase_source[src],weights[3])
new_weights[i_f2e_lex] = map(mul,interface.phrase_source[src],weights[i_f2e_lex])
return normalize_weights(new_weights,mode)
return normalize_weights(new_weights,mode,flags)
def score_interpolate(weights,src,target,interface,flags,cache=False):
"""linear interpolation of probabilities (and other feature values)
if normalized is True, the probability mass for p(x|y) is redistributed to models with p(y) > 0
"""
model_values = interface.phrase_pairs[src][target][0]
scores = [0]*len(model_values)
@ -1059,12 +1029,12 @@ def score_interpolate(weights,src,target,interface,flags,cache=False):
lts = 0
else:
scores[1] = compute_lexicalweight(normalized_weights[1],e2f_alignment,interface.word_pairs_e2f,None,mode='interpolate')
scores[3] = compute_lexicalweight(normalized_weights[3],f2e_alignment,interface.word_pairs_f2e,None,mode='interpolate')
scores[flags['i_e2f_lex']] = compute_lexicalweight(normalized_weights[flags['i_e2f_lex']],e2f_alignment,interface.word_pairs_e2f,None,mode='interpolate')
scores[flags['i_f2e_lex']] = compute_lexicalweight(normalized_weights[flags['i_f2e_lex']],f2e_alignment,interface.word_pairs_f2e,None,mode='interpolate')
for idx,prob in enumerate(model_values):
if not ('recompute_lexweights' in flags and flags['recompute_lexweights'] and (idx == 1 or idx == 3)):
if not ('recompute_lexweights' in flags and flags['recompute_lexweights'] and (idx == flags['i_e2f_lex'] or idx == flags['i_f2e_lex'])):
scores[idx] = dot_product(prob,normalized_weights[idx])
return scores
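A quick numeric check of the interpolation above, a sketch using values from the two test phrase tables in this commit: with uniform weights, a feature that is 0.3 in one model and 0.6 in the other interpolates to 0.45, which is the value of the non-count features in the expected combined table earlier in this diff.

def dot_product(probs, weights):            # sketch of the helper used above
    return sum(p * w for p, w in zip(probs, weights))

print(dot_product([0.3, 0.6], [0.5, 0.5]))  # 0.45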
@ -1093,32 +1063,43 @@ def score_counts(weights,src,target,interface,flags,cache=False):
each count is multiplied by its weight; the trivial case is a weight of 1 for each model, which corresponds to a concatenation
"""
i_e2f = flags['i_e2f']
i_e2f_lex = flags['i_e2f_lex']
i_f2e = flags['i_f2e']
i_f2e_lex = flags['i_f2e_lex']
# if we have a non-default number of weights, assume that we might have to do a mix of count-based and interpolated scores.
if len(weights) == 4:
scores = [0]*len(weights)
else:
scores = score_interpolate(weights,src,target,interface,flags,cache=cache)
try:
joined_count = dot_product(interface.phrase_pairs[src][target][0][0],weights[0])
target_count = dot_product(interface.phrase_target[target],weights[0])
pst = joined_count / target_count
joined_count = dot_product(interface.phrase_pairs[src][target][0][i_e2f],weights[i_e2f])
target_count = dot_product(interface.phrase_target[target],weights[i_e2f])
scores[i_e2f] = joined_count / target_count
except ZeroDivisionError:
pst = 0
scores[i_e2f] = 0
try:
joined_count = dot_product(interface.phrase_pairs[src][target][0][1],weights[2])
source_count = dot_product(interface.phrase_source[src],weights[2])
pts = joined_count / source_count
joined_count = dot_product(interface.phrase_pairs[src][target][0][i_f2e],weights[i_f2e])
source_count = dot_product(interface.phrase_source[src],weights[i_f2e])
scores[i_f2e] = joined_count / source_count
except ZeroDivisionError:
pts = 0
scores[i_f2e] = 0
e2f_alignment,f2e_alignment = interface.get_word_alignments(src,target,cache=cache)
if not e2f_alignment or not f2e_alignment:
sys.stderr.write('Error: no word alignments found, but necessary for lexical weight computation.\n')
lst = 0
lts = 0
scores[i_e2f_lex] = 0
scores[i_f2e_lex] = 0
else:
lst = compute_lexicalweight(weights[1],e2f_alignment,interface.word_pairs_e2f,interface.word_target,mode='counts',cache=cache)
lts = compute_lexicalweight(weights[3],f2e_alignment,interface.word_pairs_f2e,interface.word_source,mode='counts',cache=cache)
scores[i_e2f_lex] = compute_lexicalweight(weights[i_e2f_lex],e2f_alignment,interface.word_pairs_e2f,interface.word_target,mode='counts',cache=cache)
scores[i_f2e_lex] = compute_lexicalweight(weights[i_f2e_lex],f2e_alignment,interface.word_pairs_f2e,interface.word_source,mode='counts',cache=cache)
return [pst,lst,pts,lts]
return scores
def score_interpolate_reordering(weights,src,target,interface):
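A worked check of the count-based recomputation above; a sketch whose per-model counts come from the two test phrase tables in this commit and whose weights are those of regression test 9 below (e2f 0.1,0.9 and f2e 0.2,0.8, normalized so the first model has weight 1):

def dot(xs, ws):
    return sum(x * w for x, w in zip(xs, ws))

# "pass ||| passeport": joint, target-marginal and source-marginal counts per model
joint  = [4.0, 51.0]
target = [15.0, 182.0]
source = [582.0, 84.0]

p_s_given_t = dot(joint, [1, 9]) / dot(target, [1, 9])  # 463/1653 ~ 0.280097
p_t_given_s = dot(joint, [1, 4]) / dot(source, [1, 4])  # 208/918  ~ 0.22658

Both values appear in the expected combined phrase table earlier in this commit.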
@ -1174,7 +1155,7 @@ def compute_lexicalweight(weights,alignment,word_pairs,marginal,mode='counts',ca
return lex
def normalize_weights(weights,mode):
def normalize_weights(weights,mode,flags=None):
"""make sure that probability mass in linear interpolation is 1
for weighted counts, weight of first model is set to 1
"""
@ -1203,7 +1184,7 @@ def normalize_weights(weights,mode):
except ZeroDivisionError:
sys.stderr.write('Error: Zero division in weight normalization. Are some of your weights zero? This might lead to undefined behaviour if a phrase pair is only seen in a model with weight 0\n')
elif mode == 'counts':
elif mode == 'counts_pure':
if type(weights[0]) == list:
@ -1217,6 +1198,19 @@ def normalize_weights(weights,mode):
ratio = 1/weights[0]
new_weights = [weight * ratio for weight in weights]
# make sure that features other than the standard Moses features are always interpolated (since no count-based computation is defined)
elif mode == 'counts':
if type(weights[0]) == list:
norm_counts = normalize_weights(weights,'counts_pure')
new_weights = normalize_weights(weights,'interpolate')
for i in [flags['i_e2f'],flags['i_e2f_lex'],flags['i_f2e'],flags['i_f2e_lex']]:
new_weights[i] = norm_counts[i]
return new_weights
else:
return normalize_weights(weights,'counts_pure')
return new_weights
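To make the three behaviours concrete, a small sketch with the index flags at their defaults (outputs are approximate):

flags = {'i_e2f': 0, 'i_e2f_lex': 1, 'i_f2e': 2, 'i_f2e_lex': 3}
normalize_weights([[0.1, 0.9], [0.2, 0.8]], 'interpolate')           # each vector scaled to sum to 1:  [[0.1, 0.9], [0.2, 0.8]]
normalize_weights([[0.1, 0.9], [0.2, 0.8]], 'counts_pure')           # first model scaled to weight 1:  [[1.0, 9.0], [1.0, 4.0]]
normalize_weights([[0.1, 0.9]] * 4 + [[0.1, 1.0]], 'counts', flags)  # standard features as 'counts_pure', extra features as 'interpolate'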
@ -1288,7 +1282,11 @@ class Combine_TMs():
'normalize_s_given_t':None,
'normalize-lexical_weights':True,
'add_origin_features':False,
'lowmem': False
'lowmem': False,
'i_e2f':0,
'i_e2f_lex':1,
'i_f2e':2,
'i_f2e_lex':3
}
# each model needs a priority. See init docstring for more info
@ -1296,7 +1294,17 @@ class Combine_TMs():
'map':2,
'supplementary':10}
def __init__(self,models,weights=None,output_file=None,mode='interpolate',number_of_features=4,model_interface=Moses,reference_interface=Moses_Alignment,reference_file=None,lang_src=None,lang_target=None,output_lexical=None,**flags):
def __init__(self,models,weights=None,
output_file=None,
mode='interpolate',
number_of_features=4,
model_interface=Moses,
reference_interface=Moses_Alignment,
reference_file=None,
lang_src=None,
lang_target=None,
output_lexical=None,
**flags):
"""The whole configuration of the task is done during initialization. Afterwards, you only need to call your intended method(s).
You can change some of the class attributes afterwards (such as the weights, or the output file), but you should never change the models or mode after initialization.
See unit_test function for example configurations
@ -1317,12 +1325,17 @@ class Combine_TMs():
mode: declares the basic mixture-model algorithm. there are currently three options:
'counts': weighted counts (requires some statistics that Moses doesn't produce. Apply train_model.patch to train_model.perl and repeat step 4 of Moses training to obtain them.)
Only the standard Moses features are recomputed from weighted counts; additional features are linearly interpolated
(see number_of_features to allow more features, and i_e2f etc. if the standard features are in a non-standard position)
'interpolate': linear interpolation
'loglinear': loglinear interpolation (careful: this creates the intersection of phrase tables and is often of little use)
number_of_features: could be used to interpolate models with non-default Moses features. 4 features is currently still hardcoded in various places
(e.g. cross_entropy calculations, mode 'counts')
i_e2f,i_e2f_lex,i_f2e,i_f2e_lex: Index of the (Moses) phrase table features p(s|t), lex(s|t), p(t|s) and lex(t|s).
Relevant for mode 'counts', and if 'recompute_lexweights' is True in mode 'interpolate'. In mode 'counts', any additional features are combined through linear interpolation.
model_interface: class that handles reading phrase tables and lexical tables, and writing phrase tables. Currently only Moses is implemented.
default: Moses
@ -1378,7 +1391,15 @@ class Combine_TMs():
self.lang_target = lang_target
self.loaded = defaultdict(int)
self.output_lexical = output_lexical
self.flags = copy.copy(self.flags)
self.flags.update(flags)
self.flags['i_e2f'] = int(self.flags['i_e2f'])
self.flags['i_e2f_lex'] = int(self.flags['i_e2f_lex'])
self.flags['i_f2e'] = int(self.flags['i_f2e'])
self.flags['i_f2e_lex'] = int(self.flags['i_f2e_lex'])
if reference_interface:
self.reference_interface = reference_interface(reference_file)
@ -1395,22 +1416,18 @@ class Combine_TMs():
if mode == 'interpolate':
self.score = score_interpolate
self.load_lines = self.model_interface.load_phrase_probabilities
elif mode == 'loglinear':
self.score = score_loglinear
self.load_lines = self.model_interface.load_phrase_probabilities
elif mode == 'counts':
self.score = score_counts
self.load_lines = self.model_interface.load_phrase_counts
self.flags = copy.copy(self.flags)
self.flags.update(flags)
def _sanity_checks(self,models,number_of_features,weights):
"""check if input arguments make sense (correct number of weights, valid model priorities etc.)
is only called on initialization. If you change weights afterwards, better know what you're doing.
"""
number_of_features = int(number_of_features)
for (model,priority) in models:
assert(priority in self._priorities)
@ -1437,12 +1454,12 @@ class Combine_TMs():
sys.stderr.write('Warning: No weights defined: initializing with uniform weights\n')
new_weights = normalize_weights(weights,self.mode)
new_weights = normalize_weights(weights,self.mode,self.flags)
if weights != new_weights:
if self.mode == 'interpolate' or self.mode == 'loglinear':
sys.stderr.write('Warning: weights should sum to 1 - ')
elif self.mode == 'counts':
sys.stderr.write('Warning: normalizing weights so that first model has weight 1 - ')
sys.stderr.write('Warning: normalizing weights so that first model has weight 1 (for features that are recomputed from counts) - ')
sys.stderr.write('normalizing to: '+ str(new_weights) +'\n')
weights = new_weights
@ -1472,7 +1489,7 @@ class Combine_TMs():
if 'pt-filtered' in data and not self.loaded['pt-filtered']:
models_prioritized = [(self.model_interface.open_table(model,'phrase-table'),priority,i) for (model,priority,i) in priority_sort_models(self.models)]
for model,priority,i in models_prioritized:
sys.stderr.write('Loading phrase table ' + str(i) + ' (only data relevant for reference set)')
j = 0
@ -1481,7 +1498,7 @@ class Combine_TMs():
sys.stderr.write('...'+str(j))
j += 1
line = line.rstrip().split(b' ||| ')
self.load_lines(line,priority,i,store='all',filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target)
self.model_interface.load_phrase_features(line,priority,i,store='all',mode=self.mode,filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target,flags=self.flags)
sys.stderr.write(' done\n')
self.loaded['pt-filtered'] = 1
@ -1506,7 +1523,7 @@ class Combine_TMs():
sys.stderr.write('...'+str(j))
j += 1
line = line.rstrip().split(b' ||| ')
self.load_lines(line,priority,i,store='target')
self.model_interface.load_phrase_features(line,priority,i,mode=self.mode,store='target',flags=self.flags)
sys.stderr.write(' done\n')
self.loaded['pt-target'] = 1
@ -1554,8 +1571,8 @@ class Combine_TMs():
i = 0
sys.stderr.write('Incrementally loading and processing phrase tables...')
for block in self.model_interface.traverse_incrementally('phrase-table',models,self.load_lines,store_flag,inverted=inverted,lowmem=self.flags['lowmem']):
for block in self.model_interface.traverse_incrementally('phrase-table',models,self.model_interface.load_phrase_features,store_flag,mode=self.mode,inverted=inverted,lowmem=self.flags['lowmem'],flags=self.flags):
for src in sorted(self.model_interface.phrase_pairs, key = lambda x: x + b' |'):
for target in sorted(self.model_interface.phrase_pairs[src], key = lambda x: x + b' |'):
@ -1642,8 +1659,8 @@ class Combine_TMs():
i = 0
sys.stderr.write('Incrementally loading and processing phrase tables...')
for block in self.model_interface.traverse_incrementally('reordering-table',models,self.model_interface.load_reordering_probabilities,'pairs',lowmem=self.flags['lowmem']):
for block in self.model_interface.traverse_incrementally('reordering-table',models,self.model_interface.load_reordering_probabilities,'pairs',mode=self.mode,lowmem=self.flags['lowmem'],flags=self.flags):
for src in sorted(self.model_interface.reordering_pairs):
for target in sorted(self.model_interface.reordering_pairs[src]):
if not i % 1000000:
@ -1676,18 +1693,21 @@ class Combine_TMs():
results, (intersection,total_pairs,oov2) = cross_entropy(self.model_interface,self.reference_interface,self.weights,self.score,self.mode,self.flags)
padding = 90
num_features = self.model_interface.number_of_features
print('\nResults of model comparison\n')
print('{0:<{padding}}: {1}'.format('phrase pairs in reference (tokens)',total_pairs, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model intersection (tokens)',intersection, padding=padding))
print('{0:<{padding}}: {1}\n'.format('phrase pairs in model union (tokens)',total_pairs-oov2, padding=padding))
for i,(cross_entropy_st,cross_entropy_lex_st,cross_entropy_ts,cross_entropy_lex_ts,other_translations,oov,ignored,n,total_pairs) in enumerate(results):
for i,data in enumerate(results):
cross_entropies = data[:num_features]
(other_translations,oov,ignored,n,total_pairs) = data[num_features:]
print('model ' +str(i))
print('{0:<{padding}}: {1}'.format('cross-entropy p(s|t)', cross_entropy_st, padding=padding))
print('{0:<{padding}}: {1}'.format('cross-entropy lex(s|t)', cross_entropy_lex_st, padding=padding))
print('{0:<{padding}}: {1}'.format('cross-entropy p(t|s)', cross_entropy_ts, padding=padding))
print('{0:<{padding}}: {1}'.format('cross-entropy lex(t|s)', cross_entropy_lex_ts, padding=padding))
for j in range(num_features):
print('{0:<{padding}}: {1}'.format('cross-entropy for feature {0}'.format(j), cross_entropies[j], padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model (tokens)', n+ignored, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model, but not in intersection (tokens)', ignored, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in union, but not in model (but source phrase is) (tokens)', other_translations, padding=padding))
@ -1786,6 +1806,18 @@ def test():
Combiner = Combine_TMs([[os.path.join('test','model1'),'primary'],[os.path.join('test','model2'),'map']],output_file=os.path.join('test','phrase-table_test8'),mode='counts',reference_file='test/extract')
Combiner.combine_given_tuning_set()
# count-based combination of two non-default models, with fixed weights. Same as test 3, but with the standard features moved to the back of the feature vector
# command line: python tmcombine.py combine_given_weights test/model3 test/model4 -w "0.5,0.5;0.5,0.5;0.5,0.5;0.5,0.5;0.1,0.9;0.1,1;0.2,0.8;0.5,0.5" -o test/phrase-table_test9 -m counts --number_of_features 8 --i_e2f 4 --i_e2f_lex 5 --i_f2e 6 --i_f2e_lex 7 -r test/extract
sys.stderr.write('Regression test 9\n')
Combiner = Combine_TMs([[os.path.join('test','model3'),'primary'],[os.path.join('test','model4'),'primary']],[[0.5,0.5],[0.5,0.5],[0.5,0.5],[0.5,0.5],[0.1,0.9],[0.1,1],[0.2,0.8],[0.5,0.5]],os.path.join('test','phrase-table_test9'),mode='counts',number_of_features=8,i_e2f=4,i_e2f_lex=5,i_f2e=6,i_f2e_lex=7)
Combiner.combine_given_weights()
# count-based combination of two non-default models, using a tuning set. Same as test 5, but with the standard features moved to the back of the feature vector
# command line: python tmcombine.py combine_given_tuning_set test/model3 test/model4 -o test/phrase-table_test10 -m counts --number_of_features 8 --i_e2f 4 --i_e2f_lex 5 --i_f2e 6 --i_f2e_lex 7 -r test/extract
sys.stderr.write('Regression test 10\n')
Combiner = Combine_TMs([[os.path.join('test','model3'),'primary'],[os.path.join('test','model4'),'primary']],output_file=os.path.join('test','phrase-table_test10'),mode='counts',number_of_features=8,i_e2f=4,i_e2f_lex=5,i_f2e=6,i_f2e_lex=7,reference_file='test/extract')
Combiner.combine_given_tuning_set()
#convert weight vector passed as a command line argument
class to_list(argparse.Action):
@ -1800,46 +1832,68 @@ class to_list(argparse.Action):
def parse_command_line():
parser = argparse.ArgumentParser(description='Combine translation models. Check DOCSTRING of the class Combine_TMs() and its methods for a more in-depth documentation and additional configuration options not available through the command line. The function test() shows examples.')
parser.add_argument('action', metavar='ACTION', choices=["combine_given_weights","combine_given_tuning_set","combine_reordering_tables","compute_cross_entropy","return_best_cross_entropy","compare_cross_entropies"],
group1 = parser.add_argument_group('Main options')
group2 = parser.add_argument_group('More model combination options')
group1.add_argument('action', metavar='ACTION', choices=["combine_given_weights","combine_given_tuning_set","combine_reordering_tables","compute_cross_entropy","return_best_cross_entropy","compare_cross_entropies"],
help='What you want to do with the models. One of %(choices)s.')
parser.add_argument('model', metavar='DIRECTORY', nargs='+',
group1.add_argument('model', metavar='DIRECTORY', nargs='+',
help='Model directory. Assumes default Moses structure (i.e. path to phrase table and lexical tables).')
parser.add_argument('-w', '--weights', dest='weights', action=to_list,
group1.add_argument('-w', '--weights', dest='weights', action=to_list,
default=None,
help='weight vector. Format 1: single vector, one weight per model. Example: \"0.1,0.9\" ; format 2: one vector per feature, one weight per model: \"0.1,0.9;0.5,0.5;0.4,0.6;0.2,0.8\"')
parser.add_argument('-m', '--mode', type=str,
group1.add_argument('-m', '--mode', type=str,
default="interpolate",
choices=["counts","interpolate","loglinear"],
help='basic mixture-model algorithm. Default: %(default)s. Note: depending on mode and additional configuration, additional statistics are needed. Check docstring documentation of Combine_TMs() for more info.')
parser.add_argument('-r', '--reference', type=str,
group1.add_argument('-r', '--reference', type=str,
default=None,
help='File containing reference phrase pairs for cross-entropy calculation. Default interface expects \'path/model/extract.gz\' that is produced by training a model on the reference (i.e. development) corpus.')
parser.add_argument('-o', '--output', type=str,
group1.add_argument('-o', '--output', type=str,
default="-",
help='Output file (phrase table). If not specified, model is written to standard output.')
parser.add_argument('--output-lexical', type=str,
group1.add_argument('--output-lexical', type=str,
default=None,
help=('Not only create a combined phrase table, but also combined lexical tables. Writes to OUTPUT_LEXICAL.e2f and OUTPUT_LEXICAL.f2e, or OUTPUT_LEXICAL.counts.e2f in mode \'counts\'.'))
parser.add_argument('--lowmem', action="store_true",
group1.add_argument('--lowmem', action="store_true",
help=('Low memory mode: requires two passes (and sorting in between) to combine a phrase table, but loads less data into memory. Only relevant for mode "counts" and some configurations of mode "interpolate".'))
parser.add_argument('--normalized', action="store_true",
help=('for each phrase pair x,y: ignore models with p(y)=0, and distribute probability mass among models with p(y)>0. (default: missing entries (x,y) are always interpreted as p(x|y)=0). Only relevant in mode "interpolate".'))
parser.add_argument('--recompute_lexweights', action="store_true",
help=('don\'t directly interpolate lexical weights, but interpolate word translation probabilities instead and recompute the lexical weights. Only relevant in mode "interpolate".'))
parser.add_argument('--tempdir', type=str,
group1.add_argument('--tempdir', type=str,
default=None,
help=('Temporary directory in --lowmem mode.'))
group2.add_argument('--i_e2f', type=int,
default=0, metavar='N',
help=('Index of p(f|e) (relevant for mode counts if phrase table has custom feature order). (default: %(default)s)'))
group2.add_argument('--i_e2f_lex', type=int,
default=1, metavar='N',
help=('Index of lex(f|e) (relevant for mode counts or with option recompute_lexweights if phrase table has custom feature order). (default: %(default)s)'))
group2.add_argument('--i_f2e', type=int,
default=2, metavar='N',
help=('Index of p(e|f) (relevant for mode counts if phrase table has custom feature order). (default: %(default)s)'))
group2.add_argument('--i_f2e_lex', type=int,
default=3, metavar='N',
help=('Index of lex(e|f) (relevant for mode counts or with option recompute_lexweights if phrase table has custom feature order). (default: %(default)s)'))
group2.add_argument('--number_of_features', type=int,
default=4, metavar='N',
help=('Combine models with N + 1 features (last feature is constant phrase penalty). (default: %(default)s)'))
group2.add_argument('--normalized', action="store_true",
help=('for each phrase pair x,y: ignore models with p(y)=0, and distribute probability mass among models with p(y)>0. (default: missing entries (x,y) are always interpreted as p(x|y)=0). Only relevant in mode "interpolate".'))
group2.add_argument('--recompute_lexweights', action="store_true",
help=('don\'t directly interpolate lexical weights, but interpolate word translation probabilities instead and recompute the lexical weights. Only relevant in mode "interpolate".'))
return parser.parse_args()
@ -1854,7 +1908,21 @@ if __name__ == "__main__":
else:
args = parse_command_line()
#initialize
combiner = Combine_TMs([(m,'primary') for m in args.model],weights=args.weights,mode=args.mode,output_file=args.output,reference_file=args.reference,output_lexical=args.output_lexical,lowmem=args.lowmem,normalized=args.normalized,recompute_lexweights=args.recompute_lexweights,tempdir=args.tempdir)
combiner = Combine_TMs([(m,'primary') for m in args.model],
weights=args.weights,
mode=args.mode,
output_file=args.output,
reference_file=args.reference,
output_lexical=args.output_lexical,
lowmem=args.lowmem,
normalized=args.normalized,
recompute_lexweights=args.recompute_lexweights,
tempdir=args.tempdir,
number_of_features=args.number_of_features,
i_e2f=args.i_e2f,
i_e2f_lex=args.i_e2f_lex,
i_f2e=args.i_f2e,
i_f2e_lex=args.i_f2e_lex)
# execute right method
f_string = "combiner."+args.action+'()'
exec(f_string)