mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
ae20251bf3
@@ -109,12 +109,12 @@ class Moses():
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
scores = scores[:self.number_of_features]
|
scores = scores[:self.number_of_features]
|
||||||
model_probabilities = map(float,scores)
|
model_probabilities = list(map(float,scores))
|
||||||
phrase_probabilities = self.phrase_pairs[src][target][0]
|
phrase_probabilities = self.phrase_pairs[src][target][0]
|
||||||
|
|
||||||
if mode == 'counts' and not priority == 2: #priority 2 is MAP
|
if mode == 'counts' and not priority == 2: #priority 2 is MAP
|
||||||
try:
|
try:
|
||||||
counts = map(float,line[4].split())
|
counts = list(map(float,line[4].split()))
|
||||||
try:
|
try:
|
||||||
target_count,src_count,joint_count = counts
|
target_count,src_count,joint_count = counts
|
||||||
joint_count_e2f = joint_count
|
joint_count_e2f = joint_count
|
||||||
@@ -171,7 +171,7 @@ class Moses():
|
|||||||
src = line[0]
|
src = line[0]
|
||||||
target = line[1]
|
target = line[1]
|
||||||
|
|
||||||
model_probabilities = map(float,line[2].split())
|
model_probabilities = list(map(float,line[2].split()))
|
||||||
reordering_probabilities = self.reordering_pairs[src][target]
|
reordering_probabilities = self.reordering_pairs[src][target]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -212,7 +212,7 @@ class Moses():
|
|||||||
line = line.rstrip().split(b' ||| ')
|
line = line.rstrip().split(b' ||| ')
|
||||||
if line[-1].endswith(b' |||'):
|
if line[-1].endswith(b' |||'):
|
||||||
line[-1] = line[-1][:-4]
|
line[-1] = line[-1][:-4]
|
||||||
line.append('')
|
line.append(b'')
|
||||||
|
|
||||||
if increment != line[0]:
|
if increment != line[0]:
|
||||||
stack[i] = line
|
stack[i] = line
|
||||||
@@ -341,7 +341,7 @@ class Moses():
|
|||||||
textual_f2e = [[t,[]] for t in target_list]
|
textual_f2e = [[t,[]] for t in target_list]
|
||||||
|
|
||||||
for pair in alignment.split(b' '):
|
for pair in alignment.split(b' '):
|
||||||
s,t = pair.split('-')
|
s,t = pair.split(b'-')
|
||||||
s,t = int(s),int(t)
|
s,t = int(s),int(t)
|
||||||
|
|
||||||
textual_e2f[s][1].append(target_list[t])
|
textual_e2f[s][1].append(target_list[t])
|
||||||
@@ -349,11 +349,11 @@ class Moses():
|
|||||||
|
|
||||||
for s,t in textual_e2f:
|
for s,t in textual_e2f:
|
||||||
if not t:
|
if not t:
|
||||||
t.append('NULL')
|
t.append(b'NULL')
|
||||||
|
|
||||||
for s,t in textual_f2e:
|
for s,t in textual_f2e:
|
||||||
if not t:
|
if not t:
|
||||||
t.append('NULL')
|
t.append(b'NULL')
|
||||||
|
|
||||||
#tupelize so we can use the value as dictionary keys
|
#tupelize so we can use the value as dictionary keys
|
||||||
for i in range(len(textual_e2f)):
|
for i in range(len(textual_e2f)):
|
||||||
@@ -374,7 +374,7 @@ class Moses():
|
|||||||
# if one feature value is 0 (either because of loglinear interpolation or rounding to 0), don't write it to phrasetable
|
# if one feature value is 0 (either because of loglinear interpolation or rounding to 0), don't write it to phrasetable
|
||||||
# (phrase pair will end up with probability zero in log-linear model anyway)
|
# (phrase pair will end up with probability zero in log-linear model anyway)
|
||||||
if 0 in features:
|
if 0 in features:
|
||||||
return ''
|
return b''
|
||||||
|
|
||||||
# information specific to Moses model: alignment info and comment section with target and source counts
|
# information specific to Moses model: alignment info and comment section with target and source counts
|
||||||
additional_entries = self.phrase_pairs[src][target][1]
|
additional_entries = self.phrase_pairs[src][target][1]
|
||||||
@@ -394,7 +394,7 @@ class Moses():
|
|||||||
features = b' '.join([b'%.6g' %(f) for f in features])
|
features = b' '.join([b'%.6g' %(f) for f in features])
|
||||||
|
|
||||||
if flags['add_origin_features']:
|
if flags['add_origin_features']:
|
||||||
origin_features = map(lambda x: 2.718**bool(x),self.phrase_pairs[src][target][0][0]) # 1 if phrase pair doesn't occur in model, 2.718 if it does
|
origin_features = list(map(lambda x: 2.718**bool(x),self.phrase_pairs[src][target][0][0])) # 1 if phrase pair doesn't occur in model, 2.718 if it does
|
||||||
origin_features = b' '.join([b'%.4f' %(f) for f in origin_features]) + ' '
|
origin_features = b' '.join([b'%.4f' %(f) for f in origin_features]) + ' '
|
||||||
else:
|
else:
|
||||||
origin_features = b''
|
origin_features = b''
|
||||||
@@ -445,7 +445,7 @@ class Moses():
|
|||||||
# if one feature value is 0 (either because of loglinear interpolation or rounding to 0), don't write it to reordering table
|
# if one feature value is 0 (either because of loglinear interpolation or rounding to 0), don't write it to reordering table
|
||||||
# (phrase pair will end up with probability zero in log-linear model anyway)
|
# (phrase pair will end up with probability zero in log-linear model anyway)
|
||||||
if 0 in features:
|
if 0 in features:
|
||||||
return ''
|
return b''
|
||||||
|
|
||||||
features = b' '.join([b'%.6g' %(f) for f in features])
|
features = b' '.join([b'%.6g' %(f) for f in features])
|
||||||
|
|
||||||
@@ -699,7 +699,7 @@ class Moses_Alignment():
|
|||||||
line = line.split(b' ||| ')
|
line = line.split(b' ||| ')
|
||||||
if line[-1].endswith(b' |||'):
|
if line[-1].endswith(b' |||'):
|
||||||
line[-1] = line[-1][:-4]
|
line[-1] = line[-1][:-4]
|
||||||
line.append('')
|
line.append(b'')
|
||||||
|
|
||||||
src = line[0]
|
src = line[0]
|
||||||
target = line[1]
|
target = line[1]
|
||||||
@@ -1030,21 +1030,21 @@ def redistribute_probability_mass(weights,src,target,interface,flags,mode='inter
|
|||||||
if flags['normalize_s_given_t'] == 's':
|
if flags['normalize_s_given_t'] == 's':
|
||||||
|
|
||||||
# set weight to 0 for all models where target phrase is unseen (p(s|t)
|
# set weight to 0 for all models where target phrase is unseen (p(s|t)
|
||||||
new_weights[i_e2f] = map(mul,interface.phrase_source[src],weights[i_e2f])
|
new_weights[i_e2f] = list(map(mul,interface.phrase_source[src],weights[i_e2f]))
|
||||||
if flags['normalize-lexical_weights']:
|
if flags['normalize-lexical_weights']:
|
||||||
new_weights[i_e2f_lex] = map(mul,interface.phrase_source[src],weights[i_e2f_lex])
|
new_weights[i_e2f_lex] = list(map(mul,interface.phrase_source[src],weights[i_e2f_lex]))
|
||||||
|
|
||||||
elif flags['normalize_s_given_t'] == 't':
|
elif flags['normalize_s_given_t'] == 't':
|
||||||
|
|
||||||
# set weight to 0 for all models where target phrase is unseen (p(s|t)
|
# set weight to 0 for all models where target phrase is unseen (p(s|t)
|
||||||
new_weights[i_e2f] = map(mul,interface.phrase_target[target],weights[i_e2f])
|
new_weights[i_e2f] = list(map(mul,interface.phrase_target[target],weights[i_e2f]))
|
||||||
if flags['normalize-lexical_weights']:
|
if flags['normalize-lexical_weights']:
|
||||||
new_weights[i_e2f_lex] = map(mul,interface.phrase_target[target],weights[i_e2f_lex])
|
new_weights[i_e2f_lex] = list(map(mul,interface.phrase_target[target],weights[i_e2f_lex]))
|
||||||
|
|
||||||
# set weight to 0 for all models where source phrase is unseen (p(t|s)
|
# set weight to 0 for all models where source phrase is unseen (p(t|s)
|
||||||
new_weights[i_f2e] = map(mul,interface.phrase_source[src],weights[i_f2e])
|
new_weights[i_f2e] = list(map(mul,interface.phrase_source[src],weights[i_f2e]))
|
||||||
if flags['normalize-lexical_weights']:
|
if flags['normalize-lexical_weights']:
|
||||||
new_weights[i_f2e_lex] = map(mul,interface.phrase_source[src],weights[i_f2e_lex])
|
new_weights[i_f2e_lex] = list(map(mul,interface.phrase_source[src],weights[i_f2e_lex]))
|
||||||
|
|
||||||
|
|
||||||
return normalize_weights(new_weights,mode,flags)
|
return normalize_weights(new_weights,mode,flags)
|
||||||
@@ -1095,7 +1095,7 @@ def score_loglinear(weights,src,target,interface,flags,cache=False):
|
|||||||
|
|
||||||
for idx,prob in enumerate(model_values):
|
for idx,prob in enumerate(model_values):
|
||||||
try:
|
try:
|
||||||
scores.append(exp(dot_product(map(log,prob),weights[idx])))
|
scores.append(exp(dot_product(list(map(log,prob)),weights[idx])))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
scores.append(0)
|
scores.append(0)
|
||||||
|
|
||||||
@@ -1265,6 +1265,8 @@ def handle_file(filename,action,fileobj=None,mode='r'):
|
|||||||
|
|
||||||
if mode == 'r':
|
if mode == 'r':
|
||||||
mode = 'rb'
|
mode = 'rb'
|
||||||
|
elif mode == 'w':
|
||||||
|
mode = 'wb'
|
||||||
|
|
||||||
if mode == 'rb' and not filename == '-' and not os.path.exists(filename):
|
if mode == 'rb' and not filename == '-' and not os.path.exists(filename):
|
||||||
if os.path.exists(filename+'.gz'):
|
if os.path.exists(filename+'.gz'):
|
||||||
@@ -1281,7 +1283,7 @@ def handle_file(filename,action,fileobj=None,mode='r'):
|
|||||||
if filename.endswith('.gz'):
|
if filename.endswith('.gz'):
|
||||||
fileobj = gzip.open(filename,mode)
|
fileobj = gzip.open(filename,mode)
|
||||||
|
|
||||||
elif filename == '-' and mode == 'w':
|
elif filename == '-' and mode == 'wb':
|
||||||
fileobj = sys.stdout
|
fileobj = sys.stdout
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user