diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py index 6560ad23b..d7475b34c 100755 --- a/contrib/tmcombine/tmcombine.py +++ b/contrib/tmcombine/tmcombine.py @@ -114,17 +114,29 @@ class Moses(): if mode == 'counts' and not priority == 2: #priority 2 is MAP try: - target_count,src_count = map(float,line[-1].split()) + counts = map(float,line[-1].split()) + try: + target_count,src_count,joint_count = counts + joint_count_e2f = joint_count + joint_count_f2e = joint_count + except ValueError: + # possibly old-style phrase table with 2 counts in last column, or phrase table produced by tmcombine + # note: since each feature has different weight vector, we may have two different phrase pair frequencies + target_count,src_count = counts + i_e2f = flags['i_e2f'] + i_f2e = flags['i_f2e'] + joint_count_e2f = model_probabilities[i_e2f] * target_count + joint_count_f2e = model_probabilities[i_f2e] * src_count except: - sys.stderr.write(str(line)+'\n') - sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n') - return - + sys.stderr.write(str(b" ||| ".join(line))+b'\n') + sys.stderr.write('ERROR: counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n') + raise + i_e2f = flags['i_e2f'] i_f2e = flags['i_f2e'] - model_probabilities[i_e2f] *= target_count - model_probabilities[i_f2e] *= src_count - + model_probabilities[i_e2f] = joint_count_e2f + model_probabilities[i_f2e] = joint_count_f2e + for j,p in enumerate(model_probabilities): phrase_probabilities[j][i] = p