From 63ca61ba0b04a70a6780816dbe1b1b609d7e709a Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Thu, 27 Mar 2014 14:32:13 +0000 Subject: [PATCH] tmcombine: don't crash if model contains sparse features or other data (no adaptation; tmcombine will copy sparse features / data from first model) --- .../tmcombine/test/model3/model/phrase-table | 4 +- .../tmcombine/test/model4/model/phrase-table | 2 +- contrib/tmcombine/test/phrase-table_test1 | 16 +++---- contrib/tmcombine/test/phrase-table_test10 | 18 +++---- contrib/tmcombine/test/phrase-table_test2 | 18 +++---- contrib/tmcombine/test/phrase-table_test3 | 18 +++---- contrib/tmcombine/test/phrase-table_test4 | 16 +++---- contrib/tmcombine/test/phrase-table_test5 | 18 +++---- contrib/tmcombine/test/phrase-table_test6 | 8 ++-- contrib/tmcombine/test/phrase-table_test7 | 2 +- contrib/tmcombine/test/phrase-table_test8 | 18 +++---- contrib/tmcombine/test/phrase-table_test9 | 18 +++---- contrib/tmcombine/tmcombine.py | 47 ++++++++++++++----- 13 files changed, 112 insertions(+), 91 deletions(-) diff --git a/contrib/tmcombine/test/model3/model/phrase-table b/contrib/tmcombine/test/model3/model/phrase-table index f5c8647de..737157e69 100644 --- a/contrib/tmcombine/test/model3/model/phrase-table +++ b/contrib/tmcombine/test/model3/model/phrase-table @@ -1,5 +1,5 @@ -ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000 -bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10 +ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000 ||| sparse_feature 1 +bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10 ||| der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518 der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749 45 pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582 diff --git a/contrib/tmcombine/test/model4/model/phrase-table b/contrib/tmcombine/test/model4/model/phrase-table index 494b6a37f..8262264e9 100644 --- a/contrib/tmcombine/test/model4/model/phrase-table +++ b/contrib/tmcombine/test/model4/model/phrase-table @@ -1,4 +1,4 @@ -ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000 +ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000 ||| sparse_feature 2 bd ||| bf ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 10 10 der pass ||| le passeport ||| 0.6 0.6 0.6 0.6 0.16 0.03063 0.4 0.0748551 2.718 ||| 0-0 1-1 ||| 25 10 pass ||| passeport ||| 0.6 0.6 0.6 0.6 0.28022 0.192612 0.607143 0.675926 2.718 ||| 0-0 ||| 182 84 diff --git a/contrib/tmcombine/test/phrase-table_test1 b/contrib/tmcombine/test/phrase-table_test1 index 1d1d5a238..1309b711d 100644 --- a/contrib/tmcombine/test/phrase-table_test1 +++ b/contrib/tmcombine/test/phrase-table_test1 @@ -1,8 +1,8 @@ -ad ||| af ||| 0.3 0.3 0.3 0.3 2.718 ||| 0-0 ||| 1000 1000 -bd ||| bf ||| 0.3 0.3 0.3 0.3 2.718 ||| 0-0 ||| 10 10 -der gipfel ||| sommet ||| 0.00163568 0.00436384 0.0183397 0.305702 2.718 ||| 1-0 ||| 5808 518 -der pass ||| le col ||| 0.00867825 0.0142308 0.144445 0.0608095 2.718 ||| 0-0 1-1 ||| 749 45 -pass ||| col ||| 0.0976 0.0719685 0.314433 0.340651 2.718 ||| 0-0 ||| 1875 582 -pass ||| passeport retrouvé ||| 0.25 0.125 0.000859105 1.9065e-07 2.718 ||| 0-0 ||| 2 582 -pass ||| passeport ||| 0.273444 0.221306 0.307008 0.343654 2.718 ||| 0-0 ||| 182 84 -sitzung ||| séance ||| 0.528624 0.417705 0.434797 0.492241 2.718 ||| 0-0 ||| 4251 6455 +ad ||| af ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 1000 1000 +bd ||| bf ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 10 10 +der gipfel ||| sommet ||| 0.00163568 0.00436384 0.0183397 0.305702 ||| 1-0 ||| 5808 518 +der pass ||| le col ||| 0.00867825 0.0142308 0.144445 0.0608095 ||| 0-0 1-1 ||| 749 45 +pass ||| col ||| 0.0976 0.0719685 0.314433 0.340651 ||| 0-0 ||| 1875 582 +pass ||| passeport retrouvé ||| 0.25 0.125 0.000859105 1.9065e-07 ||| 0-0 ||| 2 582 +pass ||| passeport ||| 0.273444 0.221306 0.307008 0.343654 ||| 0-0 ||| 15 582 +sitzung ||| séance ||| 0.528624 0.417705 0.434797 0.492241 ||| 0-0 ||| 22 17 diff --git a/contrib/tmcombine/test/phrase-table_test10 b/contrib/tmcombine/test/phrase-table_test10 index ee2aebeb1..594bb428f 100644 --- a/contrib/tmcombine/test/phrase-table_test10 +++ b/contrib/tmcombine/test/phrase-table_test10 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673 -bd ||| bf ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673 -der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673 -der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673 -pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885 -pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885 -pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885 -sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589 +ad ||| af ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 25362.6029089 1074.23173673 ||| sparse_feature 1 +bd ||| bf ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 253.626029089 10.7423173673 ||| +der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 ||| 0-0 1-1 ||| 749.0 45.7423173673 +der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 ||| 0-0 1-1 ||| 609.065072723 45.7423173673 +pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 ||| 0-0 ||| 1875.0 588.235465885 +pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 ||| 0-0 ||| 2.0 588.235465885 +pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 ||| 0-0 ||| 4448.99372942 588.235465885 +sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 ||| 0-0 ||| 103587.424966 496.165860589 diff --git a/contrib/tmcombine/test/phrase-table_test2 b/contrib/tmcombine/test/phrase-table_test2 index 9d3f28816..4cd3b40b5 100644 --- a/contrib/tmcombine/test/phrase-table_test2 +++ b/contrib/tmcombine/test/phrase-table_test2 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 1000 1000 -bd ||| bf ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10 10 -der gipfel ||| sommet ||| 0.000327135 0.000793425 0.0073359 0.305702 2.718 ||| 1-0 ||| 5808 518 -der pass ||| le col ||| 0.00173565 0.00258742 0.0577778 0.0608095 2.718 ||| 0-0 1-1 ||| 749 45 -der pass ||| le passeport ||| 0.144 0.0278455 0.32 0.0374275 2.718 ||| 0-0 1-1 ||| 25 10 -pass ||| col ||| 0.01952 0.0130852 0.125773 0.340651 2.718 ||| 0-0 ||| 1875 582 -pass ||| passeport retrouvé ||| 0.05 0.0227273 0.000343642 1.9065e-07 2.718 ||| 0-0 ||| 2 582 -pass ||| passeport ||| 0.278865 0.197829 0.487089 0.343654 2.718 ||| 0-0 ||| 182 84 -sitzung ||| séance ||| 0.733342 0.56532 0.483911 0.492241 2.718 ||| 0-0 ||| 4251 6455 +ad ||| af ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 1000 1000 +bd ||| bf ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10 10 +der gipfel ||| sommet ||| 0.000327135 0.000793425 0.0073359 0.305702 ||| 1-0 ||| 5808 518 +der pass ||| le col ||| 0.00173565 0.00258742 0.0577778 0.0608095 ||| 0-0 1-1 ||| 749 45 +der pass ||| le passeport ||| 0.144 0.0278455 0.32 0.0374275 ||| 0-0 1-1 ||| 25 10 +pass ||| col ||| 0.01952 0.0130852 0.125773 0.340651 ||| 0-0 ||| 1875 582 +pass ||| passeport retrouvé ||| 0.05 0.0227273 0.000343642 1.9065e-07 ||| 0-0 ||| 2 582 +pass ||| passeport ||| 0.278865 0.197829 0.487089 0.343654 ||| 0-0 ||| 15 582 +sitzung ||| séance ||| 0.733342 0.56532 0.483911 0.492241 ||| 0-0 ||| 22 17 diff --git a/contrib/tmcombine/test/phrase-table_test3 b/contrib/tmcombine/test/phrase-table_test3 index 8dfed73b8..b208730cd 100644 --- a/contrib/tmcombine/test/phrase-table_test3 +++ b/contrib/tmcombine/test/phrase-table_test3 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0 -bd ||| bf ||| 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0 -der gipfel ||| sommet ||| 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0 -der pass ||| le passeport ||| 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0 -pass ||| col ||| 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0 -pass ||| passeport retrouvé ||| 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0 -pass ||| passeport ||| 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0 -sitzung ||| séance ||| 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0 +ad ||| af ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10000.0 5000.0 +bd ||| bf ||| 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 100.0 50.0 +der gipfel ||| sommet ||| 0.00327135 0.00569336 0.0366795 0.651018 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.0173565 0.0193836 0.152941 0.0675369 ||| 0-0 1-1 ||| 749.0 85.0 +der pass ||| le passeport ||| 0.16 0.0307772 0.188235 0.0128336 ||| 0-0 1-1 ||| 225.0 85.0 +pass ||| col ||| 0.1952 0.121573 0.398693 0.582296 ||| 0-0 ||| 1875.0 918.0 +pass ||| passeport retrouvé ||| 0.5 0.193033 0.00108932 1.16835e-06 ||| 0-0 ||| 2.0 918.0 +pass ||| passeport ||| 0.280097 0.193033 0.22658 0.11065 ||| 0-0 ||| 1653.0 918.0 +sitzung ||| séance ||| 0.784227 0.597753 0.516546 0.559514 ||| 0-0 ||| 38281.0 25837.0 diff --git a/contrib/tmcombine/test/phrase-table_test4 b/contrib/tmcombine/test/phrase-table_test4 index 7485c728f..18773ad67 100644 --- a/contrib/tmcombine/test/phrase-table_test4 +++ b/contrib/tmcombine/test/phrase-table_test4 @@ -1,8 +1,8 @@ -ad ||| af ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000.0 1000.0 -bd ||| bf ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10.0 10.0 -der gipfel ||| sommet ||| 0.00327135 0.00872769 0.0366795 0.611404 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749.0 45.0 -pass ||| col ||| 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875.0 582.0 -pass ||| passeport retrouvé ||| 0.5 0.25 0.00171821 3.80847e-07 2.718 ||| 0-0 ||| 2.0 582.0 -pass ||| passeport ||| 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15.0 582.0 -sitzung ||| séance ||| 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 ||| 22.0 17.0 +ad ||| af ||| 0.5 0.5 0.5 0.5 ||| 0-0 ||| 1000.0 1000.0 +bd ||| bf ||| 0.5 0.5 0.5 0.5 ||| 0-0 ||| 10.0 10.0 +der gipfel ||| sommet ||| 0.00327135 0.00872769 0.0366795 0.611404 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.0173565 0.0284616 0.288889 0.121619 ||| 0-0 1-1 ||| 749.0 45.0 +pass ||| col ||| 0.1952 0.143937 0.628866 0.681301 ||| 0-0 ||| 1875.0 582.0 +pass ||| passeport retrouvé ||| 0.5 0.25 0.00171821 3.80847e-07 ||| 0-0 ||| 2.0 582.0 +pass ||| passeport ||| 0.266667 0.25 0.00687285 0.0113821 ||| 0-0 ||| 15.0 582.0 +sitzung ||| séance ||| 0.272727 0.237288 0.352941 0.424242 ||| 0-0 ||| 22.0 17.0 diff --git a/contrib/tmcombine/test/phrase-table_test5 b/contrib/tmcombine/test/phrase-table_test5 index 45f15163d..383781b11 100644 --- a/contrib/tmcombine/test/phrase-table_test5 +++ b/contrib/tmcombine/test/phrase-table_test5 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673 -bd ||| bf ||| 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673 -der gipfel ||| sommet ||| 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673 -der pass ||| le passeport ||| 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673 -pass ||| col ||| 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885 -pass ||| passeport retrouvé ||| 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885 -pass ||| passeport ||| 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885 -sitzung ||| séance ||| 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589 +ad ||| af ||| 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 25362.6029089 1074.23173673 +bd ||| bf ||| 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 253.626029089 10.7423173673 +der gipfel ||| sommet ||| 0.00327135 0.00686984 0.0366795 0.617135 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.0173565 0.023534 0.284201 0.0972183 ||| 0-0 1-1 ||| 749.0 45.7423173673 +der pass ||| le passeport ||| 0.16 0.0329324 0.0064913 0.00303408 ||| 0-0 1-1 ||| 609.065072723 45.7423173673 +pass ||| col ||| 0.1952 0.142393 0.6222 0.671744 ||| 0-0 ||| 1875.0 588.235465885 +pass ||| passeport retrouvé ||| 0.5 0.199258 0.0017 5.11945e-07 ||| 0-0 ||| 2.0 588.235465885 +pass ||| passeport ||| 0.280174 0.199258 0.0132359 0.0209644 ||| 0-0 ||| 4448.99372942 588.235465885 +sitzung ||| séance ||| 0.784412 0.59168 0.511045 0.552002 ||| 0-0 ||| 103587.424966 496.165860589 diff --git a/contrib/tmcombine/test/phrase-table_test6 b/contrib/tmcombine/test/phrase-table_test6 index 38daf4512..57374f148 100644 --- a/contrib/tmcombine/test/phrase-table_test6 +++ b/contrib/tmcombine/test/phrase-table_test6 @@ -1,4 +1,4 @@ -ad ||| af ||| 0.117462 0.117462 0.117462 0.117462 2.718 ||| 0-0 ||| 1000 1000 -bd ||| bf ||| 0.117462 0.117462 0.117462 0.117462 2.718 ||| 0-0 ||| 10 10 -pass ||| passeport ||| 0.278834 0.197701 0.387861 0.449295 2.718 ||| 0-0 ||| 182 84 -sitzung ||| séance ||| 0.705857 0.545304 0.497336 0.544877 2.718 ||| 0-0 ||| 4251 6455 +ad ||| af ||| 0.117462 0.117462 0.117462 0.117462 ||| 0-0 ||| 1000 1000 +bd ||| bf ||| 0.117462 0.117462 0.117462 0.117462 ||| 0-0 ||| 10 10 +pass ||| passeport ||| 0.278834 0.197701 0.387861 0.449295 ||| 0-0 ||| 15 582 +sitzung ||| séance ||| 0.705857 0.545304 0.497336 0.544877 ||| 0-0 ||| 22 17 diff --git a/contrib/tmcombine/test/phrase-table_test7 b/contrib/tmcombine/test/phrase-table_test7 index 01ea2a076..8a6285c75 100644 --- a/contrib/tmcombine/test/phrase-table_test7 +++ b/contrib/tmcombine/test/phrase-table_test7 @@ -1 +1 @@ -([(1.8744705606119034, 2.0752881273042374, 1.5025010618768841, 1.2370391973008494, 0, 0, 1, 1, 22), (0.35011602922315899, 0.74148657814725749, 0.95272965495298623, 0.83588062023889353, 1, 0, 0, 1, 22)], (1, 22, 20)) \ No newline at end of file +([(1.8744705606119034, 2.0752881273042374, 1.5025010618768841, 1.2370391973008494, 0, 0, 1, 1, 22), (0.350116029223159, 0.7414865781472575, 0.9527296549529862, 0.8358806202388935, 1, 0, 0, 1, 22)], (1, 22, 20)) \ No newline at end of file diff --git a/contrib/tmcombine/test/phrase-table_test8 b/contrib/tmcombine/test/phrase-table_test8 index f0776cd80..1974a53df 100644 --- a/contrib/tmcombine/test/phrase-table_test8 +++ b/contrib/tmcombine/test/phrase-table_test8 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.242966 0.398085 0.483231 0.482814 2.718 ||| 0-0 ||| 2797.86490081 1043.7557397 -bd ||| bf ||| 0.102213 0.111367 0.174411 0.172867 2.718 ||| 0-0 ||| 1807.86490081 53.7557396976 -der gipfel ||| sommet ||| 0.00327135 0.00863717 0.0366795 0.612073 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.0173565 0.0260469 0.146469 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 88.7557396976 -der pass ||| le passeport ||| 0.16 0.0389201 0.197196 0.0101009 2.718 ||| 0-0 1-1 ||| 1797.86490081 88.7557396976 -pass ||| col ||| 0.1952 0.131811 0.584893 0.63621 2.718 ||| 0-0 ||| 1875.0 625.755739698 -pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89355e-06 2.718 ||| 0-0 ||| 2.0 625.755739698 -pass ||| passeport ||| 0.280108 0.196956 0.0488465 0.0565932 2.718 ||| 0-0 ||| 1812.86490081 625.755739698 -sitzung ||| séance ||| 0.778334 0.545019 0.470846 0.502625 2.718 ||| 0-0 ||| 1819.86490081 60.7557396976 +ad ||| af ||| 0.242882 0.39808 0.483231 0.482813 ||| 0-0 ||| 2799.50876845 1043.75589858 +bd ||| bf ||| 0.102211 0.111366 0.17441 0.172864 ||| 0-0 ||| 1809.50876845 53.7558985771 +der gipfel ||| sommet ||| 0.00327135 0.00863716 0.0366795 0.612073 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.0173565 0.0260468 0.146469 0.113553 ||| 0-0 1-1 ||| 749.0 88.7558985771 +der pass ||| le passeport ||| 0.16 0.03892 0.197197 0.0101013 ||| 0-0 1-1 ||| 1799.50876845 88.7558985771 +pass ||| col ||| 0.1952 0.13181 0.584893 0.636208 ||| 0-0 ||| 1875.0 625.755898577 +pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89361e-06 ||| 0-0 ||| 2.0 625.755898577 +pass ||| passeport ||| 0.280108 0.196956 0.0488467 0.056595 ||| 0-0 ||| 1814.50876845 625.755898577 +sitzung ||| séance ||| 0.77834 0.545022 0.470846 0.502627 ||| 0-0 ||| 1821.50876845 60.7558985771 diff --git a/contrib/tmcombine/test/phrase-table_test9 b/contrib/tmcombine/test/phrase-table_test9 index 017c97854..3e640d14b 100644 --- a/contrib/tmcombine/test/phrase-table_test9 +++ b/contrib/tmcombine/test/phrase-table_test9 @@ -1,9 +1,9 @@ -ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0 -bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0 -der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0 -der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0 -der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0 -pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0 -pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0 -pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0 -sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0 +ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 10000.0 5000.0 ||| sparse_feature 1 +bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 ||| 0-0 ||| 100.0 50.0 ||| +der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 ||| 1-0 ||| 5808.0 518.0 +der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 ||| 0-0 1-1 ||| 749.0 85.0 +der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 ||| 0-0 1-1 ||| 225.0 85.0 +pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 ||| 0-0 ||| 1875.0 918.0 +pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 ||| 0-0 ||| 2.0 918.0 +pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 ||| 0-0 ||| 1653.0 918.0 +sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 ||| 0-0 ||| 38281.0 25837.0 diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py index 6b98b3acd..f96b35da6 100755 --- a/contrib/tmcombine/tmcombine.py +++ b/contrib/tmcombine/tmcombine.py @@ -114,7 +114,7 @@ class Moses(): if mode == 'counts' and not priority == 2: #priority 2 is MAP try: - counts = map(float,line[-1].split()) + counts = map(float,line[4].split()) try: target_count,src_count,joint_count = counts joint_count_e2f = joint_count @@ -145,7 +145,7 @@ class Moses(): if (store == 'all' or store == 'source') and not (filter_by_src and not src in filter_by_src): if mode == 'counts' and not priority == 2: #priority 2 is MAP try: - self.phrase_source[src][i] = float(line[-1].split()[1]) + self.phrase_source[src][i] = float(line[4].split()[1]) except: sys.stderr.write(str(line)+'\n') sys.stderr.write('ERROR: Counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts or word alignment?\n') @@ -156,7 +156,7 @@ class Moses(): if (store == 'all' or store == 'target') and not (filter_by_target and not target in filter_by_target): if mode == 'counts' and not priority == 2: #priority 2 is MAP try: - self.phrase_target[target][i] = float(line[-1].split()[0]) + self.phrase_target[target][i] = float(line[4].split()[0]) except: sys.stderr.write(str(line)+'\n') sys.stderr.write('ERROR: Counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts or word alignment?\n') @@ -210,6 +210,9 @@ class Moses(): for line in model: line = line.rstrip().split(b' ||| ') + if line[-1].endswith(b' |||'): + line[-1] = line[-1][:-4] + line.append('') if increment != line[0]: stack[i] = line @@ -300,8 +303,9 @@ class Moses(): def store_info(self,src,target,line): """store alignment info and comment section for re-use in output""" - if len(line) == 5: - self.phrase_pairs[src][target][1] = line[3:5] + if len(line) >= 5: + if not self.phrase_pairs[src][target][1]: + self.phrase_pairs[src][target][1] = line[3:] # assuming that alignment is empty elif len(line) == 4: @@ -373,7 +377,8 @@ class Moses(): return '' # information specific to Moses model: alignment info and comment section with target and source counts - alignment,comments = self.phrase_pairs[src][target][1] + additional_entries = self.phrase_pairs[src][target][1] + alignment = additional_entries[0] if alignment: extra_space = b' ' else: @@ -384,7 +389,7 @@ class Moses(): i_f2e = flags['i_f2e'] srccount = dot_product(self.phrase_source[src],weights[i_f2e]) targetcount = dot_product(self.phrase_target[target],weights[i_e2f]) - comments = b"%s %s" %(targetcount,srccount) + additional_entries[1] = b"%s %s" %(targetcount,srccount) features = b' '.join([b'%.6g' %(f) for f in features]) @@ -397,7 +402,7 @@ class Moses(): phrase_penalty = b' 2.718' else: phrase_penalty = b'' - line = b"%s ||| %s ||| %s%s %s||| %s%s||| %s\n" %(src,target,features,origin_features,phrase_penalty,alignment,extra_space,comments) + line = b"%s ||| %s ||| %s%s %s||| %s%s||| %s\n" %(src,target,features,origin_features,phrase_penalty,alignment,extra_space,b' ||| '.join(additional_entries[1:])) return line @@ -473,8 +478,15 @@ class Moses(): for line,line2 in izip(pt_normal,pt_inverse): line = line.split(b' ||| ') + if line[-1].endswith(b' |||'): + line[-1] = line[-1][:-4] + line.append('') + line2 = line2.split(b' ||| ') - + if line2[-1].endswith(b' |||'): + line2[-1] = line2[-1][:-4] + line2.append('') + #scores mid = int(self.number_of_features/2) scores1 = line[2].split() @@ -483,11 +495,11 @@ class Moses(): # marginal counts if mode == 'counts': - src_count = line[-1].split()[1] + src_count = line[4].split()[1] target_count = line2[-1].split()[0] - line[-1] = b' '.join([target_count,src_count]) + b'\n' + line[4] = b' '.join([target_count,src_count]) - pt_out.write(b' ||| '.join(line)) + pt_out.write(b' ||| '.join(line)+ b'\n') pt_normal.close() pt_inverse.close() @@ -685,7 +697,10 @@ class Moses_Alignment(): for line in fileobj: line = line.split(b' ||| ') - + if line[-1].endswith(b' |||'): + line[-1] = line[-1][:-4] + line.append('') + src = line[0] target = line[1] @@ -1528,6 +1543,9 @@ class Combine_TMs(): sys.stderr.write('...'+str(j)) j += 1 line = line.rstrip().split(b' ||| ') + if line[-1].endswith(b' |||'): + line[-1] = line[-1][:-4] + line.append('') self.model_interface.load_phrase_features(line,priority,i,store='all',mode=self.mode,filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target,flags=self.flags) sys.stderr.write(' done\n') @@ -1553,6 +1571,9 @@ class Combine_TMs(): sys.stderr.write('...'+str(j)) j += 1 line = line.rstrip().split(b' ||| ') + if line[-1].endswith(b' |||'): + line[-1] = line[-1][:-4] + line.append('') self.model_interface.load_phrase_features(line,priority,i,mode=self.mode,store='target',flags=self.flags) sys.stderr.write(' done\n')