better default configuration for recaser

2024-12-27 22:14:57 +03:00 · 2015-03-11 17:52:30 +00:00 · 2015-03-11 17:52:30 +00:00 · 2ce45229f8
commit 2ce45229f8
parent 1632c5f39d
5 changed files with 40 additions and 25 deletions
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -527,9 +527,7 @@ decoder-settings = ""
 #########################################################
 ## RECASER: restore case, this part only trains the model

-[RECASING]
-
-#decoder = $moses-bin-dir/moses
+[RECASING] IGNORE

 ### training data
 # raw input needs to be still tokenized,
@@ -537,9 +535,14 @@ decoder-settings = ""
 #
 #tokenized = [LM:europarl:tokenized-corpus]

-# recase-config = 
-
+### additional settings
+#
+recasing-settings = ""
 #lm-training = $srilm-dir/ngram-count
+decoder = $moses-bin-dir/moses
+
+# already a trained recaser? point to config file
+#recase-config = 

 #######################################################
 ## TRUECASER: train model to truecase corpora and input
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -546,9 +546,7 @@ decoder-settings = ""
 #########################################################
 ## RECASER: restore case, this part only trains the model

-[RECASING]
-
-#decoder = $moses-bin-dir/moses
+[RECASING] IGNORE

 ### training data
 # raw input needs to be still tokenized,
@@ -556,9 +554,14 @@ decoder-settings = ""
 #
 #tokenized = [LM:europarl:tokenized-corpus]

-# recase-config = 
-
+### additional settings
+#
+recasing-settings = ""
 #lm-training = $srilm-dir/ngram-count
+decoder = $moses-bin-dir/moses
+
+# already a trained recaser? point to config file
+#recase-config = 

 #######################################################
 ## TRUECASER: train model to truecase corpora and input
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -528,9 +528,7 @@ decoder-settings = ""
 #########################################################
 ## RECASER: restore case, this part only trains the model

-[RECASING]
-
-#decoder = $moses-bin-dir/moses
+[RECASING] IGNORE

 ### training data
 # raw input needs to be still tokenized,
@@ -538,9 +536,14 @@ decoder-settings = ""
 #
 #tokenized = [LM:europarl:tokenized-corpus]

-# recase-config = 
-
+### additional settings
+#
+recasing-settings = ""
 #lm-training = $srilm-dir/ngram-count
+decoder = $moses-bin-dir/moses
+
+# already a trained recaser? point to config file
+#recase-config = 

 #######################################################
 ## TRUECASER: train model to truecase corpora and input
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -532,9 +532,7 @@ decoder-settings = ""
 #########################################################
 ## RECASER: restore case, this part only trains the model

-[RECASING]
-
-#decoder = $moses-bin-dir/moses
+[RECASING] IGNORE

 ### training data
 # raw input needs to be still tokenized,
@@ -542,9 +540,14 @@ decoder-settings = ""
 #
 #tokenized = [LM:europarl:tokenized-corpus]

-# recase-config = 
-
+### additional settings
+#
+recasing-settings = ""
 #lm-training = $srilm-dir/ngram-count
+decoder = $moses-bin-dir/moses
+
+# already a trained recaser? point to config file
+#recase-config = 

 #######################################################
 ## TRUECASER: train model to truecase corpora and input
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -510,9 +510,7 @@ decoder-settings = ""
 #########################################################
 ## RECASER: restore case, this part only trains the model

-[RECASING]
-
-#decoder = $moses-bin-dir/moses
+[RECASING] IGNORE

 ### training data
 # raw input needs to be still tokenized,
@@ -520,9 +518,14 @@ decoder-settings = ""
 #
 #tokenized = [LM:europarl:tokenized-corpus]

-# recase-config = 
-
+### additional settings
+#
+recasing-settings = ""
 #lm-training = $srilm-dir/ngram-count
+decoder = $moses-bin-dir/moses
+
+# already a trained recaser? point to config file
+#recase-config = 

 #######################################################
 ## TRUECASER: train model to truecase corpora and input