From d4642a34c1550564b59f852af76426574bfd774f Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Sun, 1 Jan 2017 22:54:48 +0000
Subject: [PATCH 01/12] add completed-hypo to Distortion FF

---
 contrib/moses2/FF/Distortion.cpp | 31 ++++++++++++++++++++++++++-----
 contrib/moses2/FF/Distortion.h   | 11 ++++++++---
 2 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp
index 343e1d21f..9e55ff798 100644
--- a/contrib/moses2/FF/Distortion.cpp
+++ b/contrib/moses2/FF/Distortion.cpp
@@ -57,6 +57,7 @@ struct DistortionState_traditional: public FFState
 Distortion::Distortion(size_t startInd, const std::string &line) :
     StatefulFeatureFunction(startInd, line)
 {
+  m_completedHypo = false;
   ReadParameters();
 }
 
@@ -65,6 +66,16 @@ Distortion::~Distortion()
   // TODO Auto-generated destructor stub
 }
 
+void Distortion::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "completed-hypo") {
+    m_completedHypo = Scan<bool>(value);
+  }
+  else {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
+}
+
 FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
 {
   return new (pool.Allocate<DistortionState_traditional>()) DistortionState_traditional();
@@ -110,7 +121,7 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
   const DistortionState_traditional &prev =
       static_cast<const DistortionState_traditional&>(prevState);
   SCORE distortionScore = CalculateDistortionScore(prev.range,
-      hypo.GetInputPath().range, prev.first_gap);
+      hypo.GetInputPath().range, prev.first_gap, hypo.GetBitmap());
   //cerr << "distortionScore=" << distortionScore << endl;
 
   scores.PlusEquals(mgr.system, *this, distortionScore);
@@ -123,11 +134,11 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
 }
 
 SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
-    const int FirstGap) const
+    const int FirstGap, const Bitmap &coverage) const
 {
   bool useEarlyDistortionCost = false;
   if (!useEarlyDistortionCost) {
-    return -(SCORE) ComputeDistortionDistance(prev, curr);
+    return -(SCORE) ComputeDistortionDistance(prev, curr, coverage);
   }
   else {
     /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
@@ -168,7 +179,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
 }
 
 int Distortion::ComputeDistortionDistance(const Range& prev,
-    const Range& current) const
+    const Range& current, const Bitmap &coverage) const
 {
   int dist = 0;
   if (prev.GetNumWordsCovered() == 0) {
@@ -176,8 +187,18 @@ int Distortion::ComputeDistortionDistance(const Range& prev,
   }
   else {
     dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
+    dist = abs(dist);
+
+    if (m_completedHypo && coverage.IsComplete()) {
+      dist += coverage.GetSize() - current.GetEndPos() - 1;
+      /*
+      cerr << "completed=" << coverage << " " << coverage.GetSize() << " "
+          << prev << " "
+          << current << " " << dist << endl;
+      */
+    }
   }
-  return abs(dist);
+  return dist;
 }
 
 void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
diff --git a/contrib/moses2/FF/Distortion.h b/contrib/moses2/FF/Distortion.h
index 45577d1c3..bc843fe54 100644
--- a/contrib/moses2/FF/Distortion.h
+++ b/contrib/moses2/FF/Distortion.h
@@ -14,6 +14,7 @@
 
 namespace Moses2
 {
+class Bitmap;
 
 class Distortion: public StatefulFeatureFunction
 {
@@ -21,6 +22,8 @@ public:
   Distortion(size_t startInd, const std::string &line);
   virtual ~Distortion();
 
+  virtual void SetParameter(const std::string& key, const std::string& value);
+
   virtual FFState* BlankState(MemPool &pool, const System &sys) const;
   virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
       const InputType &input, const Hypothesis &hypo) const;
@@ -48,10 +51,12 @@ public:
       FFState &state) const;
 
 protected:
-  SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
-      const int FirstGap) const;
+  bool m_completedHypo;
 
-  int ComputeDistortionDistance(const Range& prev, const Range& current) const;
+  SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
+      const int FirstGap, const Bitmap &coverage) const;
+
+  int ComputeDistortionDistance(const Range& prev, const Range& current, const Bitmap &coverage) const;
 
 };
 

From 29b0072edac3312a82564ea614a5c64030997061 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 2 Jan 2017 06:02:54 -0500
Subject: [PATCH 02/12] CreateProbingPT2 -> CreateProbingPT

---
 scripts/training/filter-model-given-input.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
index a16aeac4a..65b2e3502 100755
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@@ -228,7 +228,7 @@ while ( my $line = <INI> ) {
                 $phrase_table_impl = "PhraseDictionaryOnDisk";
                 @toks = set_value( \@toks, "path", "$new_name.bin$table_flag" );
             }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                 $phrase_table_impl = "ProbingPT";
                 @toks = set_value( \@toks, "path", "$new_name.probing$table_flag" );
             }
@@ -488,7 +488,7 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
                 my $cmd = "$binarizer $mid_file $new_file.bin";
                 safesystem($cmd) or die "Can't binarize";
             }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                 my $cmd = "$binarizer --input-pt $mid_file --output-dir $new_file.probing";
                 if ($opt_hierarchical) {
 		    $cmd .= " --scfg";
@@ -509,8 +509,8 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
             if ( $binarizer =~ /CreateOnDiskPt/ ) {
                 $lexbin =~ s/CreateOnDiskPt/processLexicalTable/;
             }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
-                $lexbin =~ s/CreateProbingPT2/processLexicalTableMin/;
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
+                $lexbin =~ s/CreateProbingPT/processLexicalTableMin/;
             }
 
             $lexbin =~ s/PhraseTable/LexicalTable/;

From cf93594af98e35329be7120d01255a98d0ad1fa4 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 2 Jan 2017 12:44:08 +0000
Subject: [PATCH 03/12] re-implement -feature-overwrite

---
 contrib/moses2/FF/FeatureFunction.h    |  2 +-
 contrib/moses2/FF/FeatureFunctions.cpp | 42 +++++++++++++++++++++++++-
 contrib/moses2/FF/FeatureFunctions.h   |  5 ++-
 contrib/moses2/legacy/Parameter.cpp    |  4 +--
 4 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/contrib/moses2/FF/FeatureFunction.h b/contrib/moses2/FF/FeatureFunction.h
index d38c72b89..1e25fce39 100644
--- a/contrib/moses2/FF/FeatureFunction.h
+++ b/contrib/moses2/FF/FeatureFunction.h
@@ -80,6 +80,7 @@ public:
     return m_tuneable;
   }
 
+  virtual void SetParameter(const std::string& key, const std::string& value);
 
   // may have more factors than actually need, but not guaranteed.
   virtual void
@@ -118,7 +119,6 @@ protected:
   std::vector<std::vector<std::string> > m_args;
   bool m_tuneable;
 
-  virtual void SetParameter(const std::string& key, const std::string& value);
   virtual void ReadParameters();
   void ParseLine(const std::string &line);
 };
diff --git a/contrib/moses2/FF/FeatureFunctions.cpp b/contrib/moses2/FF/FeatureFunctions.cpp
index 8ca145060..49a0ace67 100644
--- a/contrib/moses2/FF/FeatureFunctions.cpp
+++ b/contrib/moses2/FF/FeatureFunctions.cpp
@@ -103,8 +103,9 @@ void FeatureFunctions::Create()
         unkWP->SetParameter("suffix", m_system.options.unk.suffix);
       }
     }
-
   }
+
+  OverrideFeatures();
 }
 
 FeatureFunction *FeatureFunctions::Create(const std::string &line)
@@ -150,6 +151,17 @@ const FeatureFunction *FeatureFunctions::FindFeatureFunction(
 	return NULL;
 }
 
+FeatureFunction *FeatureFunctions::FindFeatureFunction(  // look up a feature function by name; result is mutable
+    const std::string &name)  // name: compared for equality against each feature function's GetName()
+{
+  BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){  // visit every entry of m_featureFunctions
+    if (ff->GetName() == name) {
+    return const_cast<FeatureFunction *>(ff);  // first match wins; const is dropped so the caller can modify it
+    }
+  }
+  return NULL;  // no feature function with this name
+}
+
 const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
 {
   // assume only 1 unk wp
@@ -243,5 +255,33 @@ void FeatureFunctions::ShowWeights(const Weights &allWeights)
   }
 }
 
+void FeatureFunctions::OverrideFeatures()
+{
+  const Parameter &parameter = m_system.params;
+  // Apply every "feature-overwrite" entry ("FeatureName key=value ...") through SetParameter().
+  const PARAM_VEC *params = parameter.GetParam("feature-overwrite");
+  for (size_t i = 0; params && i < params->size(); ++i) {  // the `params &&` guard skips the loop when GetParam() returned NULL
+    const string &str = params->at(i);
+    vector<string> toks = Tokenize(str);  // split with Tokenize()'s default delimiters
+    UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
+    // toks[0] names the feature function; every following token is one key=value override.
+    FeatureFunction *ff = FindFeatureFunction(toks[0]);
+    UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]);
+    // Throws above if the name is unknown, so ff is non-NULL from here on.
+    for (size_t j = 1; j < toks.size(); ++j) {
+      const string &keyValStr = toks[j];
+      vector<string> keyVal = Tokenize(keyValStr, "=");  // must yield exactly two pieces: key and value
+      UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
+      // Report the override on stderr before applying it.
+      cerr << "Override " << ff->GetName() << " "
+              << keyVal[0] << "=" << keyVal[1] << endl;
+
+      ff->SetParameter(keyVal[0], keyVal[1]);
+
+    }
+  }
+
+}
+
 }
 
diff --git a/contrib/moses2/FF/FeatureFunctions.h b/contrib/moses2/FF/FeatureFunctions.h
index 74c77c7e6..2232e2a97 100644
--- a/contrib/moses2/FF/FeatureFunctions.h
+++ b/contrib/moses2/FF/FeatureFunctions.h
@@ -95,10 +95,13 @@ protected:
   System &m_system;
   size_t m_ffStartInd;
 
+  FeatureRegistry m_registry;
+
   FeatureFunction *Create(const std::string &line);
   std::string GetDefaultName(const std::string &stub);
+  void OverrideFeatures();
+  FeatureFunction *FindFeatureFunction(const std::string &name);
 
-  FeatureRegistry m_registry;
 };
 
 }
diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp
index 666eb0e98..bd2cd4676 100644
--- a/contrib/moses2/legacy/Parameter.cpp
+++ b/contrib/moses2/legacy/Parameter.cpp
@@ -94,8 +94,8 @@ Parameter::Parameter()
   AddParam(search_opts, "weight",
       "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
 
-  //AddParam(search_opts, "feature-overwrite",
-  //    "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
+  AddParam(search_opts, "feature-overwrite",
+      "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
 
   po::options_description tune_opts("Options used in tuning.");
   AddParam(tune_opts, "weight-overwrite",

From ab2e48415fa50faa41106f9f69339ff4ab01de73 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 2 Jan 2017 15:55:33 -0500
Subject: [PATCH 04/12] add back -text-type for EMS

---
 contrib/moses2/legacy/Parameter.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/contrib/moses2/legacy/Parameter.cpp b/contrib/moses2/legacy/Parameter.cpp
index bd2cd4676..870a49f2a 100644
--- a/contrib/moses2/legacy/Parameter.cpp
+++ b/contrib/moses2/legacy/Parameter.cpp
@@ -373,6 +373,9 @@ Parameter::Parameter()
   ///////////////////////////////////////////////////////////////////////////////////////
   // DEPRECATED options
   po::options_description deprec_opts("Deprecated Options");
+  AddParam(deprec_opts, "text-type",
+	   "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
+
   /*
   AddParam(deprec_opts, "link-param-count",
       "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
@@ -412,8 +415,6 @@ Parameter::Parameter()
       "DEPRECATED. DO NOT USE. weight for unknown word penalty");
   AddParam(deprec_opts, "weight-e", "e",
       "DEPRECATED. DO NOT USE. weight for word deletion");
-  AddParam(deprec_opts, "text-type",
-      "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
   AddParam(deprec_opts, "input-scores",
       "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
   AddParam(deprec_opts, "dlm-model",

From ff12a13eaaef2e6272123d5865f516ed4513bc07 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 2 Jan 2017 16:37:56 -0500
Subject: [PATCH 05/12] re-tune if decoder changed. eg moses -> moses2

---
 scripts/ems/experiment.meta | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 16fc20336..d6e6dc133 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -1196,7 +1196,7 @@ tune
 	default-name: tuning/moses.ini
 	tmp-name: tuning/tmp
 	final-model: yes
-	rerun-on-change: decoder-settings tuning-settings nbest lambda async
+	rerun-on-change: decoder decoder-settings tuning-settings nbest lambda async
 	not-error: trans: No such file or directory
 thot-tune
 	in: TRAINING:config input reference

From 80bd5597578cddc72244c8f53d18a2aabdca27b9 Mon Sep 17 00:00:00 2001
From: MosesAdmin <moses-support-owner@mit.edu>
Date: Tue, 3 Jan 2017 00:00:36 +0000
Subject: [PATCH 06/12] daily automatic beautifier

---
 moses/LM/InMemoryPerSentenceOnDemandLM.cpp | 10 ++++++----
 moses/LM/InMemoryPerSentenceOnDemandLM.h   |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
index 12ef78f4e..364aebe42 100644
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@@ -17,7 +17,7 @@ using namespace std;
 
 namespace Moses
 {
-  InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
+InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
 {
   ReadParameters();
 }
@@ -26,7 +26,8 @@ InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
 {
 }
 
-void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask)
+{
 
   // The context scope object for this translation task
   //     contains a map of translation task-specific data
@@ -63,14 +64,15 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
 
   VERBOSE(1, filename);
   if (initialized) {
-    VERBOSE(1, "\tLM initialized\n"); 
+    VERBOSE(1, "\tLM initialized\n");
   }
 
   //  std::remove(filename);
 
 }
 
-LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const
+{
 
   LanguageModelKen<lm::ngram::ProbingModel> *lm;
   lm = m_perThreadLM.get();
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h
index f0c1effa7..022ba9289 100644
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.h
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@@ -89,7 +89,7 @@ public:
   virtual void sync() {
     GetPerThreadLM().sync();
   }
- 
+
   virtual void SetFFStateIdx(int state_idx) {
     if (initialized) {
       GetPerThreadLM().SetFFStateIdx(state_idx);
@@ -107,7 +107,7 @@ public:
       GetPerThreadLM().ReportHistoryOrder(out, phrase);
     }
   }
-  
+
   virtual void EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown

From 02772c07dec22acb1d50397651ad189b0f97e1e6 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 3 Jan 2017 11:06:26 +0000
Subject: [PATCH 07/12] revert changes to Distortion FF

---
 contrib/moses2/FF/Distortion.cpp | 32 +++++---------------------------
 contrib/moses2/FF/Distortion.h   |  9 ++-------
 2 files changed, 7 insertions(+), 34 deletions(-)

diff --git a/contrib/moses2/FF/Distortion.cpp b/contrib/moses2/FF/Distortion.cpp
index 9e55ff798..1d7b7246d 100644
--- a/contrib/moses2/FF/Distortion.cpp
+++ b/contrib/moses2/FF/Distortion.cpp
@@ -57,7 +57,6 @@ struct DistortionState_traditional: public FFState
 Distortion::Distortion(size_t startInd, const std::string &line) :
     StatefulFeatureFunction(startInd, line)
 {
-  m_completedHypo = false;
   ReadParameters();
 }
 
@@ -66,16 +65,6 @@ Distortion::~Distortion()
   // TODO Auto-generated destructor stub
 }
 
-void Distortion::SetParameter(const std::string& key, const std::string& value)
-{
-  if (key == "completed-hypo") {
-    m_completedHypo = Scan<bool>(value);
-  }
-  else {
-    StatefulFeatureFunction::SetParameter(key, value);
-  }
-}
-
 FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
 {
   return new (pool.Allocate<DistortionState_traditional>()) DistortionState_traditional();
@@ -121,7 +110,7 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
   const DistortionState_traditional &prev =
       static_cast<const DistortionState_traditional&>(prevState);
   SCORE distortionScore = CalculateDistortionScore(prev.range,
-      hypo.GetInputPath().range, prev.first_gap, hypo.GetBitmap());
+      hypo.GetInputPath().range, prev.first_gap);
   //cerr << "distortionScore=" << distortionScore << endl;
 
   scores.PlusEquals(mgr.system, *this, distortionScore);
@@ -134,11 +123,11 @@ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
 }
 
 SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
-    const int FirstGap, const Bitmap &coverage) const
+    const int FirstGap) const
 {
   bool useEarlyDistortionCost = false;
   if (!useEarlyDistortionCost) {
-    return -(SCORE) ComputeDistortionDistance(prev, curr, coverage);
+    return -(SCORE) ComputeDistortionDistance(prev, curr);
   }
   else {
     /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
@@ -179,7 +168,7 @@ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
 }
 
 int Distortion::ComputeDistortionDistance(const Range& prev,
-    const Range& current, const Bitmap &coverage) const
+    const Range& current) const
 {
   int dist = 0;
   if (prev.GetNumWordsCovered() == 0) {
@@ -187,18 +176,8 @@ int Distortion::ComputeDistortionDistance(const Range& prev,
   }
   else {
     dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
-    dist = abs(dist);
-
-    if (m_completedHypo && coverage.IsComplete()) {
-      dist += coverage.GetSize() - current.GetEndPos() - 1;
-      /*
-      cerr << "completed=" << coverage << " " << coverage.GetSize() << " "
-          << prev << " "
-          << current << " " << dist << endl;
-      */
-    }
   }
-  return dist;
+  return abs(dist);
 }
 
 void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
@@ -209,4 +188,3 @@ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
 }
 
 }
-
diff --git a/contrib/moses2/FF/Distortion.h b/contrib/moses2/FF/Distortion.h
index bc843fe54..45577d1c3 100644
--- a/contrib/moses2/FF/Distortion.h
+++ b/contrib/moses2/FF/Distortion.h
@@ -14,7 +14,6 @@
 
 namespace Moses2
 {
-class Bitmap;
 
 class Distortion: public StatefulFeatureFunction
 {
@@ -22,8 +21,6 @@ public:
   Distortion(size_t startInd, const std::string &line);
   virtual ~Distortion();
 
-  virtual void SetParameter(const std::string& key, const std::string& value);
-
   virtual FFState* BlankState(MemPool &pool, const System &sys) const;
   virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
       const InputType &input, const Hypothesis &hypo) const;
@@ -51,12 +48,10 @@ public:
       FFState &state) const;
 
 protected:
-  bool m_completedHypo;
-
   SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
-      const int FirstGap, const Bitmap &coverage) const;
+      const int FirstGap) const;
 
-  int ComputeDistortionDistance(const Range& prev, const Range& current, const Bitmap &coverage) const;
+  int ComputeDistortionDistance(const Range& prev, const Range& current) const;
 
 };
 

From 2a5e40ed60d351f05ca58ad3be6ec0865d08373f Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linasvepstas@gmail.com>
Date: Wed, 4 Jan 2017 22:01:45 -0600
Subject: [PATCH 08/12] New file: Lithuanian

---
 .../nonbreaking_prefix.lt                     | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt

diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
new file mode 100644
index 000000000..d7829e3c0
--- /dev/null
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
@@ -0,0 +1,110 @@
+# Anything in this file, followed by a period (and an upper-case word),
+# does NOT indicate an end-of-sentence marker.
+# Special cases are included for prefixes that ONLY appear before 0-9 numbers.
+
+# Any single upper case letter  followed by a period is not a sentence ender
+# (excluding I occasionally, but we leave it in)
+# usually upper case letters are initials in a name
+A
+Ā
+B
+C
+Č
+D
+E
+Ē
+F
+G
+Ģ
+H
+I
+Ī
+J
+K
+Ķ
+L
+Ļ
+M
+N
+Ņ
+O
+P
+Q
+R
+S
+Š
+T
+U
+Ū
+V
+W
+X
+Y
+Z
+Ž
+
+# Abbreviations m. menesis d. diena  g. gimes
+m
+d
+g
+
+# Day and month abbreviations
+# Pirmadienis Penktadienis
+Pr
+Pn
+Pirm
+Antr
+Treč
+Ketv
+Penkt
+Šešt
+Sekm
+Saus
+Vas
+Kov
+Bal
+Geg
+Birž
+Liep
+Rugpj
+Rugs
+Spal
+Lapkr
+Gruod
+
+# List of titles. These are often followed by upper-case names, but do
+# not indicate sentence breaks
+#
+# Gerbiamasis
+Gerb
+
+# XXX TODO .. Below are not quite correct, copied from latvian
+dr
+Dr
+med
+prof
+Prof
+inž
+Inž
+ist.loc
+Ist.loc
+kor.loc
+Kor.loc
+v.i
+vietn
+Vietn
+
+# misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT
+# fall into this category - it sometimes ends a sentence)
+# angl angliskai
+# dab dabartine
+angl
+dab
+
+
+#Numbers only. These should only induce breaks when followed by a numeric sequence
+# add NUMERIC_ONLY after the word for this function
+#This case is mostly for the english "No." which can either be a sentence of its own, or
+#if followed by a number, a non-breaking prefix
+No #NUMERIC_ONLY#
+Nr #NUMERIC_ONLY#

From 3ef84b133cc8bf64862b3c2dad254e7043439fb7 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linasvepstas@gmail.com>
Date: Wed, 4 Jan 2017 22:30:53 -0600
Subject: [PATCH 09/12] More abbreviations

---
 .../nonbreaking_prefix.lt                     | 277 +++++++++++++++++-
 1 file changed, 274 insertions(+), 3 deletions(-)

diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
index d7829e3c0..4e2f6677e 100644
--- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
@@ -43,12 +43,11 @@ Y
 Z
 Ž
 
-# Abbreviations m. menesis d. diena  g. gimes
+# Day and month abbreviations
+# m. menesis d. diena  g. gimes
 m
 d
 g
-
-# Day and month abbreviations
 # Pirmadienis Penktadienis
 Pr
 Pn
@@ -72,6 +71,278 @@ Spal
 Lapkr
 Gruod
 
+# Technical terms, abbreviations used in guidebooks, advertisments, etc.
+# Generally lower-case.
+air
+# airiškai
+amer
+# amerikanizmas
+anat
+# anatomija
+arab
+# arabų
+archeol
+archit
+asm
+# asmuo
+astr
+# astronomija
+austral
+# australiškai
+aut
+# automobilis
+av
+# aviacija
+bažn
+bdv
+# būdvardis
+bibl
+# Biblija
+biol
+# biologija
+bot
+# botanika
+buh
+# buhalterija
+chem
+# chemija
+d
+# didysis
+dgs
+# daugiskaita
+dial
+# dialektizmas
+dipl
+dktv
+# daiktavardis
+džn
+# dažnai
+ekon
+el
+# elektra
+esam
+# esamasis laikas
+euf
+# eufemizmas
+fam
+# familiariai
+farm
+# farmacija
+filos
+# filosofija
+fin
+# finansai
+fiz
+# fizika
+fiziol
+flk
+# folkloras
+fon
+# fonetika
+fot
+# fotografija
+geod
+# geodezija
+geogr
+geol
+# geologija
+geom
+# geometrija
+glžk
+gr
+# graikų
+gram
+her
+# heraldika
+hidr
+# hidrotechnika
+ind
+# Indų
+iron
+# ironiškai
+isp
+# ispanų
+ist
+# istorija
+it
+# italų
+įv
+reikšm
+įv.reikšm
+# įvairiomis reikšmėmis
+jap
+# japonų
+juok
+# juokaujamai
+jūr
+# jūrininkystė
+kalb
+# kalbotyra
+kar
+# karyba
+kas
+# kasyba
+kin
+# kinematografija
+klaus
+# klausiamasis
+knyg
+# knyginis
+kom
+# komercija
+komp
+# kompiuteris
+kosm
+# kosmonautika
+kt
+# kitas
+kul
+# kulinarija
+kuop
+# kuopine
+l
+# laikas
+lit
+# literatūrinis
+log
+# logika
+lot
+# lotynų
+mat
+# matematika
+maž
+# mažybinis
+med
+# medicina
+medž
+# medžioklė
+men
+# menas
+menk
+# menkinamai
+metal
+# metalurgija
+meteor
+min
+# mineralogija
+mit
+# mitologija
+mok
+# mokyklinis
+muz
+# muzikinis
+n
+# naujasis
+neig
+# neigiamasis
+niek
+# niekinamai
+ofic
+# oficialus
+opt
+# optika
+p
+# pietūs
+pan
+# panašiai
+parl
+# parlamentas
+pat
+# patarlė
+paž
+# pažodžiui
+plg
+# palygink
+poet
+# poetizmas
+poligr
+# poligrafija
+polit
+# politika
+ppr
+# paprastai
+pr
+# prancūzų
+prk
+# perkeltine
+psn
+# pasenęs žodis
+psich
+# psichologija
+pvz
+# pavyzdžiui
+r
+# rytai
+rad
+# radiotechnika
+rel
+# religija
+ret
+# retai
+rus
+# rusų
+sen
+# senasis
+sl
+# slengas
+spec
+# specialus
+sport
+stat
+# statyba
+sudurt
+# sudurtinis
+sutr
+# sutrumpintas
+š
+# šiaurė
+šach
+# šachmatai
+šiaur
+škot
+# škotiškai
+šnek
+# šnekamoji
+teatr
+tech
+# technika
+teig
+# teigiamas
+teis
+# teisė
+tekst
+# tekstilė
+tel
+v
+# tik vyriškosios, vakarai
+t.p
+t
+p
+# taip pat
+vaik
+# vaikų
+vart
+# vartojama
+vet
+# veterinarija
+vid
+# vidurinis
+vksm
+# veiksmažodis
+vns
+# vienaskaita
+vok
+# vokiečių
+vulg
+# vulgariai
+zool
+žr
+# žiūrėk
+ž.ū
+ž
+ū
+# žemės ūkis
+
 # List of titles. These are often followed by upper-case names, but do
 # not indicate sentence breaks
 #

From d10ba6f049d8dc08d95a6a6e6934adf808160320 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linasvepstas@gmail.com>
Date: Wed, 4 Jan 2017 23:52:28 -0600
Subject: [PATCH 10/12] More abbreviations for Lithuanian.

---
 .../nonbreaking_prefix.lt                     | 369 ++++++++++++++++--
 1 file changed, 343 insertions(+), 26 deletions(-)

diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
index 4e2f6677e..fa72196d9 100644
--- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
@@ -43,11 +43,18 @@ Y
 Z
 Ž
 
+# Initialis -- Džonas
+Dz
+Dž
+Just
+
 # Day and month abbreviations
 # m. menesis d. diena  g. gimes
 m
+mėn
 d
 g
+gim
 # Pirmadienis Penktadienis
 Pr
 Pn
@@ -71,6 +78,279 @@ Spal
 Lapkr
 Gruod
 
+# Business, governmental, geographical terms
+a
+# aikštė
+adv
+# advokatas
+akad
+# akademikas
+aklg
+# akligatvis
+akt
+# aktorius
+al
+# alėja
+A.V
+# antspaudo vieta
+aps
+apskr
+# apskritis
+apyg
+# apygarda
+aps
+apskr
+# apskritis
+asist
+# asistentas
+asmv
+avd
+# asmenvardis
+a.k
+asm
+asm.k
+# asmens kodas
+atsak
+# atsakingasis
+atsisk
+sąsk
+# atsiskaitomoji sąskaita
+aut
+# autorius
+b
+k
+b.k
+# banko kodas
+bkl
+# bakalauras
+bt
+# butas
+buv
+# buvęs, -usi
+dail
+# dailininkas
+dek
+# dekanas
+dėst
+# dėstytojas
+dir
+# direktorius
+dirig
+# dirigentas
+doc
+# docentas
+drp
+# durpynas
+dš
+# dešinysis
+egz
+# egzempliorius
+eil
+# eilutė
+ekon
+# ekonomika
+el
+# elektroninis
+etc
+ež
+# ežeras
+faks
+# faksas
+fak
+# fakultetas
+gen
+# generolas
+gyd
+# gydytojas
+gv
+# gyvenvietė
+įl
+# įlanka
+Įn
+# įnagininkas
+insp
+# inspektorius
+pan
+# ir panašiai
+t.t
+# ir taip toliau
+k.a
+# kaip antai
+kand
+# kandidatas
+kat
+# katedra
+kyš
+# kyšulys
+kl
+# klasė
+kln
+# kalnas
+kn
+# knyga
+koresp
+# korespondentas
+kpt
+# kapitonas
+kr
+# kairysis
+kt
+# kitas
+kun
+# kunigas
+l
+e
+p
+l.e.p
+# laikinai einantis pareigas
+ltn
+# leitenantas
+m
+mst
+# miestas
+m.e
+# mūsų eros
+m.m
+# mokslo metai
+mot
+# moteris
+mstl
+# miestelis
+mgr
+# magistras
+mgnt
+# magistrantas
+mjr
+# majoras
+mln
+# milijonas
+mlrd
+# milijardas
+mok
+# mokinys
+mokyt
+# mokytojas
+moksl
+# mokslinis
+nkt
+# nekaitomas
+ntk
+# neteiktinas
+Nr
+nr
+# numeris
+p
+# ponas
+p.d
+a.d
+# pašto dėžutė, abonentinė dėžutė
+p.m.e
+# prieš mūsų erą
+pan
+# ir panašiai
+pav
+# paveikslas
+pavad
+# pavaduotojas
+pirm
+# pirmininkas
+pl
+# plentas
+plg
+# palygink
+plk
+# pulkininkas; pelkė
+pr
+# prospektas
+Kr
+pr.Kr
+# prieš Kristų
+prok
+# prokuroras
+prot
+# protokolas
+pss
+# pusiasalis
+pšt
+# paštas
+pvz
+# pavyzdžiui
+r
+# rajonas
+red
+# redaktorius
+rš
+# raštų kalbos
+sąs
+# sąsiuvinis
+saviv
+sav
+# savivaldybė
+sekr
+# sekretorius
+sen
+# seniūnija, seniūnas
+sk
+# skaityk; skyrius
+skg
+# skersgatvis
+skyr
+sk
+# skyrius
+skv
+# skveras
+sp
+# spauda; spaustuvė
+spec
+# specialistas
+sr
+# sritis
+st
+# stotis
+str
+# straipsnis
+stud
+# studentas
+š
+š.m
+# šių metų
+šnek
+# šnekamosios
+tir
+# tiražas
+tūkst
+# tūkstantis
+up
+# upė
+upl
+# upelis
+vad
+# vadinamasis, -oji
+vlsč
+# valsčius
+ved
+# vedėjas
+vet
+# veterinarija
+virš
+# viršininkas, viršaitis
+vyr
+# vyriausiasis, -ioji; vyras
+vyresn
+# vyresnysis
+vlsč
+# valsčius
+vs
+# viensėdis
+Vt
+vt
+# vietininkas
+vtv
+vv
+# vietovardis
+žml
+# žemėlapis
+
 # Technical terms, abbreviations used in guidebooks, advertisments, etc.
 # Generally lower-case.
 air
@@ -79,6 +359,8 @@ amer
 # amerikanizmas
 anat
 # anatomija
+angl
+# angl. angliskai
 arab
 # arabų
 archeol
@@ -102,12 +384,21 @@ biol
 # biologija
 bot
 # botanika
+brt
+# burtai, burtažodis.
+brus
+# baltarusių
 buh
 # buhalterija
 chem
 # chemija
-d
-# didysis
+col
+# collectivum
+con
+conj
+# conjunctivus, jungtukas
+dab
+# dab. dabartine
 dgs
 # daugiskaita
 dial
@@ -128,6 +419,8 @@ fam
 # familiariai
 farm
 # farmacija
+filol
+# filologija
 filos
 # filosofija
 fin
@@ -135,6 +428,7 @@ fin
 fiz
 # fizika
 fiziol
+# fiziologija
 flk
 # folkloras
 fon
@@ -163,6 +457,7 @@ iron
 isp
 # ispanų
 ist
+istor
 # istorija
 it
 # italų
@@ -204,6 +499,8 @@ l
 # laikas
 lit
 # literatūrinis
+lingv
+# lingvistika
 log
 # logika
 lot
@@ -229,18 +526,24 @@ mit
 # mitologija
 mok
 # mokyklinis
+ms
+# mįslė
 muz
 # muzikinis
 n
 # naujasis
 neig
 # neigiamasis
+neol
+# neologizmas
 niek
 # niekinamai
 ofic
 # oficialus
 opt
 # optika
+orig
+# original
 p
 # pietūs
 pan
@@ -255,16 +558,25 @@ plg
 # palygink
 poet
 # poetizmas
+poez
+#  poezija
 poligr
 # poligrafija
 polit
 # politika
 ppr
 # paprastai
+pranc
 pr
-# prancūzų
+# prancūzų, prūsų
+priet
+# prietaras
+prek
+# prekyba
 prk
 # perkeltine
+prs
+# persona, asmuo
 psn
 # pasenęs žodis
 psich
@@ -284,7 +596,9 @@ rus
 sen
 # senasis
 sl
-# slengas
+# slengas, slavų
+sov
+# sovietinis
 spec
 # specialus
 sport
@@ -294,6 +608,8 @@ sudurt
 # sudurtinis
 sutr
 # sutrumpintas
+suv
+# suvalkiečių
 š
 # šiaurė
 šach
@@ -305,6 +621,7 @@ sutr
 # šnekamoji
 teatr
 tech
+techn
 # technika
 teig
 # teigiamas
@@ -313,12 +630,19 @@ teis
 tekst
 # tekstilė
 tel
+# telefonas
+teol
+# teologija
 v
 # tik vyriškosios, vakarai
 t.p
 t
 p
-# taip pat
+# ir taip pat
+t.t
+# ir taip toliau
+t.y
+# tai yra
 vaik
 # vaikų
 vart
@@ -336,6 +660,7 @@ vok
 vulg
 # vulgariai
 zool
+# zoologija
 žr
 # žiūrėk
 ž.ū
@@ -346,31 +671,24 @@ zool
 # List of titles. These are often followed by upper-case names, but do
 # not indicate sentence breaks
 #
+# Jo Eminencija
+Em
 # Gerbiamasis
 Gerb
-
-# XXX TODO .. Below are not quite correct, copied from latvian
-dr
-Dr
-med
-prof
+gerb
+# malonus
+malon
+# profesorius
 Prof
+prof
+# daktaras (mokslų)
+Dr
+dr
+habil
+med
+# inž inžinierius
 inž
 Inž
-ist.loc
-Ist.loc
-kor.loc
-Kor.loc
-v.i
-vietn
-Vietn
-
-# misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT
-# fall into this category - it sometimes ends a sentence)
-# angl angliskai
-# dab dabartine
-angl
-dab
 
 
 #Numbers only. These should only induce breaks when followed by a numeric sequence
@@ -378,4 +696,3 @@ dab
 #This case is mostly for the english "No." which can either be a sentence of its own, or
 #if followed by a number, a non-breaking prefix
 No #NUMERIC_ONLY#
-Nr #NUMERIC_ONLY#

From ab6816f9a755f37de00090829f62848372e8222e Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linasvepstas@gmail.com>
Date: Thu, 5 Jan 2017 10:08:06 -0600
Subject: [PATCH 11/12] Purely cosmetic cleanup.

Use same indentation style throughout; wrap long lines; capitalize
sentences; add punctuation; remove trailing whitespace.
---
 scripts/ems/support/split-sentences.perl | 45 ++++++++++++------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index f72767054..7bad038a1 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -29,10 +29,10 @@ while (@ARGV) {
 }
 
 if ($HELP) {
-    print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
-    print "-q: quiet mode\n";
-    print "-b: no output buffering (for use in bidirectional pipes)\n";
-    exit;
+	print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
+	print "-q: quiet mode\n";
+	print "-b: no output buffering (for use in bidirectional pipes)\n";
+	exit;
 }
 if (!$QUIET) {
 	print STDERR "Sentence Splitter v3\n";
@@ -64,9 +64,9 @@ if (-e "$prefixfile") {
 	close(PREFIX);
 }
 
-##loop text, add lines together until we get a blank line or a <p>
+## Loop over text, add lines together until we get a blank line or a <p>
 my $text = "";
-while(<STDIN>) {
+while (<STDIN>) {
 	chop;
 	if (/^<.+>$/ || /^\s*$/) {
 		#time to process this block, we've hit a blank or <p>
@@ -79,7 +79,7 @@ while(<STDIN>) {
 		$text .= $_. " ";
 	}
 }
-#do the leftover text
+# Do the leftover text.
 &do_it_for($text,"") if $text;
 
 
@@ -91,28 +91,32 @@ sub do_it_for {
 }
 
 sub preprocess {
-	#this is one paragraph
+	# This is one paragraph.
 	my($text) = @_;
 
-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line, as well as
+	# any double-spacing.
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;
 
-	#####add sentence breaks as needed#####
+	##### Add sentence breaks as needed #####
 
-	#non-period end of sentence markers (?!) followed by sentence starters.
+	# Non-period end of sentence markers (?!) followed by sentence starters.
 	$text =~ s/([?!]) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;
 
-	#multi-dots followed by sentence starters
+	# Multi-dots followed by sentence starters.
 	$text =~ s/(\.[\.]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;
 
-	# add breaks for sentences that end with some sort of punctuation inside a quote or parenthetical and are followed by a possible sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation
+	# inside a quote or parenthetical and are followed by a possible
+	# sentence starter punctuation and upper case.
 	$text =~ s/([?!\.][\ ]*[\'\"\)\]\p{IsPf}]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\ ]*[\p{IsUpper}])/$1\n$2/g;
 
-	# add breaks for sentences that end with some sort of punctuation are followed by a sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation,
+	# and are followed by a sentence starter punctuation and upper case.
 	$text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g;
 
 	# special punctuation cases are covered. Check all remaining periods.
@@ -130,30 +134,27 @@ sub preprocess {
 			} elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
 				#not breaking - upper case acronym
 			} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
-				#the next word has a bunch of initial quotes, maybe a space, then either upper case or a number
+				# The next word has a bunch of initial quotes, maybe a
+				# space, then either upper case or a number.
 				$words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/));
 				#we always add a return for these unless we have a numeric non-breaker and a number start
 			}
-
 		}
 		$text = $text.$words[$i]." ";
 	}
 
-	#we stopped one token from the end to allow for easy look-ahead. Append it now.
+	# We stopped one token from the end to allow for easy look-ahead. Append it now.
 	$text = $text.$words[$i];
 
-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line, as well as any double-spacing.
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;
 
-	#add trailing break
+	# Add trailing break.
 	$text .= "\n" unless $text =~ /\n$/;
 
 	return $text;
-
 }
-
-

From 9f5500a3a8df10ff0a99238c8c81679c9b9420a2 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linasvepstas@gmail.com>
Date: Thu, 5 Jan 2017 10:09:34 -0600
Subject: [PATCH 12/12] oops.

---
 scripts/ems/support/split-sentences.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index 7bad038a1..7f2fb3ced 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -29,7 +29,7 @@ while (@ARGV) {
 }
 
 if ($HELP) {
-	print "Usage ./split-sentences.pl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
+	print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
 	print "-q: quiet mode\n";
 	print "-b: no output buffering (for use in bidirectional pipes)\n";
 	exit;