Merge

2025-01-05 02:22:21 +03:00 · 2017-01-05 15:53:04 -06:00 · 2017-01-05 15:53:04 -06:00 · 171edca393
commit 171edca393
parent 1f744ecd9b 138ebf1f1e
11 changed files with 784 additions and 40 deletions
--- a/contrib/moses2/FF/Distortion.cpp
+++ b/contrib/moses2/FF/Distortion.cpp
@ -188,4 +188,3 @@ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
 }

 }
-
--- a/contrib/moses2/FF/FeatureFunction.h
+++ b/contrib/moses2/FF/FeatureFunction.h
@ -80,6 +80,7 @@ public:
    return m_tuneable;
  }

+  virtual void SetParameter(const std::string& key, const std::string& value);

  // may have more factors than actually need, but not guaranteed.
  virtual void
@ -118,7 +119,6 @@ protected:
  std::vector<std::vector<std::string> > m_args;
  bool m_tuneable;

-  virtual void SetParameter(const std::string& key, const std::string& value);
  virtual void ReadParameters();
  void ParseLine(const std::string &line);
 };
--- a/contrib/moses2/FF/FeatureFunctions.cpp
+++ b/contrib/moses2/FF/FeatureFunctions.cpp
@ -103,8 +103,9 @@ void FeatureFunctions::Create()
        unkWP->SetParameter("suffix", m_system.options.unk.suffix);
      }
    }
-
  }
+
+  OverrideFeatures();
 }

 FeatureFunction *FeatureFunctions::Create(const std::string &line)
@ -150,6 +151,17 @@ const FeatureFunction *FeatureFunctions::FindFeatureFunction(
 	return NULL;
 }

+// Look up a feature function by name among m_featureFunctions.
+// Returns a mutable pointer to the first entry whose GetName() equals
+// 'name', or NULL when there is no such entry.
+FeatureFunction *FeatureFunctions::FindFeatureFunction(
+    const std::string &name)
+{
+  FeatureFunction *found = NULL;
+  BOOST_FOREACH(const FeatureFunction *candidate, m_featureFunctions) {
+    if (candidate->GetName() == name) {
+      // The loop sees the entries as pointers-to-const; callers of this
+      // overload need to modify the feature function, so drop the const.
+      found = const_cast<FeatureFunction *>(candidate);
+      break;
+    }
+  }
+  return found;
+}
+
 const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
 {
  // assume only 1 unk wp
@ -243,5 +255,33 @@ void FeatureFunctions::ShowWeights(const Weights &allWeights)
  }
 }

+// Apply every "feature-overwrite" parameter to the feature function it names.
+//
+// Each parameter value is split with Tokenize(): the first token is the
+// feature function name, every following token is a key=value pair that is
+// forwarded to that feature function's SetParameter().
+// UTIL_THROW_IF2 fires when a value has no key=value token, when the named
+// feature function cannot be found, or when a pair does not split into
+// exactly two parts on "=".
+void FeatureFunctions::OverrideFeatures()
+{
+  const Parameter &cmdLine = m_system.params;
+
+  const PARAM_VEC *overrides = cmdLine.GetParam("feature-overwrite");
+  if (overrides == NULL) {
+    // Nothing to override.
+    return;
+  }
+
+  for (size_t specInd = 0; specInd < overrides->size(); ++specInd) {
+    const string &spec = overrides->at(specInd);
+    vector<string> fields = Tokenize(spec);
+    UTIL_THROW_IF2(fields.size() <= 1, "Incorrect format for feature override: " << spec);
+
+    FeatureFunction *target = FindFeatureFunction(fields[0]);
+    UTIL_THROW_IF2(target == NULL, "Feature function not found: " << fields[0]);
+
+    // fields[0] is the feature name; the rest are key=value pairs.
+    for (size_t pairInd = 1; pairInd < fields.size(); ++pairInd) {
+      const string &pairStr = fields[pairInd];
+      vector<string> pairParts = Tokenize(pairStr, "=");
+      UTIL_THROW_IF2(pairParts.size() != 2, "Incorrect format for parameter override: " << pairStr);
+
+      const string &key = pairParts[0];
+      const string &value = pairParts[1];
+
+      cerr << "Override " << target->GetName() << " "
+              << key << "=" << value << endl;
+
+      target->SetParameter(key, value);
+    }
+  }
+}
+
 }

--- a/contrib/moses2/FF/FeatureFunctions.h
+++ b/contrib/moses2/FF/FeatureFunctions.h
@ -95,10 +95,13 @@ protected:
  System &m_system;
  size_t m_ffStartInd;

+  FeatureRegistry m_registry;
+
  FeatureFunction *Create(const std::string &line);
  std::string GetDefaultName(const std::string &stub);
+  void OverrideFeatures();
+  FeatureFunction *FindFeatureFunction(const std::string &name);

-  FeatureRegistry m_registry;
 };

 }
--- a/contrib/moses2/legacy/Parameter.cpp
+++ b/contrib/moses2/legacy/Parameter.cpp
@ -94,8 +94,8 @@ Parameter::Parameter()
  AddParam(search_opts, "weight",
      "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");

-  //AddParam(search_opts, "feature-overwrite",
-  //    "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
+  AddParam(search_opts, "feature-overwrite",
+      "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");

  po::options_description tune_opts("Options used in tuning.");
  AddParam(tune_opts, "weight-overwrite",
@ -373,6 +373,9 @@ Parameter::Parameter()
  ///////////////////////////////////////////////////////////////////////////////////////
  // DEPRECATED options
  po::options_description deprec_opts("Deprecated Options");
+  AddParam(deprec_opts, "text-type",
+	   "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
+
  /*
  AddParam(deprec_opts, "link-param-count",
      "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
@ -412,8 +415,6 @@ Parameter::Parameter()
      "DEPRECATED. DO NOT USE. weight for unknown word penalty");
  AddParam(deprec_opts, "weight-e", "e",
      "DEPRECATED. DO NOT USE. weight for word deletion");
-  AddParam(deprec_opts, "text-type",
-      "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
  AddParam(deprec_opts, "input-scores",
      "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
  AddParam(deprec_opts, "dlm-model",
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@ -17,7 +17,7 @@ using namespace std;

 namespace Moses
 {
-  InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), m_factorType(0)
+InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
 {
  ReadParameters();
 }
@ -26,7 +26,8 @@ InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
 {
 }
  
-void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask)
+{

  // The context scope object for this translation task
  //     contains a map of translation task-specific data
@ -66,7 +67,8 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {

 }

-LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const
+{

  LanguageModelKen<lm::ngram::ProbingModel> *lm;
  lm = m_perThreadLM.get();
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.h
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@ -102,7 +102,7 @@ public:
      UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::sync called prior to being initialized");
    }
  }
- 
+
  virtual void SetFFStateIdx(int state_idx) {
    if (isInitialized()) {
      GetPerThreadLM().SetFFStateIdx(state_idx);
@ -126,7 +126,7 @@ public:
      UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::ReportHistoryOrder called prior to being initialized");
    }
  }
-  
+
  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@ -1196,7 +1196,7 @@ tune
 	default-name: tuning/moses.ini
 	tmp-name: tuning/tmp
 	final-model: yes
-	rerun-on-change: decoder-settings tuning-settings nbest lambda async
+	rerun-on-change: decoder decoder-settings tuning-settings nbest lambda async
 	not-error: trans: No such file or directory
 thot-tune
 	in: TRAINING:config input reference
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@ -29,10 +29,10 @@ while (@ARGV) {
 }

 if ($HELP) {
-    print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
-    print "-q: quiet mode\n";
-    print "-b: no output buffering (for use in bidirectional pipes)\n";
-    exit;
+	print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
+	print "-q: quiet mode\n";
+	print "-b: no output buffering (for use in bidirectional pipes)\n";
+	exit;
 }
 if (!$QUIET) {
 	print STDERR "Sentence Splitter v3\n";
@ -64,9 +64,9 @@ if (-e "$prefixfile") {
 	close(PREFIX);
 }

-##loop text, add lines together until we get a blank line or a <p>
+## Loop over text, add lines together until we get a blank line or a <p>
 my $text = "";
-while(<STDIN>) {
+while (<STDIN>) {
 	chop;
 	if (/^<.+>$/ || /^\s*$/) {
 		#time to process this block, we've hit a blank or <p>
@ -79,7 +79,7 @@ while(<STDIN>) {
 		$text .= $_. " ";
 	}
 }
-#do the leftover text
+# Do the leftover text.
 &do_it_for($text,"") if $text;


@ -91,28 +91,32 @@ sub do_it_for {
 }

 sub preprocess {
-	#this is one paragraph
+	# This is one paragraph.
 	my($text) = @_;

-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line, as well as
+	# any double-spacing.
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;

-	#####add sentence breaks as needed#####
+	##### Add sentence breaks as needed #####

-	#non-period end of sentence markers (?!) followed by sentence starters.
+	# Non-period end of sentence markers (?!) followed by sentence starters.
 	$text =~ s/([?!]) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;

-	#multi-dots followed by sentence starters
+	# Multi-dots followed by sentence starters.
 	$text =~ s/(\.[\.]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;

-	# add breaks for sentences that end with some sort of punctuation inside a quote or parenthetical and are followed by a possible sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation
+	# inside a quote or parenthetical and are followed by a possible
+	# sentence starter punctuation and upper case.
 	$text =~ s/([?!\.][\ ]*[\'\"\)\]\p{IsPf}]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\ ]*[\p{IsUpper}])/$1\n$2/g;

-	# add breaks for sentences that end with some sort of punctuation are followed by a sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation,
+	# and are followed by a sentence starter punctuation and upper case.
 	$text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g;

 	# special punctuation cases are covered. Check all remaining periods.
@ -130,30 +134,27 @@ sub preprocess {
 			} elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
 				#not breaking - upper case acronym
 			} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
-				#the next word has a bunch of initial quotes, maybe a space, then either upper case or a number
+				# The next word has a bunch of initial quotes, maybe a
+				# space, then either upper case or a number
 				$words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/));
 				#we always add a return for these unless we have a numeric non-breaker and a number start
 			}
-
 		}
 		$text = $text.$words[$i]." ";
 	}

-	#we stopped one token from the end to allow for easy look-ahead. Append it now.
+	# We stopped one token from the end to allow for easy look-ahead. Append it now.
 	$text = $text.$words[$i];

-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line as well as any double-spacing
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;

-	#add trailing break
+	# Add trailing break.
 	$text .= "\n" unless $text =~ /\n$/;

 	return $text;
-
 }
-
-
--- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
@ -0,0 +1,698 @@
+# Anything in this file, followed by a period (and an upper-case word),
+# does NOT indicate an end-of-sentence marker.
+# Special cases are included for prefixes that ONLY appear before 0-9 numbers.
+
+# Any single upper case letter followed by a period is not a sentence ender
+# (excluding I occasionally, but we leave it in)
+# usually upper case letters are initials in a name
+A
+Ā
+B
+C
+Č
+D
+E
+Ē
+F
+G
+Ģ
+H
+I
+Ī
+J
+K
+Ķ
+L
+Ļ
+M
+N
+Ņ
+O
+P
+Q
+R
+S
+Š
+T
+U
+Ū
+V
+W
+X
+Y
+Z
+Ž
+
+# Initialis -- Džonas
+Dz
+Dž
+Just
+
+# Day and month abbreviations
+# m. menesis d. diena  g. gimes
+m
+mėn
+d
+g
+gim
+# Pirmadienis Penktadienis
+Pr
+Pn
+Pirm
+Antr
+Treč
+Ketv
+Penkt
+Šešt
+Sekm
+Saus
+Vas
+Kov
+Bal
+Geg
+Birž
+Liep
+Rugpj
+Rugs
+Spal
+Lapkr
+Gruod
+
+# Business, governmental, geographical terms
+a
+# aikštė
+adv
+# advokatas
+akad
+# akademikas
+aklg
+# akligatvis
+akt
+# aktorius
+al
+# alėja
+A.V
+# antspaudo vieta
+aps
+apskr
+# apskritis
+apyg
+# apygarda
+aps
+apskr
+# apskritis
+asist
+# asistentas
+asmv
+avd
+# asmenvardis
+a.k
+asm
+asm.k
+# asmens kodas
+atsak
+# atsakingasis
+atsisk
+sąsk
+# atsiskaitomoji sąskaita
+aut
+# autorius
+b
+k
+b.k
+# banko kodas
+bkl
+# bakalauras
+bt
+# butas
+buv
+# buvęs, -usi
+dail
+# dailininkas
+dek
+# dekanas
+dėst
+# dėstytojas
+dir
+# direktorius
+dirig
+# dirigentas
+doc
+# docentas
+drp
+# durpynas
+dš
+# dešinysis
+egz
+# egzempliorius
+eil
+# eilutė
+ekon
+# ekonomika
+el
+# elektroninis
+etc
+ež
+# ežeras
+faks
+# faksas
+fak
+# fakultetas
+gen
+# generolas
+gyd
+# gydytojas
+gv
+# gyvenvietė
+įl
+# įlanka
+Įn
+# įnagininkas
+insp
+# inspektorius
+pan
+# ir panašiai
+t.t
+# ir taip toliau
+k.a
+# kaip antai
+kand
+# kandidatas
+kat
+# katedra
+kyš
+# kyšulys
+kl
+# klasė
+kln
+# kalnas
+kn
+# knyga
+koresp
+# korespondentas
+kpt
+# kapitonas
+kr
+# kairysis
+kt
+# kitas
+kun
+# kunigas
+l
+e
+p
+l.e.p
+# laikinai einantis pareigas
+ltn
+# leitenantas
+m
+mst
+# miestas
+m.e
+# mūsų eros
+m.m
+# mokslo metai
+mot
+# moteris
+mstl
+# miestelis
+mgr
+# magistras
+mgnt
+# magistrantas
+mjr
+# majoras
+mln
+# milijonas
+mlrd
+# milijardas
+mok
+# mokinys
+mokyt
+# mokytojas
+moksl
+# mokslinis
+nkt
+# nekaitomas
+ntk
+# neteiktinas
+Nr
+nr
+# numeris
+p
+# ponas
+p.d
+a.d
+# pašto dėžutė, abonentinė dėžutė
+p.m.e
+# prieš mūsų erą
+pan
+# ir panašiai
+pav
+# paveikslas
+pavad
+# pavaduotojas
+pirm
+# pirmininkas
+pl
+# plentas
+plg
+# palygink
+plk
+# pulkininkas; pelkė
+pr
+# prospektas
+Kr
+pr.Kr
+# prieš Kristų
+prok
+# prokuroras
+prot
+# protokolas
+pss
+# pusiasalis
+pšt
+# paštas
+pvz
+# pavyzdžiui
+r
+# rajonas
+red
+# redaktorius
+rš
+# raštų kalbos
+sąs
+# sąsiuvinis
+saviv
+sav
+# savivaldybė
+sekr
+# sekretorius
+sen
+# seniūnija, seniūnas
+sk
+# skaityk; skyrius
+skg
+# skersgatvis
+skyr
+sk
+# skyrius
+skv
+# skveras
+sp
+# spauda; spaustuvė
+spec
+# specialistas
+sr
+# sritis
+st
+# stotis
+str
+# straipsnis
+stud
+# studentas
+š
+š.m
+# šių metų
+šnek
+# šnekamosios
+tir
+# tiražas
+tūkst
+# tūkstantis
+up
+# upė
+upl
+# upelis
+vad
+# vadinamasis, -oji
+vlsč
+# valsčius
+ved
+# vedėjas
+vet
+# veterinarija
+virš
+# viršininkas, viršaitis
+vyr
+# vyriausiasis, -ioji; vyras
+vyresn
+# vyresnysis
+vlsč
+# valsčius
+vs
+# viensėdis
+Vt
+vt
+# vietininkas
+vtv
+vv
+# vietovardis
+žml
+# žemėlapis
+
+# Technical terms, abbreviations used in guidebooks, advertisements, etc.
+# Generally lower-case.
+air
+# airiškai
+amer
+# amerikanizmas
+anat
+# anatomija
+angl
+# angl. angliskai
+arab
+# arabų
+archeol
+archit
+asm
+# asmuo
+astr
+# astronomija
+austral
+# australiškai
+aut
+# automobilis
+av
+# aviacija
+bažn
+bdv
+# būdvardis
+bibl
+# Biblija
+biol
+# biologija
+bot
+# botanika
+brt
+# burtai, burtažodis.
+brus
+# baltarusių
+buh
+# buhalterija
+chem
+# chemija
+col
+# collectivum
+con
+conj
+# conjunctivus, jungtukas
+dab
+# dab. dabartine
+dgs
+# daugiskaita
+dial
+# dialektizmas
+dipl
+dktv
+# daiktavardis
+džn
+# dažnai
+ekon
+el
+# elektra
+esam
+# esamasis laikas
+euf
+# eufemizmas
+fam
+# familiariai
+farm
+# farmacija
+filol
+# filologija
+filos
+# filosofija
+fin
+# finansai
+fiz
+# fizika
+fiziol
+# fiziologija
+flk
+# folkloras
+fon
+# fonetika
+fot
+# fotografija
+geod
+# geodezija
+geogr
+geol
+# geologija
+geom
+# geometrija
+glžk
+gr
+# graikų
+gram
+her
+# heraldika
+hidr
+# hidrotechnika
+ind
+# Indų
+iron
+# ironiškai
+isp
+# ispanų
+ist
+istor
+# istorija
+it
+# italų
+įv
+reikšm
+įv.reikšm
+# įvairiomis reikšmėmis
+jap
+# japonų
+juok
+# juokaujamai
+jūr
+# jūrininkystė
+kalb
+# kalbotyra
+kar
+# karyba
+kas
+# kasyba
+kin
+# kinematografija
+klaus
+# klausiamasis
+knyg
+# knyginis
+kom
+# komercija
+komp
+# kompiuteris
+kosm
+# kosmonautika
+kt
+# kitas
+kul
+# kulinarija
+kuop
+# kuopine
+l
+# laikas
+lit
+# literatūrinis
+lingv
+# lingvistika
+log
+# logika
+lot
+# lotynų
+mat
+# matematika
+maž
+# mažybinis
+med
+# medicina
+medž
+# medžioklė
+men
+# menas
+menk
+# menkinamai
+metal
+# metalurgija
+meteor
+min
+# mineralogija
+mit
+# mitologija
+mok
+# mokyklinis
+ms
+# mįslė
+muz
+# muzikinis
+n
+# naujasis
+neig
+# neigiamasis
+neol
+# neologizmas
+niek
+# niekinamai
+ofic
+# oficialus
+opt
+# optika
+orig
+# original
+p
+# pietūs
+pan
+# panašiai
+parl
+# parlamentas
+pat
+# patarlė
+paž
+# pažodžiui
+plg
+# palygink
+poet
+# poetizmas
+poez
+#  poezija
+poligr
+# poligrafija
+polit
+# politika
+ppr
+# paprastai
+pranc
+pr
+# prancūzų, prūsų
+priet
+# prietaras
+prek
+# prekyba
+prk
+# perkeltine
+prs
+# persona, asmuo
+psn
+# pasenęs žodis
+psich
+# psichologija
+pvz
+# pavyzdžiui
+r
+# rytai
+rad
+# radiotechnika
+rel
+# religija
+ret
+# retai
+rus
+# rusų
+sen
+# senasis
+sl
+# slengas, slavų
+sov
+# sovietinis
+spec
+# specialus
+sport
+stat
+# statyba
+sudurt
+# sudurtinis
+sutr
+# sutrumpintas
+suv
+# suvalkiečių
+š
+# šiaurė
+šach
+# šachmatai
+šiaur
+škot
+# škotiškai
+šnek
+# šnekamoji
+teatr
+tech
+techn
+# technika
+teig
+# teigiamas
+teis
+# teisė
+tekst
+# tekstilė
+tel
+# telefonas
+teol
+# teologija
+v
+# tik vyriškosios, vakarai
+t.p
+t
+p
+# ir taip pat
+t.t
+# ir taip toliau
+t.y
+# tai yra
+vaik
+# vaikų
+vart
+# vartojama
+vet
+# veterinarija
+vid
+# vidurinis
+vksm
+# veiksmažodis
+vns
+# vienaskaita
+vok
+# vokiečių
+vulg
+# vulgariai
+zool
+# zoologija
+žr
+# žiūrėk
+ž.ū
+ž
+ū
+# žemės ūkis
+
+# List of titles. These are often followed by upper-case names, but do
+# not indicate sentence breaks
+#
+# Jo Eminencija
+Em
+# Gerbiamasis
+Gerb
+gerb
+#  malonus
+malon
+# profesorius
+Prof
+prof
+# daktaras (mokslų)
+Dr
+dr
+habil
+med
+# inž inžinierius
+inž
+Inž
+
+
+# Numbers only. These act as non-breaking prefixes only when followed by a numeric sequence.
+# Add #NUMERIC_ONLY# after the word to enable this behavior.
+# This case is mostly for the English "No.", which can either be a sentence of its own or,
+# if followed by a number, a non-breaking prefix.
+No #NUMERIC_ONLY#
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@ -228,7 +228,7 @@ while ( my $line = <INI> ) {
                $phrase_table_impl = "PhraseDictionaryOnDisk";
                @toks = set_value( \@toks, "path", "$new_name.bin$table_flag" );
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                $phrase_table_impl = "ProbingPT";
                @toks = set_value( \@toks, "path", "$new_name.probing$table_flag" );
            }
@ -488,7 +488,7 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
                my $cmd = "$binarizer $mid_file $new_file.bin";
                safesystem($cmd) or die "Can't binarize";
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                my $cmd = "$binarizer --input-pt $mid_file --output-dir $new_file.probing";
                if ($opt_hierarchical) {
 		    $cmd .= " --scfg";
@ -509,8 +509,8 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
            if ( $binarizer =~ /CreateOnDiskPt/ ) {
                $lexbin =~ s/CreateOnDiskPt/processLexicalTable/;
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
-                $lexbin =~ s/CreateProbingPT2/processLexicalTableMin/;
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
+                $lexbin =~ s/CreateProbingPT/processLexicalTableMin/;
            }

            $lexbin =~ s/PhraseTable/LexicalTable/;