Merge

2025-01-06 19:49:41 +03:00 · 2017-01-05 15:53:04 -06:00 · 2017-01-05 15:53:04 -06:00 · 171edca393
commit 171edca393
parent 1f744ecd9b 138ebf1f1e
11 changed files with 784 additions and 40 deletions
--- a/contrib/moses2/FF/Distortion.cpp
+++ b/contrib/moses2/FF/Distortion.cpp
@ -188,4 +188,3 @@ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
 }
 }
--- a/contrib/moses2/FF/FeatureFunction.h
+++ b/contrib/moses2/FF/FeatureFunction.h
@ -80,6 +80,7 @@ public:
    return m_tuneable;
  }
  virtual void SetParameter(const std::string& key, const std::string& value);
  // may have more factors than actually need, but not guaranteed.
  virtual void
@ -118,7 +119,6 @@ protected:
  std::vector<std::vector<std::string> > m_args;
  bool m_tuneable;
  virtual void SetParameter(const std::string& key, const std::string& value);
  virtual void ReadParameters();
  void ParseLine(const std::string &line);
 };
--- a/contrib/moses2/FF/FeatureFunctions.cpp
+++ b/contrib/moses2/FF/FeatureFunctions.cpp
@ -103,8 +103,9 @@ void FeatureFunctions::Create()
        unkWP->SetParameter("suffix", m_system.options.unk.suffix);
      }
    }
  }
  OverrideFeatures();
 }
 FeatureFunction *FeatureFunctions::Create(const std::string &line)
@ -150,6 +151,17 @@ const FeatureFunction *FeatureFunctions::FindFeatureFunction(
 	return NULL;
 }
 // Look up a feature function by name and return a mutable pointer to it.
 // Scans m_featureFunctions in order and stops at the first entry whose
 // GetName() equals `name`; returns NULL when no entry matches.
 FeatureFunction *FeatureFunctions::FindFeatureFunction(
     const std::string &name)
 {
  FeatureFunction *found = NULL;
  BOOST_FOREACH(const FeatureFunction *candidate, m_featureFunctions) {
    if (candidate->GetName() == name) {
      // The elements are visited as pointers-to-const; constness is cast
      // away so the caller can modify the feature function.
      found = const_cast<FeatureFunction *>(candidate);
      break;
    }
  }
  return found;
 }
 const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
 {
  // assume only 1 unk wp
@ -243,5 +255,33 @@ void FeatureFunctions::ShowWeights(const Weights &allWeights)
  }
 }
 void FeatureFunctions::OverrideFeatures()
 {
  const Parameter &parameter = m_system.params;
  const PARAM_VEC *params = parameter.GetParam("feature-overwrite");
  for (size_t i = 0; params && i < params->size(); ++i) {
    const string &str = params->at(i);
    vector<string> toks = Tokenize(str);
    UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
    FeatureFunction *ff = FindFeatureFunction(toks[0]);
    UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]);
    for (size_t j = 1; j < toks.size(); ++j) {
      const string &keyValStr = toks[j];
      vector<string> keyVal = Tokenize(keyValStr, "=");
      UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
      cerr << "Override " << ff->GetName() << " "
              << keyVal[0] << "=" << keyVal[1] << endl;
      ff->SetParameter(keyVal[0], keyVal[1]);
    }
  }
 }
 }
--- a/contrib/moses2/FF/FeatureFunctions.h
+++ b/contrib/moses2/FF/FeatureFunctions.h
@ -95,10 +95,13 @@ protected:
  System &m_system;
  size_t m_ffStartInd;
  FeatureRegistry m_registry;
  FeatureFunction *Create(const std::string &line);
  std::string GetDefaultName(const std::string &stub);
  void OverrideFeatures();
  FeatureFunction *FindFeatureFunction(const std::string &name);
  FeatureRegistry m_registry;
 };
 }
--- a/contrib/moses2/legacy/Parameter.cpp
+++ b/contrib/moses2/legacy/Parameter.cpp
@ -94,8 +94,8 @@ Parameter::Parameter()
  AddParam(search_opts, "weight",
      "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
-  //AddParam(search_opts, "feature-overwrite",
+  AddParam(search_opts, "feature-overwrite",
-  //    "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
+      "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
  po::options_description tune_opts("Options used in tuning.");
  AddParam(tune_opts, "weight-overwrite",
@ -373,6 +373,9 @@ Parameter::Parameter()
  ///////////////////////////////////////////////////////////////////////////////////////
  // DEPRECATED options
  po::options_description deprec_opts("Deprecated Options");
  AddParam(deprec_opts, "text-type",
 	   "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
  /*
  AddParam(deprec_opts, "link-param-count",
      "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
@ -412,8 +415,6 @@ Parameter::Parameter()
      "DEPRECATED. DO NOT USE. weight for unknown word penalty");
  AddParam(deprec_opts, "weight-e", "e",
      "DEPRECATED. DO NOT USE. weight for word deletion");
  AddParam(deprec_opts, "text-type",
      "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
  AddParam(deprec_opts, "input-scores",
      "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
  AddParam(deprec_opts, "dlm-model",
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@ -17,7 +17,7 @@ using namespace std;
 namespace Moses
 {
-  InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), m_factorType(0)
+InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
 {
  ReadParameters();
 }
@ -26,7 +26,8 @@ InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
 {
 }
-void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask)
 {
  // The context scope object for this translation task
  //     contains a map of translation task-specific data
@ -66,7 +67,8 @@ void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
 }
-LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const
 {
  LanguageModelKen<lm::ngram::ProbingModel> *lm;
  lm = m_perThreadLM.get();
--- a/moses/LM/InMemoryPerSentenceOnDemandLM.h
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@ -102,7 +102,7 @@ public:
      UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::sync called prior to being initialized");
    }
  }
- 
+
  virtual void SetFFStateIdx(int state_idx) {
    if (isInitialized()) {
      GetPerThreadLM().SetFFStateIdx(state_idx);
@ -126,7 +126,7 @@ public:
      UTIL_THROW(util::Exception, "WARNING: InMemoryPerSentenceOnDemand::ReportHistoryOrder called prior to being initialized");
    }
  }
-  
+
  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@ -1196,7 +1196,7 @@ tune
 	default-name: tuning/moses.ini
 	tmp-name: tuning/tmp
 	final-model: yes
-	rerun-on-change: decoder-settings tuning-settings nbest lambda async
+	rerun-on-change: decoder decoder-settings tuning-settings nbest lambda async
 	not-error: trans: No such file or directory
 thot-tune
 	in: TRAINING:config input reference
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@ -29,10 +29,10 @@ while (@ARGV) {
 }
 if ($HELP) {
-    print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
+	print "Usage ./split-sentences.perl (-l [en|de|...]) [-q] [-b] < textfile > splitfile\n";
-    print "-q: quiet mode\n";
+	print "-q: quiet mode\n";
-    print "-b: no output buffering (for use in bidirectional pipes)\n";
+	print "-b: no output buffering (for use in bidirectional pipes)\n";
-    exit;
+	exit;
 }
 if (!$QUIET) {
 	print STDERR "Sentence Splitter v3\n";
@ -64,9 +64,9 @@ if (-e "$prefixfile") {
 	close(PREFIX);
 }
-##loop text, add lines together until we get a blank line or a <p>
+## Loop over text, add lines together until we get a blank line or a <p>
 my $text = "";
-while(<STDIN>) {
+while (<STDIN>) {
 	chop;
 	if (/^<.+>$/ || /^\s*$/) {
 		#time to process this block, we've hit a blank or <p>
@ -79,7 +79,7 @@ while(<STDIN>) {
 		$text .= $_. " ";
 	}
 }
-#do the leftover text
+# Do the leftover text.
 &do_it_for($text,"") if $text;
@ -91,28 +91,32 @@ sub do_it_for {
 }
 sub preprocess {
-	#this is one paragraph
+	# This is one paragraph.
 	my($text) = @_;
-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line, as well as
 	# any double-spacing.
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;
-	#####add sentence breaks as needed#####
+	##### Add sentence breaks as needed #####
-	#non-period end of sentence markers (?!) followed by sentence starters.
+	# Non-period end of sentence markers (?!) followed by sentence starters.
 	$text =~ s/([?!]) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;
-	#multi-dots followed by sentence starters
+	# Multi-dots followed by sentence starters.
 	$text =~ s/(\.[\.]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\p{IsUpper}])/$1\n$2/g;
-	# add breaks for sentences that end with some sort of punctuation inside a quote or parenthetical and are followed by a possible sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation
 	# inside a quote or parenthetical and are followed by a possible
 	# sentence starter punctuation and upper case.
 	$text =~ s/([?!\.][\ ]*[\'\"\)\]\p{IsPf}]+) +([\'\"\(\[\¿\¡\p{IsPi}]*[\ ]*[\p{IsUpper}])/$1\n$2/g;
-	# add breaks for sentences that end with some sort of punctuation are followed by a sentence starter punctuation and upper case
+	# Add breaks for sentences that end with some sort of punctuation,
 	# and are followed by a sentence starter punctuation and upper case.
 	$text =~ s/([?!\.]) +([\'\"\(\[\¿\¡\p{IsPi}]+[\ ]*[\p{IsUpper}])/$1\n$2/g;
 	# special punctuation cases are covered. Check all remaining periods.
@ -130,30 +134,27 @@ sub preprocess {
 			} elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
 				#not breaking - upper case acronym
 			} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
-				#the next word has a bunch of initial quotes, maybe a space, then either upper case or a number
+				# The next word has a bunch of initial quotes, maybe a
 				# space, then either upper case or a number
 				$words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/));
 				#we always add a return for these unless we have a numeric non-breaker and a number start
 			}
 		}
 		$text = $text.$words[$i]." ";
 	}
-	#we stopped one token from the end to allow for easy look-ahead. Append it now.
+	# We stopped one token from the end to allow for easy look-ahead. Append it now.
 	$text = $text.$words[$i];
-	# clean up spaces at head and tail of each line as well as any double-spacing
+	# Clean up spaces at head and tail of each line as well as any double-spacing
 	$text =~ s/ +/ /g;
 	$text =~ s/\n /\n/g;
 	$text =~ s/ \n/\n/g;
 	$text =~ s/^ //g;
 	$text =~ s/ $//g;
-	#add trailing break
+	# Add trailing break.
 	$text .= "\n" unless $text =~ /\n$/;
 	return $text;
 }
--- a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
@ -0,0 +1,698 @@
 # Anything in this file, followed by a period (and an upper-case word),
 # does NOT indicate an end-of-sentence marker.
 # Special cases are included for prefixes that ONLY appear before 0-9 numbers.
 # Any single upper case letter followed by a period is not a sentence ender
 # (excluding I occasionally, but we leave it in)
 # usually upper case letters are initials in a name
 A
 Ā
 B
 C
 Č
 D
 E
 Ē
 F
 G
 Ģ
 H
 I
 Ī
 J
 K
 Ķ
 L
 Ļ
 M
 N
 Ņ
 O
 P
 Q
 R
 S
 Š
 T
 U
 Ū
 V
 W
 X
 Y
 Z
 Ž
 # Initialis -- Džonas
 Dz
 Dž
 Just
 # Day and month abbreviations
 # m. menesis d. diena  g. gimes
 m
 mėn
 d
 g
 gim
 # Pirmadienis Penktadienis
 Pr
 Pn
 Pirm
 Antr
 Treč
 Ketv
 Penkt
 Šešt
 Sekm
 Saus
 Vas
 Kov
 Bal
 Geg
 Birž
 Liep
 Rugpj
 Rugs
 Spal
 Lapkr
 Gruod
 # Business, governmental, geographical terms
 a
 # aikštė
 adv
 # advokatas
 akad
 # akademikas
 aklg
 # akligatvis
 akt
 # aktorius
 al
 # alėja
 A.V
 # antspaudo vieta
 aps
 apskr
 # apskritis
 apyg
 # apygarda
 aps
 apskr
 # apskritis
 asist
 # asistentas
 asmv
 avd
 # asmenvardis
 a.k
 asm
 asm.k
 # asmens kodas
 atsak
 # atsakingasis
 atsisk
 sąsk
 # atsiskaitomoji sąskaita
 aut
 # autorius
 b
 k
 b.k
 # banko kodas
 bkl
 # bakalauras
 bt
 # butas
 buv
 # buvęs, -usi
 dail
 # dailininkas
 dek
 # dekanas
 dėst
 # dėstytojas
 dir
 # direktorius
 dirig
 # dirigentas
 doc
 # docentas
 drp
 # durpynas
 dš
 # dešinysis
 egz
 # egzempliorius
 eil
 # eilutė
 ekon
 # ekonomika
 el
 # elektroninis
 etc
 ež
 # ežeras
 faks
 # faksas
 fak
 # fakultetas
 gen
 # generolas
 gyd
 # gydytojas
 gv
 # gyvenvietė
 įl
 # įlanka
 Įn
 # įnagininkas
 insp
 # inspektorius
 pan
 # ir panašiai
 t.t
 # ir taip toliau
 k.a
 # kaip antai
 kand
 # kandidatas
 kat
 # katedra
 kyš
 # kyšulys
 kl
 # klasė
 kln
 # kalnas
 kn
 # knyga
 koresp
 # korespondentas
 kpt
 # kapitonas
 kr
 # kairysis
 kt
 # kitas
 kun
 # kunigas
 l
 e
 p
 l.e.p
 # laikinai einantis pareigas
 ltn
 # leitenantas
 m
 mst
 # miestas
 m.e
 # mūsų eros
 m.m
 # mokslo metai
 mot
 # moteris
 mstl
 # miestelis
 mgr
 # magistras
 mgnt
 # magistrantas
 mjr
 # majoras
 mln
 # milijonas
 mlrd
 # milijardas
 mok
 # mokinys
 mokyt
 # mokytojas
 moksl
 # mokslinis
 nkt
 # nekaitomas
 ntk
 # neteiktinas
 Nr
 nr
 # numeris
 p
 # ponas
 p.d
 a.d
 # pašto dėžutė, abonentinė dėžutė
 p.m.e
 # prieš mūsų erą
 pan
 # ir panašiai
 pav
 # paveikslas
 pavad
 # pavaduotojas
 pirm
 # pirmininkas
 pl
 # plentas
 plg
 # palygink
 plk
 # pulkininkas; pelkė
 pr
 # prospektas
 Kr
 pr.Kr
 # prieš Kristų
 prok
 # prokuroras
 prot
 # protokolas
 pss
 # pusiasalis
 pšt
 # paštas
 pvz
 # pavyzdžiui
 r
 # rajonas
 red
 # redaktorius
 rš
 # raštų kalbos
 sąs
 # sąsiuvinis
 saviv
 sav
 # savivaldybė
 sekr
 # sekretorius
 sen
 # seniūnija, seniūnas
 sk
 # skaityk; skyrius
 skg
 # skersgatvis
 skyr
 sk
 # skyrius
 skv
 # skveras
 sp
 # spauda; spaustuvė
 spec
 # specialistas
 sr
 # sritis
 st
 # stotis
 str
 # straipsnis
 stud
 # studentas
 š
 š.m
 # šių metų
 šnek
 # šnekamosios
 tir
 # tiražas
 tūkst
 # tūkstantis
 up
 # upė
 upl
 # upelis
 vad
 # vadinamasis, -oji
 vlsč
 # valsčius
 ved
 # vedėjas
 vet
 # veterinarija
 virš
 # viršininkas, viršaitis
 vyr
 # vyriausiasis, -ioji; vyras
 vyresn
 # vyresnysis
 vlsč
 # valsčius
 vs
 # viensėdis
 Vt
 vt
 # vietininkas
 vtv
 vv
 # vietovardis
 žml
 # žemėlapis
 # Technical terms, abbreviations used in guidebooks, advertisements, etc.
 # Generally lower-case.
 air
 # airiškai
 amer
 # amerikanizmas
 anat
 # anatomija
 angl
 # angl. angliskai
 arab
 # arabų
 archeol
 archit
 asm
 # asmuo
 astr
 # astronomija
 austral
 # australiškai
 aut
 # automobilis
 av
 # aviacija
 bažn
 bdv
 # būdvardis
 bibl
 # Biblija
 biol
 # biologija
 bot
 # botanika
 brt
 # burtai, burtažodis.
 brus
 # baltarusių
 buh
 # buhalterija
 chem
 # chemija
 col
 # collectivum
 con
 conj
 # conjunctivus, jungtukas
 dab
 # dab. dabartine
 dgs
 # daugiskaita
 dial
 # dialektizmas
 dipl
 dktv
 # daiktavardis
 džn
 # dažnai
 ekon
 el
 # elektra
 esam
 # esamasis laikas
 euf
 # eufemizmas
 fam
 # familiariai
 farm
 # farmacija
 filol
 # filologija
 filos
 # filosofija
 fin
 # finansai
 fiz
 # fizika
 fiziol
 # fiziologija
 flk
 # folkloras
 fon
 # fonetika
 fot
 # fotografija
 geod
 # geodezija
 geogr
 geol
 # geologija
 geom
 # geometrija
 glžk
 gr
 # graikų
 gram
 her
 # heraldika
 hidr
 # hidrotechnika
 ind
 # Indų
 iron
 # ironiškai
 isp
 # ispanų
 ist
 istor
 # istorija
 it
 # italų
 įv
 reikšm
 įv.reikšm
 # įvairiomis reikšmėmis
 jap
 # japonų
 juok
 # juokaujamai
 jūr
 # jūrininkystė
 kalb
 # kalbotyra
 kar
 # karyba
 kas
 # kasyba
 kin
 # kinematografija
 klaus
 # klausiamasis
 knyg
 # knyginis
 kom
 # komercija
 komp
 # kompiuteris
 kosm
 # kosmonautika
 kt
 # kitas
 kul
 # kulinarija
 kuop
 # kuopine
 l
 # laikas
 lit
 # literatūrinis
 lingv
 # lingvistika
 log
 # logika
 lot
 # lotynų
 mat
 # matematika
 maž
 # mažybinis
 med
 # medicina
 medž
 # medžioklė
 men
 # menas
 menk
 # menkinamai
 metal
 # metalurgija
 meteor
 min
 # mineralogija
 mit
 # mitologija
 mok
 # mokyklinis
 ms
 # mįslė
 muz
 # muzikinis
 n
 # naujasis
 neig
 # neigiamasis
 neol
 # neologizmas
 niek
 # niekinamai
 ofic
 # oficialus
 opt
 # optika
 orig
 # original
 p
 # pietūs
 pan
 # panašiai
 parl
 # parlamentas
 pat
 # patarlė
 paž
 # pažodžiui
 plg
 # palygink
 poet
 # poetizmas
 poez
 #  poezija
 poligr
 # poligrafija
 polit
 # politika
 ppr
 # paprastai
 pranc
 pr
 # prancūzų, prūsų
 priet
 # prietaras
 prek
 # prekyba
 prk
 # perkeltine
 prs
 # persona, asmuo
 psn
 # pasenęs žodis
 psich
 # psichologija
 pvz
 # pavyzdžiui
 r
 # rytai
 rad
 # radiotechnika
 rel
 # religija
 ret
 # retai
 rus
 # rusų
 sen
 # senasis
 sl
 # slengas, slavų
 sov
 # sovietinis
 spec
 # specialus
 sport
 stat
 # statyba
 sudurt
 # sudurtinis
 sutr
 # sutrumpintas
 suv
 # suvalkiečių
 š
 # šiaurė
 šach
 # šachmatai
 šiaur
 škot
 # škotiškai
 šnek
 # šnekamoji
 teatr
 tech
 techn
 # technika
 teig
 # teigiamas
 teis
 # teisė
 tekst
 # tekstilė
 tel
 # telefonas
 teol
 # teologija
 v
 # tik vyriškosios, vakarai
 t.p
 t
 p
 # ir taip pat
 t.t
 # ir taip toliau
 t.y
 # tai yra
 vaik
 # vaikų
 vart
 # vartojama
 vet
 # veterinarija
 vid
 # vidurinis
 vksm
 # veiksmažodis
 vns
 # vienaskaita
 vok
 # vokiečių
 vulg
 # vulgariai
 zool
 # zoologija
 žr
 # žiūrėk
 ž.ū
 ž
 ū
 # žemės ūkis
 # List of titles. These are often followed by upper-case names, but do
 # not indicate sentence breaks
 #
 # Jo Eminencija
 Em
 # Gerbiamasis
 Gerb
 gerb
 #  malonus
 malon
 # profesorius
 Prof
 prof
 # daktaras (mokslų)
 Dr
 dr
 habil
 med
 # inž inžinierius
 inž
 Inž
 # Numbers only. These should only suppress breaks when followed by a numeric sequence.
 # Add #NUMERIC_ONLY# after the word to get this behaviour.
 # This case is mostly for the English "No.", which can either be a sentence of its own or,
 # if followed by a number, a non-breaking prefix.
 No #NUMERIC_ONLY#
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@ -228,7 +228,7 @@ while ( my $line = <INI> ) {
                $phrase_table_impl = "PhraseDictionaryOnDisk";
                @toks = set_value( \@toks, "path", "$new_name.bin$table_flag" );
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                $phrase_table_impl = "ProbingPT";
                @toks = set_value( \@toks, "path", "$new_name.probing$table_flag" );
            }
@ -488,7 +488,7 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
                my $cmd = "$binarizer $mid_file $new_file.bin";
                safesystem($cmd) or die "Can't binarize";
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
                my $cmd = "$binarizer --input-pt $mid_file --output-dir $new_file.probing";
                if ($opt_hierarchical) {
 		    $cmd .= " --scfg";
@ -509,8 +509,8 @@ for ( my $i = 0 ; $i <= $#TABLE ; $i++ ) {
            if ( $binarizer =~ /CreateOnDiskPt/ ) {
                $lexbin =~ s/CreateOnDiskPt/processLexicalTable/;
            }
-            elsif ( $binarizer =~ /CreateProbingPT2/ ) {
+            elsif ( $binarizer =~ /CreateProbingPT/ ) {
-                $lexbin =~ s/CreateProbingPT2/processLexicalTableMin/;
+                $lexbin =~ s/CreateProbingPT/processLexicalTableMin/;
            }
            $lexbin =~ s/PhraseTable/LexicalTable/;
`@ -188,4 +188,3 @@ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,`
	`}`	`}`

	`}`	`}`