From a3bd695cd4559416ce83194bf9ab6ea83dc8c961 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Sun, 13 Jul 2014 02:54:58 +0100
Subject: [PATCH] factor for oov is 0, not - interferes with source input. Add extra argument to lowercase input words or not

---
 .../training/wrappers/make-factor-brown-cluster-mkcls.perl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
index 60f341de8..13aa7f912 100755
--- a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
+++ b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
@@ -2,7 +2,7 @@
 
 use strict;
 
-my ($cluster_file,$in,$out,$tmp) = @ARGV;
+my ($lowercase, $cluster_file,$in,$out,$tmp) = @ARGV;
 
 my $CLUSTER = &read_cluster_from_mkcls($cluster_file);
 
@@ -17,7 +17,10 @@ while(<IN>) {
   s/ $//;
   my $first = 1;
   foreach my $word (split) {
-    my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "";
+    if ($lowercase) {
+      $word = lc($word);
+    }
+    my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "0";
     print OUT " " unless $first;
     print OUT $cluster;
     $first = 0;