This commit is contained in:
Hieu Hoang 2012-10-19 18:38:46 +01:00
parent 7a19ac861c
commit ea48ab7845

View File

@ -1,4 +1,4 @@
#!/usr/bin/perl -w
#!/usr/bin/perl -w -d
binmode( STDIN, ":utf8" );
binmode( STDOUT, ":utf8" );
@ -9,6 +9,12 @@ use File::Basename;
sub trim($);
# Debug trace: echo every command-line argument to STDERR on one line,
# each followed by a single space, bracketed by HELLO / GOODBYE markers.
print STDERR "HELLO ";
print STDERR "$_ " foreach @ARGV;
print STDERR "GOODBYE \n";
############################################
# START
@ -46,11 +52,14 @@ while ($sentenceInd = <IN>) {
$path = trim($path);
$count = trim($count);
my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = &create_xml($source, $input, $target, $align, $path);
my ( $frame, $rule_s, $rule_t, $rule_alignment, $rule_alignment_inv ) =
&create_xml( $source, $input, $target, $align, $path );
#print STDOUT $frame."\n";
print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $count\n";
print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $count\n";
print RULE_INV
"$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $count\n";
#print STDOUT "$sentenceInd ||| $score ||| $count\n";
}
@ -65,7 +74,8 @@ close(RULE_INV);
# Run train-model.perl restricted to step 6 (-first-step 6 -last-step 6)
# on the extract file "$inPath.extract", asking it to write the phrase
# translation table to "$inPath.pt". "-" is passed as the lexical file
# together with the --NoLex score option.
my $lex_file = "-";
my $cmd =
"$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $inPath.extract -lexical-file $lex_file -score-options \"--NoLex\" -phrase-translation-table $inPath.pt";
print STDERR "Executing: $cmd \n";
`$cmd`;

# Backticks never die on failure; surface a bad exit status instead of
# silently continuing. $? is -1 when the command could not be started,
# otherwise the wait status of the child.
# NOTE(review): $cmd is handed to the shell with $inPath interpolated --
# confirm the path cannot contain shell metacharacters.
warn "Command failed (status $?): $cmd\n" if $? != 0;
@ -101,8 +111,9 @@ sub create_xml {
}
# end of a mismatch
elsif (!$currently_matching &&
($action eq "M" || $action eq "X")) {
elsif ( !$currently_matching
&& ( $action eq "M" || $action eq "X" ) )
{
# remove use of affected target words
for ( my $ss = $start_s ; $ss < $s ; $ss++ ) {
@ -148,8 +159,10 @@ sub create_xml {
}
}
$FRAME_INPUT{$start_t} .= $insertion;
my %NT = ("start_t" => $start_t,
"start_i" => $start_i );
my %NT = (
"start_t" => $start_t,
"start_i" => $start_i
);
push @NT, \%NT;
}
$currently_matching = 1;
@ -170,9 +183,9 @@ sub create_xml {
push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
}
print STDERR $target . "\n";
foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
foreach (@TARGET_BITMAP) { print STDERR $_; }
print STDERR "\n";
foreach ( sort keys %FRAME_INPUT ) {
print STDERR "$_: $FRAME_INPUT{$_}\n";
}
@ -216,7 +229,8 @@ sub create_xml {
foreach my $s ( sort { $a <=> $b } keys %RULE_ALIGNMENT_S ) {
foreach my $t ( keys %{ $ALIGN{"s"}[$s] } ) {
next unless defined( $RULE_ALIGNMENT_T{$t} );
$rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
$rule_alignment .=
$RULE_ALIGNMENT_S{$s} . "-" . $RULE_ALIGNMENT_T{$t} . " ";
}
}
foreach my $NT (@NT) {
@ -243,6 +257,7 @@ sub create_xml {
push @TARGET_BITMAP, 0; # indicate end
for ( my $t = 0 ; $t <= scalar(@TARGET) ; $t++ ) {
# beginning of tm target inclusion
if ( !$currently_included && $TARGET_BITMAP[$t] ) {
$start_t = $t;
@ -250,8 +265,9 @@ sub create_xml {
}
# end of tm target inclusion (not included word or inserted input)
elsif ($currently_included &&
(!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
elsif ( $currently_included
&& ( !$TARGET_BITMAP[$t] || defined( $FRAME_INPUT{$t} ) ) )
{
# add xml (unless change is at the beginning of the sentence)
if ( $start_t >= 0 ) {
my $target = "";
@ -286,23 +302,22 @@ sub create_alignment {
}
# Perl trim function to remove whitespace from the start and end of the string.
#
# Takes a single scalar (the ($) prototype is kept so the definition agrees
# with the forward declaration `sub trim($);` earlier in the script) and
# returns a trimmed copy; the caller's variable is left untouched because
# the argument is copied out of @_ before being modified.
sub trim($) {
    my $string = shift;
    $string =~ s/^\s+//;    # drop leading whitespace
    $string =~ s/\s+$//;    # drop trailing whitespace (including a final newline)
    return $string;
}
# Left trim function to remove leading whitespace.
#
# Takes a single scalar and returns a copy with any whitespace at the start
# of the string removed; trailing whitespace is preserved and the caller's
# variable is not modified.
sub ltrim($) {
    my $string = shift;
    $string =~ s/^\s+//;
    return $string;
}
# Right trim function to remove trailing whitespace
sub rtrim($)
{
sub rtrim($) {
my $string = shift;
$string =~ s/\s+$//;
return $string;