mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 15:48:05 +03:00
more debugging of tm-mt scripts
This commit is contained in:
parent
13a83ae0c7
commit
41e990a814
@ -77,10 +77,10 @@
|
||||
1E42EFD715C00D6300E937EB /* Match.h */,
|
||||
1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */,
|
||||
1E42EFD215C00BAE00E937EB /* Util.h */,
|
||||
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */,
|
||||
1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */,
|
||||
1E806DD015BED3D4001914A2 /* SuffixArray.h */,
|
||||
1E806DCD15BED3BC001914A2 /* fuzzy-match.cpp */,
|
||||
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */,
|
||||
1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */,
|
||||
1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */,
|
||||
1E806DCB15BED3AC001914A2 /* Vocabulary.h */,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3,74 +3,18 @@
|
||||
type = "1"
|
||||
version = "1.0">
|
||||
<FileBreakpoints>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match.cpp"
|
||||
timestampString = "364836494.083835"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "825"
|
||||
endingLineNumber = "825"
|
||||
landmarkName = "main(int argc, char* argv[])"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match.cpp"
|
||||
timestampString = "364836573.089496"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "206"
|
||||
endingLineNumber = "206"
|
||||
landmarkName = "sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed )"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
|
||||
timestampString = "364843192.030752"
|
||||
timestampString = "364924840.150553"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "851"
|
||||
endingLineNumber = "851"
|
||||
landmarkName = "main(int argc, char* argv[])"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
|
||||
timestampString = "364843261.346081"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "938"
|
||||
endingLineNumber = "938"
|
||||
landmarkName = "main(int argc, char* argv[])"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
|
||||
timestampString = "364843304.325754"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "1035"
|
||||
endingLineNumber = "1035"
|
||||
landmarkName = "main(int argc, char* argv[])"
|
||||
startingLineNumber = "454"
|
||||
endingLineNumber = "454"
|
||||
landmarkName = "create_extract(const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
</FileBreakpoints>
|
||||
|
@ -44,6 +44,12 @@
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<CommandLineArguments>
|
||||
<CommandLineArgument
|
||||
argument = "--multiple /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/ac-test.input.tc.4 /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/acquis.truecased.4.en.uniq"
|
||||
isEnabled = "YES">
|
||||
</CommandLineArgument>
|
||||
</CommandLineArguments>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
|
276
contrib/tm-mt-integration/create_xml.perl
Executable file
276
contrib/tm-mt-integration/create_xml.perl
Executable file
@ -0,0 +1,276 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
use File::Basename;
|
||||
|
||||
sub trim($);
|
||||
|
||||
# Main loop: consume STDIN in records of five lines (TM source sentence,
# input sentence, TM target sentence, word alignment, edit path), build the
# XML frame and the hierarchical rule for each record and print them to STDOUT.
my ($source, $input, $target, $align, $path);

# NOTE(review): the five-line record carries no occurrence count for the TM
# target, so every rule is emitted with a count of 1 -- confirm that this is
# what the consumer of the rule lines expects.
my $target_count = 1;

while (defined($source = <STDIN>)) {
  $input  = <STDIN>;
  $target = <STDIN>;
  $align  = <STDIN>;
  $path   = <STDIN>;

  # A record cut short by end of input cannot be processed; fail loudly
  # instead of continuing with undefined fields.
  foreach my $field ($input, $target, $align, $path) {
    die "create_xml: incomplete record at end of input (line $.)\n"
      unless defined $field;
  }

  chomp($source);
  chomp($input);
  chomp($target);
  chomp($align);
  chomp($path);

  $source = trim($source);
  $input  = trim($input);
  $target = trim($target);
  $align  = trim($align);
  $path   = trim($path);

  my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = create_xml($source, $input, $target, $align, $path);

  print STDOUT $frame."\n";
  print STDOUT "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n";
  print STDOUT "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n";

  # The earlier version also printed "$i ||| $match_score ||| $target_count".
  # Neither a sentence index nor a match score is part of the record read
  # above, and the undeclared variables stopped the script from compiling
  # under "use strict", so that line is no longer emitted.
  # NOTE(review): restore it once the record format supplies those values.
}
|
||||
|
||||
#######################################################
|
||||
# Builds the XML frame and the hierarchical rule for one fuzzy match.
#
# Arguments (all strings, tokens separated by single spaces):
#   $source    - TM source sentence
#   $input     - input sentence
#   $target    - TM target sentence
#   $alignment - source-target word alignment, e.g. "0-0 1-2"
#   $path      - edit path between source and input, one letter per step:
#                M (match), S (substitution), I (insertion), D (deletion)
#
# Returns the list ($frame, $rule_s, $rule_t, $rule_alignment,
# $rule_alignment_inv). Progress information is written to STDERR.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;

  my @INPUT = split(/ /,$input);
  my @TARGET = split(/ /,$target);
  my %ALIGN = create_alignment($alignment);

  my %FRAME_INPUT;
  my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
  # every target word starts out as usable
  foreach (@TARGET) { push @TARGET_BITMAP,1 }

  ### STEP 1: FIND MISMATCHES

  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);

  $path .= "X"; # indicate end
  print STDERR "$input\n$source\n$target\n$path\n";
  for(my $p=0;$p<length($path);$p++) {
    my $action = substr($path,$p,1);

    # beginning of a mismatch
    if ($currently_matching && $action ne "M" && $action ne "X") {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }

    # end of a mismatch
    elsif (!$currently_matching &&
           ($action eq "M" || $action eq "X")) {

      # remove use of affected target words
      for(my $ss = $start_s; $ss<$s; $ss++) {
        foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
          $TARGET_BITMAP[$tt] = 0;
        }

        # also remove enclosed unaligned words?
      }

      # are there input words that need to be inserted ?
      print STDERR "($start_i<$i)?\n";
      if ($start_i<$i) {

        # take note of input words to be inserted
        my $insertion = "";
        for(my $ii = $start_i; $ii<$i; $ii++) {
          $insertion .= $INPUT[$ii]." ";
        }

        # find position for inserted input words

        # find first removed target word; stays undefined when none of the
        # mismatched source words is aligned (no magic upper bound, so long
        # sentences are handled like short ones)
        my $start_t;
        for(my $ss = $start_s; $ss<$s; $ss++) {
          foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
            $start_t = $tt if !defined($start_t) || $tt < $start_t;
          }
        }

        # end of sentence? add to end
        if (!defined($start_t) && $i > $#INPUT) {
          $start_t = $#TARGET;
        }

        # backtrack to previous words if unaligned
        if (!defined($start_t)) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $FRAME_INPUT{$start_t} .= $insertion;
        my %NT = ("start_t" => $start_t,
                  "start_i" => $start_i );
        push @NT,\%NT;
      }
      $currently_matching = 1;
    }

    print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
    if ($action ne "I") {
      print STDERR " ->";
      foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
        print STDERR " ".$tt;
      }
    }
    print STDERR "\n";
    # advance the source position unless the step only consumed input,
    # and the input position unless the step only consumed source
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
    # note: recorded after both counters have been advanced
    $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
    push @INPUT_BITMAP, 1 if $action eq "M";
    push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
  }


  print STDERR $target."\n";
  foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
  # keys are target positions (possibly -1), so order them numerically
  foreach (sort { $a <=> $b } keys %FRAME_INPUT) {
    print STDERR "$_: $FRAME_INPUT{$_}\n";
  }

  ### STEP 2: BUILD RULE AND FRAME

  # hierarchical rule
  my $rule_s = "";
  my $rule_pos_s = 0;
  my %RULE_ALIGNMENT_S;
  for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
    if ($INPUT_BITMAP[$i]) {
      $rule_s .= $INPUT[$i]." ";
      $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
    }
    foreach my $NT (@NT) {
      if ($i == $$NT{"start_i"}) {
        $rule_s .= "[X][X] ";
        $$NT{"rule_pos_s"} = $rule_pos_s++;
      }
    }
  }

  my $rule_t = "";
  my $rule_pos_t = 0;
  my %RULE_ALIGNMENT_T;
  # starts at -1 so that non-terminals placed before the first target word
  # (start_t == -1) are emitted as well
  for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
    if ($t>=0 && $TARGET_BITMAP[$t]) {
      $rule_t .= $TARGET[$t]." ";
      $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
    }
    foreach my $NT (@NT) {
      if ($t == $$NT{"start_t"}) {
        $rule_t .= "[X][X] ";
        $$NT{"rule_pos_t"} = $rule_pos_t++;
      }
    }
  }

  my $rule_alignment = "";
  foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
    foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
      next unless defined($RULE_ALIGNMENT_T{$t});
      $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
    }
  }
  foreach my $NT (@NT) {
    $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
  }

  # drop the trailing separator space
  chop($rule_s);
  chop($rule_t);
  chop($rule_alignment);

  my $rule_alignment_inv = "";
  foreach my $point (split(/ /,$rule_alignment)) {
    # only well-formed points are inverted; an unchecked match would reuse
    # the captures of the previous point
    if ($point =~ /^(\d+)\-(\d+)$/) {
      $rule_alignment_inv .= "$2-$1 ";
    }
  }
  chop($rule_alignment_inv);

  # frame
  my $frame = "";
  $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};

  my $currently_included = 0;
  my $start_t = -1;
  push @TARGET_BITMAP,0; # indicate end

  for(my $t=0;$t<=scalar(@TARGET);$t++) {
    # beginning of tm target inclusion
    if (!$currently_included && $TARGET_BITMAP[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }

    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
      # add xml (unless change is at the beginning of the sentence)
      if ($start_t >= 0) {
        my $included = "";
        print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
        for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
          $included .= $TARGET[$tt] . " ";
        }
        chop($included);
        $frame .= "<xml translation=\"$included\"> x </xml> ";
      }
      $currently_included = 0;
    }

    $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
    print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
  }

  print STDERR $frame."\n-------------------------------------\n";
  return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
|
||||
|
||||
# Parses a word-alignment line such as "0-0 1-2 2-1" into two lookup tables.
#
# Returns a hash (as a flat key/value list) with two entries:
#   's' => array ref indexed by source position; each element is a hash whose
#          keys are the target positions aligned to that source word
#   't' => array ref indexed by target position; each element is a hash whose
#          keys are the source positions aligned to that target word
#
# Points are separated by whitespace. A token that is not of the form
# <digits>-<digits> is reported on STDERR and skipped, so that it cannot be
# mistaken for an alignment of word 0.
sub create_alignment {
  my ($line) = @_;
  my (@ALIGNED_TO_S,@ALIGNED_TO_T);
  $line = "" unless defined $line;
  # split(' ') ignores leading, trailing and repeated whitespace and
  # therefore never yields empty tokens
  foreach my $point (split(' ',$line)) {
    unless ($point =~ /^(\d+)\-(\d+)$/) {
      print STDERR "create_alignment: skipping malformed alignment point '$point'\n";
      next;
    }
    my ($s,$t) = ($1,$2);
    $ALIGNED_TO_S[$s]{$t}++;
    $ALIGNED_TO_T[$t]{$s}++;
  }
  my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
  return %ALIGNMENT;
}
|
||||
|
||||
# Returns a copy of the argument with whitespace stripped from both ends.
sub trim($)
{
  my ($text) = @_;
  # apply both substitutions to the local copy
  for ($text) {
    s/^\s+//;
    s/\s+$//;
  }
  return $text;
}
|
||||
# Returns a copy of the argument with leading whitespace stripped.
sub ltrim($)
{
  (my $text = $_[0]) =~ s/^\s+//;
  return $text;
}
|
||||
# Returns a copy of the argument with trailing whitespace stripped.
sub rtrim($)
{
  (my $text = $_[0]) =~ s/\s+$//;
  return $text;
}
|
@ -7,6 +7,7 @@
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <time.h>
|
||||
#include <fstream>
|
||||
|
||||
#include "fuzzy-match2.h"
|
||||
|
||||
@ -348,7 +349,7 @@ int main(int argc, char* argv[])
|
||||
// create xml and extract files
|
||||
string inputStr, sourceStr;
|
||||
for (size_t pos = 0; pos < input_length; ++pos) {
|
||||
inputStr += input[i][pos] + " ";
|
||||
inputStr += vocabulary.GetWord(input[i][pos]) + " ";
|
||||
}
|
||||
|
||||
// do not try to find the best ... report multiple matches
|
||||
@ -363,6 +364,11 @@ int main(int argc, char* argv[])
|
||||
cout << letter_cost << "/" << input_letter_length << " ";
|
||||
cout << "(" << best_cost <<"/" << input_length <<") ";
|
||||
cout << "||| " << s << " ||| " << path << endl;
|
||||
|
||||
vector<WORD_ID> &sourceSentence = source[s];
|
||||
vector<SentenceAlignment> &targets = targetAndAlignment[s];
|
||||
create_extract(sourceSentence, targets, inputStr, path);
|
||||
|
||||
}
|
||||
} // if (multiple_flag)
|
||||
else {
|
||||
@ -410,23 +416,8 @@ int main(int argc, char* argv[])
|
||||
|
||||
// create xml & extracts
|
||||
vector<WORD_ID> &sourceSentence = source[best_match];
|
||||
for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
|
||||
WORD_ID wordId = sourceSentence[pos];
|
||||
sourceStr += vocabulary.GetWord(wordId) + " ";
|
||||
}
|
||||
|
||||
vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
|
||||
for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
|
||||
const SentenceAlignment &sentenceAlignment = targets[targetInd];
|
||||
string targetStr = sentenceAlignment.getTargetString();
|
||||
string alignStr = sentenceAlignment.getAlignmentString();
|
||||
|
||||
cerr << "create_xml " << endl
|
||||
<< sourceStr << endl
|
||||
<< inputStr << endl
|
||||
<< targetStr << endl
|
||||
<< alignStr << endl;
|
||||
}
|
||||
create_extract(sourceSentence, targets, inputStr, best_path);
|
||||
|
||||
} // else if (multiple_flag)
|
||||
|
||||
@ -435,3 +426,33 @@ int main(int argc, char* argv[])
|
||||
cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
|
||||
|
||||
}
|
||||
|
||||
void create_extract(const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)
|
||||
{
|
||||
string sourceStr;
|
||||
for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
|
||||
WORD_ID wordId = sourceSentence[pos];
|
||||
sourceStr += vocabulary.GetWord(wordId) + " ";
|
||||
}
|
||||
|
||||
char *inputFileName = tmpnam(NULL);
|
||||
ofstream inputFile(inputFileName);
|
||||
|
||||
for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
|
||||
const SentenceAlignment &sentenceAlignment = targets[targetInd];
|
||||
string targetStr = sentenceAlignment.getTargetString();
|
||||
string alignStr = sentenceAlignment.getAlignmentString();
|
||||
|
||||
inputFile
|
||||
<< sourceStr << endl
|
||||
<< inputStr << endl
|
||||
<< targetStr << endl
|
||||
<< alignStr << endl
|
||||
<< path << endl;
|
||||
}
|
||||
|
||||
string cmd = string("perl create_xml.perl ") + inputFileName;
|
||||
cerr << cmd << endl;
|
||||
inputFile.close();
|
||||
|
||||
}
|
||||
|
@ -32,7 +32,10 @@ int multiple_flag = false;
|
||||
int multiple_slack = 0;
|
||||
int multiple_max = 100;
|
||||
map< WORD_ID,vector< int > > single_word_index;
|
||||
// global cache for word pairs
|
||||
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
|
||||
|
||||
void create_extract(const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path);
|
||||
|
||||
void load_corpus( const char* fileName, vector< vector< WORD_ID > > &corpus )
|
||||
{ // source
|
||||
@ -159,9 +162,6 @@ void load_alignment( const char* fileName, vector< vector< SentenceAlignment > >
|
||||
|
||||
/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
|
||||
|
||||
// global cache for word pairs
|
||||
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
|
||||
|
||||
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
|
||||
{
|
||||
// check if already computed -> lookup in cache
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/perl -w -d
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
@ -20,12 +20,12 @@ my $cmd;
|
||||
my $TMPDIR=dirname($pt_file) ."/tmp.$$";
|
||||
$cmd = "mkdir -p $TMPDIR";
|
||||
`$cmd`;
|
||||
$TMPDIR = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/out/tmp.3196";
|
||||
|
||||
my $match_file = "$TMPDIR/match";
|
||||
|
||||
# suffix array creation and extraction
|
||||
$cmd = "$RealBin/fuzzy-match --multiple $in_file $source_file > $match_file";
|
||||
print STDERR "$cmd \n";
|
||||
`$cmd`;
|
||||
|
||||
# make into xml and pt
|
||||
@ -47,7 +47,8 @@ while( my $match = <MATCH> ) {
|
||||
|
||||
$score =~ /^(\d+) (.+)/ || die;
|
||||
my ($i,$match_score) = ($1,$2);
|
||||
|
||||
print STDERR "i=$i\n";
|
||||
|
||||
# construct frame
|
||||
if ($sentence < 1e9 && $sentence >= 0) {
|
||||
my $SOURCE = $ALL_SOURCE[$sentence];
|
||||
@ -92,6 +93,8 @@ if ($OUTPUT_RULES)
|
||||
sub create_xml {
|
||||
my ($source,$input,$target,$alignment,$path) = @_;
|
||||
|
||||
print STDERR " HIEU \n $source \n $input \n $target \n $alignment \n $path \n";
|
||||
|
||||
my @INPUT = split(/ /,$input);
|
||||
my @SOURCE = split(/ /,$source);
|
||||
my @TARGET = split(/ /,$target);
|
||||
|
Loading…
Reference in New Issue
Block a user