2012-03-23 11:21:17 +04:00
|
|
|
#!/usr/bin/perl -w
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
|
|
|
|
# Escape a single line of text so that characters with a special meaning
# in Moses are replaced by XML-style entities.
#
# Parameter: $line - one input line (a trailing newline, if any, is
#                    removed by the control-character stripping below).
# Returns:   the cleaned and escaped line, without a trailing newline.
sub _escape_line {
    my ($line) = @_;

    # avoid general madness
    # Control characters (including tab, CR and LF) are deleted outright,
    # not turned into spaces; only the remaining whitespace is collapsed.
    $line =~ s/[\000-\037]//g;
    $line =~ s/\s+/ /g;
    $line =~ s/^ //;
    $line =~ s/ $//;

    # special characters in moses
    # '&' must be handled first: every later replacement introduces a
    # fresh '&' that must not be escaped a second time.
    $line =~ s/&/&amp;/g;     # escape escape
    $line =~ s/\|/&#124;/g;   # factor separator
    $line =~ s/</&lt;/g;      # xml
    $line =~ s/>/&gt;/g;      # xml
    $line =~ s/'/&apos;/g;    # xml
    $line =~ s/"/&quot;/g;    # xml
    $line =~ s/\[/&#91;/g;    # syntax non-terminal
    $line =~ s/\]/&#93;/g;    # syntax non-terminal

    # restore xml instructions
    # At this point the markup is already escaped, so the pattern has to
    # match the entity forms; only the tag delimiters and the quotes around
    # the translation attribute are turned back into literal characters.
    $line =~ s{&lt;(\S+) translation=&quot;(.+?)&quot;&gt; (.+?) &lt;/(\S+)&gt;}
              {<$1 translation="$2"> $3 </$4>}g;

    return $line;
}

# Filter STDIN to STDOUT, one escaped line per input line.
while (my $line = <STDIN>) {
    # chomp (not chop): a final line without a newline keeps its last char.
    chomp $line;
    print _escape_line($line), "\n";
}
|