mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-30 23:42:30 +03:00
add script for acquis cleaning
This commit is contained in:
parent
31c8946c8c
commit
7d96adb2a7
15
scripts/tokenizer/delete-long-words.perl
Executable file
15
scripts/tokenizer/delete-long-words.perl
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
# Filter STDIN to STDOUT, one output line per input line: every
# whitespace-separated token whose length() is 200 or more is dropped.
# The surviving tokens are re-joined with single spaces, so runs of
# whitespace collapse and leading/trailing whitespace disappears. A line
# with no surviving tokens still produces an empty output line, which keeps
# the output line-aligned with the input.
my $length_limit = 200;    # tokens must be strictly shorter than this

while (my $line = <STDIN>) {
    # chomp, not chop: chop removes the last character unconditionally and
    # would eat a real character of the last word when the final input
    # line is not newline-terminated.
    chomp $line;

    # split ' ' is the awk-style split (same as a bare split on $_): it
    # skips leading whitespace and never yields empty fields.
    my @kept = grep { length($_) < $length_limit } split ' ', $line;

    print join(' ', @kept), "\n";
}
|
Loading…
Reference in New Issue
Block a user