mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-02 17:09:36 +03:00
add script for acquis cleaning
This commit is contained in:
parent
31c8946c8c
commit
7d96adb2a7
15
scripts/tokenizer/delete-long-words.perl
Executable file
15
scripts/tokenizer/delete-long-words.perl
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/perl

# Filter for tokenized text: reads lines from STDIN and writes each line to
# STDOUT with every over-long whitespace-separated word removed. The
# remaining words are joined with single spaces. One output line is written
# per input line, so a line that loses all of its words becomes an empty line.

use strict;
use warnings;

# Words whose length() is this value or more are dropped.
use constant MAX_WORD_LENGTH => 200;

# Return $line with every word of MAX_WORD_LENGTH or more characters removed.
# Words are split on runs of whitespace; leading and trailing whitespace
# (including a trailing newline) is discarded, and the surviving words are
# joined with single spaces. The result carries no trailing newline.
sub filter_long_words {
    my ($line) = @_;
    return join ' ', grep { length($_) < MAX_WORD_LENGTH } split ' ', $line;
}

while (my $line = <STDIN>) {
    # No chop/chomp is needed: the whitespace split already strips the line
    # terminator. An unconditional chop would eat the final character of the
    # last word when the last input line has no trailing newline.
    print filter_long_words($line), "\n";
}
|
Loading…
Reference in New Issue
Block a user