mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
3c07c5df4d
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1307 1f5c12ca-751b-0410-a591-d2e778427230
21 lines
588 B
Perl
Executable File
21 lines
588 B
Perl
Executable File
#!/usr/bin/perl -w

# $Id$
# extract-factors.pl: extract only the desired factors from a factored corpus
# usage: extract-factors corpusfile factor-index factor-index ... > outfile
# factor indices start at 0
# factor indices too large ought to be ignored
#
# Input format: each line is a whitespace-separated sequence of tokens, and
# each token is a '|'-separated list of factors.  For every token, only the
# factors whose 0-based position was requested on the command line are kept
# (in their original corpus order, regardless of the order the indices were
# given in), re-joined with '|'.  Tokens are re-joined with a single space
# and one output line is written to STDOUT per input line.

use strict;
use warnings;

my ($filename, @factors) = @ARGV;

# Fail early with the usage line instead of an "uninitialized value" warning
# followed by a confusing open() error.
defined $filename
    or die "usage: extract-factors corpusfile factor-index factor-index ... > outfile\n";

# Set of requested factor positions, used for constant-time membership tests.
my %indices = map { $_ => 1 } @factors;

# Three-argument open with a lexical handle: the filename is never interpreted
# as a mode or a pipe, whatever characters it starts or ends with.
open(my $infile, '<', $filename)
    or die "couldn't open '$filename' for read: $!\n";

while (my $line = <$infile>) {
    # chomp (not chop): strip the line terminator only if it is present, so a
    # final line without a trailing newline does not lose its last character.
    chomp $line;

    my @kept_tokens;
    for my $token (split(/\s+/, $line)) {
        my @token_factors = split(/\|/, $token);

        # Positions that exist in this token AND were requested; requested
        # indices beyond the token's factor count simply never match.
        my @wanted = grep { $indices{$_} } 0 .. $#token_factors;

        push @kept_tokens, join('|', @token_factors[@wanted]);
    }

    print join(' ', @kept_tokens) . "\n";
}

close($infile);