2015-05-17 16:04:04 +03:00
|
|
|
#!/usr/bin/env perl
|
2015-05-29 14:30:26 +03:00
|
|
|
#
|
|
|
|
# This file is part of moses. Its use is licensed under the GNU Lesser General
|
|
|
|
# Public License version 2.1 or, at your option, any later version.
|
2010-10-11 15:32:27 +04:00
|
|
|
|
|
|
|
# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $
|
|
|
|
|
2015-04-13 19:42:33 +03:00
|
|
|
use warnings;
|
2010-10-11 15:32:27 +04:00
|
|
|
use strict;
|
|
|
|
|
|
|
|
# Consolidate several pairs of training-data files into a single pair.
#
# Usage: consolidate-training-data.perl IN OUT CONSOLIDATED PART [PART ...]
#
#   IN, OUT       suffixes of the two files of each pair (PART.IN / PART.OUT)
#   CONSOLIDATED  stem of the files to create (CONSOLIDATED.IN / CONSOLIDATED.OUT)
#   PART          stem of one input pair
#
# With a single part the consolidated files are symbolic links to that part.
# With several parts they are the concatenation of all parts in the order
# given, after checking that both files of every part have the same number
# of lines.
#
# All file operations use Perl built-ins rather than shell commands, so that
# file names containing spaces or shell metacharacters are handled correctly
# and every failure is reported instead of being silently ignored.
my ($in,$out,$consolidated,@PART) = @ARGV;

die("usage: $0 in-suffix out-suffix consolidated-stem part-stem [part-stem ...]\n")
    unless defined $consolidated && @PART;

# Remove output left over from a previous run.
foreach my $suffix ($in, $out) {
    my $stale = "$consolidated.$suffix";
    # -e follows symbolic links, so a dangling link (e.g. left by an earlier
    # single-part run whose part has since been deleted) needs the -l test.
    next unless -e $stale || -l $stale;
    unlink($stale) or die("ERROR: cannot remove $stale: $!");
}

# A single part needs no copying: just link to it.
if (scalar @PART == 1) {
    my $part = $PART[0];
    foreach my $suffix ($in, $out) {
        symlink("$part.$suffix", "$consolidated.$suffix")
            or die("ERROR: cannot link $consolidated.$suffix to $part.$suffix: $!");
    }
    exit;
}

# Validate every part before writing anything, so that a problem with a
# later part does not leave a partially written consolidated corpus behind.
foreach my $part (@PART) {
    die("ERROR: no part $part.$in or $part.$out")
        if (! -e "$part.$in" || ! -e "$part.$out");
    my $in_size = count_newlines("$part.$in");
    my $out_size = count_newlines("$part.$out");
    die("number of lines don't match: '$part.$in' ($in_size) != '$part.$out' ($out_size)")
        if $in_size != $out_size;
}

foreach my $part (@PART) {
    append_file("$part.$in", "$consolidated.$in");
    append_file("$part.$out", "$consolidated.$out");
}

# Return the number of newline characters in $file. Like `wc -l`, a final
# line that is not terminated by a newline is not counted.
sub count_newlines {
    my ($file) = @_;
    open(my $fh, '<', $file) or die("ERROR: cannot read $file: $!");
    binmode($fh);
    my $count = 0;
    my $buffer;
    while (1) {
        my $got = read($fh, $buffer, 1 << 20);
        die("ERROR: cannot read $file: $!") unless defined $got;
        last if $got == 0;
        $count += ($buffer =~ tr/\n//);
    }
    close($fh);
    return $count;
}

# Append the bytes of $from, unchanged, to the end of $to (creating $to if
# it does not exist yet).
sub append_file {
    my ($from, $to) = @_;
    open(my $src, '<', $from) or die("ERROR: cannot read $from: $!");
    open(my $dst, '>>', $to) or die("ERROR: cannot write $to: $!");
    binmode($src);
    binmode($dst);
    my $buffer;
    while (1) {
        my $got = read($src, $buffer, 1 << 20);
        die("ERROR: cannot read $from: $!") unless defined $got;
        last if $got == 0;
        print {$dst} $buffer or die("ERROR: cannot write $to: $!");
    }
    close($src);
    # Buffered write errors (e.g. a full disk) only surface at close.
    close($dst) or die("ERROR: cannot write $to: $!");
    return;
}
|