Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2015-04-14 11:30:33 +04:00
commit 044968bb4b
136 changed files with 231 additions and 14 deletions

View File

@ -7,8 +7,8 @@
<Project Name="lm" Path="lm/lm.project" Active="No"/>
<Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
<Project Name="search" Path="search/search.project" Active="No"/>
<Project Name="moses" Path="moses/moses.project" Active="No"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
<Project Name="moses" Path="moses/moses.project" Active="Yes"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
<Project Name="score" Path="score/score.project" Active="No"/>
<Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
<BuildMatrix>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="manual-label" InternalType="Console">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="manual-label">
@ -14,6 +31,8 @@
<File Name="Main.cpp"/>
<File Name="Main.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -33,6 +52,8 @@
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<Library Value="boost_program_options"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
@ -107,6 +128,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -474,8 +474,6 @@
<File Name="../../../moses/FF/DistortionScoreProducer.h"/>
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.cpp"/>
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.h"/>
<File Name="../../../moses/FF/ExternalFeature.cpp"/>
<File Name="../../../moses/FF/ExternalFeature.h"/>
<File Name="../../../moses/FF/Factory.cpp"/>
<File Name="../../../moses/FF/Factory.h"/>
<File Name="../../../moses/FF/FeatureFunction.cpp"/>

View File

@ -40,12 +40,12 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
HyperTree &trie)
HyperTree &trie,
boost::unordered_set<std::size_t> &sourceTermSet)
{
PrintUserTime(std::string("Start loading HyperTree"));
// const StaticData &staticData = StaticData::Instance();
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
sourceTermSet.clear();
std::size_t count = 0;
@ -106,6 +106,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
// Source-side
HyperPath sourceFragment;
hyperPathLoader.Load(sourceString, sourceFragment);
ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
// Target-side
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
@ -144,6 +145,23 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
return true;
}
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
{
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
p != hp.nodeSeqs.end(); ++p) {
for (std::vector<std::size_t>::const_iterator q = p->begin();
q != p->end(); ++q) {
const std::size_t factorId = *q;
if (factorId >= moses_MaxNumNonterminals &&
factorId != HyperPath::kComma &&
factorId != HyperPath::kEpsilon) {
sourceTerminalSet.insert(factorId);
}
}
}
}
} // namespace F2S
} // namespace Syntax
} // namespace Moses

View File

@ -3,9 +3,12 @@
#include <istream>
#include <vector>
#include <boost/unordered_set.hpp>
#include "moses/TypeDef.h"
#include "moses/Syntax/RuleTableFF.h"
#include "HyperPath.h"
#include "HyperTree.h"
#include "HyperTreeCreator.h"
@ -23,7 +26,12 @@ public:
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &,
HyperTree &);
HyperTree &,
boost::unordered_set<std::size_t> &);
private:
void ExtractSourceTerminalSetFromHyperPath(
const HyperPath &, boost::unordered_set<std::size_t> &);
};
} // namespace F2S

View File

@ -38,6 +38,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&source)) {
m_forest = p->GetForest();
m_rootVertex = p->GetRootVertex();
m_sentenceLength = p->GetSize();
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&source)) {
T2S::InputTreeBuilder builder;
T2S::InputTree tmpTree;
@ -45,6 +46,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
boost::shared_ptr<Forest> forest = boost::make_shared<Forest>();
m_rootVertex = T2S::InputTreeToForest(tmpTree, *forest);
m_forest = forest;
m_sentenceLength = p->GetSize();
} else {
UTIL_THROW2("ERROR: F2S::Manager requires input to be a tree or forest");
}
@ -82,8 +84,13 @@ void Manager<RuleMatcher>::Decode()
p = sortedVertices.begin(); p != sortedVertices.end(); ++p) {
const Forest::Vertex &vertex = **p;
// Skip terminal vertices.
// Skip terminal vertices (after checking if they are OOVs).
if (vertex.incoming.empty()) {
if (vertex.pvertex.span.GetStartPos() > 0 &&
vertex.pvertex.span.GetEndPos() < m_sentenceLength-1 &&
IsUnknownSourceWord(vertex.pvertex.symbol)) {
m_oovs.insert(vertex.pvertex.symbol);
}
continue;
}
@ -189,6 +196,21 @@ void Manager<RuleMatcher>::InitializeStacks()
}
}
// Reports whether a source word is unknown to every loaded rule table.
//
// The ID of the word's factor 0 is looked up in the source terminal set of
// each RuleTableFF instance.  Returns false as soon as one table contains it,
// and true if none does (including when there are no rule tables at all).
template<typename RuleMatcher>
bool Manager<RuleMatcher>::IsUnknownSourceWord(const Word &w) const
{
  typedef std::vector<RuleTableFF*>::const_iterator TableIter;

  const std::size_t wordId = w[0]->GetId();
  const std::vector<RuleTableFF*> &tables = RuleTableFF::Instances();

  for (TableIter table = tables.begin(); table != tables.end(); ++table) {
    const boost::unordered_set<std::size_t> &vocab =
      (*table)->GetSourceTerminalSet();
    if (vocab.count(wordId) != 0) {
      // At least one grammar has this terminal on its source side.
      return false;
    }
  }
  return true;
}
template<typename RuleMatcher>
const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const

View File

@ -51,10 +51,13 @@ private:
void InitializeStacks();
bool IsUnknownSourceWord(const Word &) const;
void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
boost::shared_ptr<const Forest> m_forest;
const Forest::Vertex *m_rootVertex;
std::size_t m_sentenceLength; // Includes <s> and </s>
PVertexToStackMap m_stackMap;
boost::shared_ptr<HyperTree> m_glueRuleTrie;
std::vector<boost::shared_ptr<RuleMatcher> > m_mainRuleMatchers;

View File

@ -35,7 +35,8 @@ void RuleTableFF::Load()
staticData.GetSearchAlgorithm() == SyntaxT2S) {
F2S::HyperTree *trie = new F2S::HyperTree(this);
F2S::HyperTreeLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie);
loader.Load(m_input, m_output, m_filePath, *this, *trie,
m_sourceTerminalSet);
m_table = trie;
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();

View File

@ -43,10 +43,17 @@ public:
return 0;
}
// Get the source terminal vocabulary for this table's grammar (as a set of
// factor IDs).  Returns a read-only reference to the member set, so the
// result is only valid for as long as this RuleTableFF object is alive.
// NOTE(review): the set appears to be filled while the table is loaded via
// the F2S HyperTree loader; for other table types it is presumably left
// empty -- confirm before relying on it outside F2S/T2S decoding.
const boost::unordered_set<std::size_t> &GetSourceTerminalSet() const {
return m_sourceTerminalSet;
}
private:
static std::vector<RuleTableFF*> s_instances;
const RuleTable *m_table;
boost::unordered_set<std::size_t> m_sourceTerminalSet;
};
} // Syntax

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use Getopt::Std;
getopts('q');

View File

@ -1,5 +1,7 @@
#!/usr/bin/env perl
use strict;
use warnings;
use strict;
my $file = shift(@ARGV);
open(MYFILE, $file);

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
#input hindi word urdu word, delete all those entries that have number on any side
use warnings;
use utf8;
use Getopt::Std;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use utf8;
require Encode;
use IO::Handle;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use utf8;
use strict;
use Getopt::Long "GetOptions";

View File

@ -14,6 +14,7 @@ use utf8;
# 23.01.2010: added NIST p-value and interval computation
###############################################
use warnings;
use strict;
#constants

View File

@ -4,6 +4,7 @@
#sentence-by-sentence: take in a system output, with any number of factors, and a reference translation, also maybe with factors, and show each sentence and its errors
#usage: sentence-by-sentence SYSOUT [REFERENCE]+ > sentences.html
use warnings;
use strict;
use Getopt::Long;

View File

@ -4,6 +4,7 @@
# Script to convert MOSES searchgraph to DOT format
#
use warnings;
use strict;
use File::Path;
use File::Basename;

View File

@ -5,7 +5,9 @@
#usage: show-phrases-used DECODER_OUTFILE > output.html
# where DECODER_OUTFILE is the output of moses with the -T (show alignments) option
use warnings;
use strict;
BEGIN
{
my $wd= `pawd 2>/dev/null`;

View File

@ -9,6 +9,7 @@
#similar function to filter-model-given-input.pl, but only operates
#on the phrase table and doesn't require that any subdirectories exist
use warnings;
use strict;
my $MAX_LENGTH = 10;

View File

@ -3,6 +3,7 @@
# Experiment Management System
# Documentation at http://www.statmt.org/moses/?n=FactoredTraining.EMS
use warnings;
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($file,$step) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
# Create domain file from corpora

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
# Build necessary files for sparse lexical features

View File

@ -2,6 +2,7 @@
# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $
use warnings;
use strict;
my ($in,$out,$consolidated,@PART) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $cores = 8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $jobs = 20;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use IPC::Open3;
use File::Temp qw/tempdir/;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
#

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($indomain_source,,$indomain_target,$outdomain_source,$outdomain_target,$lm_training,$lm_binarizer,$order,$lm_settings,$line_count,$model);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($source_file,$target_file,$alignment_factors) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
die("ERROR syntax: reference-from-sgm.perl ref src out")

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
$|++;

View File

@ -2,6 +2,7 @@
# $Id: report-experiment-scores.perl 407 2008-11-10 14:43:31Z philipp $
use warnings;
use strict;
my $email;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
die("ERROR: syntax: run-command-on-multiple-refsets.perl cmd in out")

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use File::Temp qw/ tempfile tempdir /;

View File

@ -6,6 +6,7 @@ binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
use warnings;
use FindBin qw($RealBin);
use strict;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Cwd;
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);

View File

@ -1,5 +1,7 @@
#!/usr/bin/env perl
use warnings;
# experiment.perl support script
# get filtered rule and reordering tables and place them into a configuration file

View File

@ -1,5 +1,7 @@
#!/usr/bin/env perl
use warnings;
# experiment.perl support script
# get filtered rule and reordering tables and place them into a configuration file

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
die("ERROR: syntax is fastalign2bal.perl direct-alignment inverse-alignment source-file target-file out-stem symmetrization-method symal\n") unless scalar(@ARGV) == 7;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($language,$src,$system) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Date::Parse;

View File

@ -3,6 +3,7 @@
binmode( STDIN, ":utf8" );
binmode( STDOUT, ":utf8" );
use warnings;
use strict;
use FindBin qw($RealBin);
use File::Basename;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -6,6 +6,7 @@
#factor indices start at 0
#factor indices too large ought to be ignored
use warnings;
use strict;
my ($filename, @factors) = @ARGV;

View File

@ -3,6 +3,7 @@
# example
# ./extract-parallel.perl 8 ./coreutils-8.9/src/split "./coreutils-8.9/src/sort --batch-size=253" ./extract ./corpus.5.en ./corpus.5.ar ./align.ar-en.grow-diag-final-and ./extracted 7 --NoFileLimit orientation --GZOutput
use warnings;
use strict;
use File::Basename;

View File

@ -5,6 +5,7 @@
# Some rudimentary sanity checks are done on the fly.
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use warnings;
use strict;
my $errs = 0;

View File

@ -8,6 +8,7 @@
# Note that the output format may not contain any spaces.
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use warnings;
use strict;
use Getopt::Long;

View File

@ -2,6 +2,7 @@
# A very simple script that converts fsal back to fsa format (openfst lattices)
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use warnings;
use strict;
while (<>) {

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -3,6 +3,7 @@
# example
# ~/giza-parallel.perl 10 split ~/workspace/sourceforge/trunk/scripts/training/train-model.perl ar en train align
use warnings;
use strict;
use File::Basename;

View File

@ -4,6 +4,8 @@
#lopar2pos: extract POSs from LOPAR output
#usage: lopar2pos.pl CORPUS.lopar > CORPUS.pos
use warnings;
my $infilename = shift @ARGV;
open(INFILE, "<$infilename") or die "couldn't open '$infilename' for read: $!\n";
while(my $line = <INFILE>)

View File

@ -15,6 +15,7 @@
# added checks for existence of decoder and configuration file
# 26 Jul 2006 fix a bug related to the use of absolute path for srcfile and nbestfile
use warnings;
use strict;
#######################

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;
use Encode;

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
# $Id$
use warnings;
use strict;
my $lowercase = 0;

View File

@ -7,6 +7,7 @@ package ph_numbers;
#
# (c) 2013 TAUS
use warnings;
use strict;
run() unless caller();

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
# $Id$
use warnings;
use strict;
#######################

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $line;

View File

@ -4,6 +4,7 @@
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.sorted.gz ./lex.2.f2e ./phrase-table.2.half.f2e --GoodTuring ./phrase-table.2.coc 0
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.inv.sorted.gz ./lex.2.e2f ./phrase-table.2.half.e2f --Inverse 1
use warnings;
use strict;
use File::Basename;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
while (my $line = <STDIN>) {

View File

@ -10,6 +10,7 @@
# irst-dir = /Users/hieu/workspace/irstlm/trunk/bin
# Set smoothing method in settings, if different from modified Kneser-Ney
use warnings;
use strict;
use FindBin qw($RealBin);
use Getopt::Long;

View File

@ -9,6 +9,7 @@
# It should point to the binary file
# lmplz = /home/waziz/workspace/github/moses/bin/lmplz
use warnings;
use strict;
use FindBin qw($RealBin);
use Getopt::Long qw/GetOptionsFromArray/;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use File::Basename;
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -6,6 +6,7 @@
#
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use warnings;
use strict;
use Getopt::Long;
use CGI;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
# $Id$
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
# $Id$
use warnings;
use strict;
use FindBin qw($Bin);
use Getopt::Long "GetOptions";

View File

@ -8,6 +8,7 @@
# --possiblyUseFirstToken : boolean option; the default behaviour (when this option is not provided) is that the first token of a sentence is ignored, on the basis that the first word of a sentence is always capitalized; if this option is provided then: a) if a sentence-initial token is *not* capitalized, then it is counted, and b) if a capitalized sentence-initial token is the only token of the segment, then it is counted, but with only 10% of the weight of a normal token.
#
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,6 +1,8 @@
#!/usr/bin/env perl
# $Id: train-recaser.perl 1326 2007-03-26 05:44:27Z bojar $
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($results, $truth) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use MosesScriptsRegressionTesting;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $argv=join(" ",@ARGV);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my %opt = ();

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use MosesScriptsRegressionTesting;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use Getopt::Long;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
while(<STDIN>) {

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
while(<STDIN>) {

View File

@ -7,6 +7,8 @@
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use warnings;
use strict;
use utf8; # tell perl this script file is in UTF-8 (see all funny punct below)
@ -36,7 +38,7 @@ if ($HELP) {
exit;
}
if ($language !~ /^(cs|en|fr|it)$/) {
if ($language !~ /^(cs|en|fr|it|fi)$/) {
print STDERR "Warning: No built-in rules for language $language.\n"
}
@ -176,6 +178,11 @@ sub detokenize {
}
} elsif (($language eq "fi") && ($words[$i-1] =~ /:$/) && ($words[$i] =~ /^(N|n|A|a|Ä|ä|ssa|Ssa|ssä|Ssä|sta|stä|Sta|Stä|hun|Hun|hyn|Hyn|han|Han|hän|Hän|hön|Hön|un|Un|yn|Yn|an|An|än|Än|ön|Ön|seen|Seen|lla|Lla|llä|Llä|lta|Lta|ltä|Ltä|lle|Lle|ksi|Ksi|kse|Kse|tta|Tta|ine|Ine)(ni|si|mme|nne|nsa)?(ko|kö|han|hän|pa|pä|kaan|kään|kin)?$/)) {
# Finnish : without intervening space if followed by case suffix
# EU:N EU:n EU:ssa EU:sta EU:hun EU:iin ...
$text=$text. lc $words[$i];
$prependSpace = " ";
} else {
$text=$text.$prependSpace.$words[$i];
$prependSpace = " ";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
while(<STDIN>) {

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
binmode(STDIN, ":utf8");

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $language = "en";

View File

@ -4,6 +4,7 @@
# Start by Ulrich Germann, after noticing systematic preprocessing errors
# in some of the English Europarl data.
use warnings;
use strict;
use Getopt::Std;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use utf8;
binmode(STDIN, ":utf8");

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
#binmode(STDIN, ":utf8");

View File

@ -16,6 +16,7 @@ use warnings;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use warnings;
use FindBin qw($RealBin);
use strict;
use Time::HiRes;

View File

@ -14,6 +14,7 @@
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use warnings;
use FindBin qw($RealBin);
use strict;
use Time::HiRes;

View File

@ -6,6 +6,8 @@
#
# Ondrej Bojar.
use warnings;
my $ini = shift;
die "usage: absolutize_moses_model.pl path-to-moses.ini > moses.abs.ini"
if !defined $ini;

View File

@ -4,6 +4,7 @@
# Binarize a Moses model
#
use warnings;
use strict;
use Getopt::Long "GetOptions";

Some files were not shown because too many files have changed in this diff Show More