mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
044968bb4b
@ -7,8 +7,8 @@
|
||||
<Project Name="lm" Path="lm/lm.project" Active="No"/>
|
||||
<Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
|
||||
<Project Name="search" Path="search/search.project" Active="No"/>
|
||||
<Project Name="moses" Path="moses/moses.project" Active="No"/>
|
||||
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
|
||||
<Project Name="moses" Path="moses/moses.project" Active="Yes"/>
|
||||
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
|
||||
<Project Name="score" Path="score/score.project" Active="No"/>
|
||||
<Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
|
||||
<BuildMatrix>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="manual-label" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="manual-label">
|
||||
@ -14,6 +31,8 @@
|
||||
<File Name="Main.cpp"/>
|
||||
<File Name="Main.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -33,6 +52,8 @@
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<Library Value="boost_program_options"/>
|
||||
<Library Value="boost_filesystem"/>
|
||||
<Library Value="boost_system"/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
@ -107,6 +128,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -474,8 +474,6 @@
|
||||
<File Name="../../../moses/FF/DistortionScoreProducer.h"/>
|
||||
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.cpp"/>
|
||||
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.h"/>
|
||||
<File Name="../../../moses/FF/ExternalFeature.cpp"/>
|
||||
<File Name="../../../moses/FF/ExternalFeature.h"/>
|
||||
<File Name="../../../moses/FF/Factory.cpp"/>
|
||||
<File Name="../../../moses/FF/Factory.h"/>
|
||||
<File Name="../../../moses/FF/FeatureFunction.cpp"/>
|
||||
|
@ -40,12 +40,12 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &ff,
|
||||
HyperTree &trie)
|
||||
HyperTree &trie,
|
||||
boost::unordered_set<std::size_t> &sourceTermSet)
|
||||
{
|
||||
PrintUserTime(std::string("Start loading HyperTree"));
|
||||
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
|
||||
sourceTermSet.clear();
|
||||
|
||||
std::size_t count = 0;
|
||||
|
||||
@ -106,6 +106,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
// Source-side
|
||||
HyperPath sourceFragment;
|
||||
hyperPathLoader.Load(sourceString, sourceFragment);
|
||||
ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
|
||||
|
||||
// Target-side
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
|
||||
@ -144,6 +145,23 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
return true;
|
||||
}
|
||||
|
||||
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
|
||||
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
|
||||
{
|
||||
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
|
||||
p != hp.nodeSeqs.end(); ++p) {
|
||||
for (std::vector<std::size_t>::const_iterator q = p->begin();
|
||||
q != p->end(); ++q) {
|
||||
const std::size_t factorId = *q;
|
||||
if (factorId >= moses_MaxNumNonterminals &&
|
||||
factorId != HyperPath::kComma &&
|
||||
factorId != HyperPath::kEpsilon) {
|
||||
sourceTerminalSet.insert(factorId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace F2S
|
||||
} // namespace Syntax
|
||||
} // namespace Moses
|
||||
|
@ -3,9 +3,12 @@
|
||||
#include <istream>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Syntax/RuleTableFF.h"
|
||||
|
||||
#include "HyperPath.h"
|
||||
#include "HyperTree.h"
|
||||
#include "HyperTreeCreator.h"
|
||||
|
||||
@ -23,7 +26,12 @@ public:
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &,
|
||||
HyperTree &);
|
||||
HyperTree &,
|
||||
boost::unordered_set<std::size_t> &);
|
||||
|
||||
private:
|
||||
void ExtractSourceTerminalSetFromHyperPath(
|
||||
const HyperPath &, boost::unordered_set<std::size_t> &);
|
||||
};
|
||||
|
||||
} // namespace F2S
|
||||
|
@ -38,6 +38,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
|
||||
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&source)) {
|
||||
m_forest = p->GetForest();
|
||||
m_rootVertex = p->GetRootVertex();
|
||||
m_sentenceLength = p->GetSize();
|
||||
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&source)) {
|
||||
T2S::InputTreeBuilder builder;
|
||||
T2S::InputTree tmpTree;
|
||||
@ -45,6 +46,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
|
||||
boost::shared_ptr<Forest> forest = boost::make_shared<Forest>();
|
||||
m_rootVertex = T2S::InputTreeToForest(tmpTree, *forest);
|
||||
m_forest = forest;
|
||||
m_sentenceLength = p->GetSize();
|
||||
} else {
|
||||
UTIL_THROW2("ERROR: F2S::Manager requires input to be a tree or forest");
|
||||
}
|
||||
@ -82,8 +84,13 @@ void Manager<RuleMatcher>::Decode()
|
||||
p = sortedVertices.begin(); p != sortedVertices.end(); ++p) {
|
||||
const Forest::Vertex &vertex = **p;
|
||||
|
||||
// Skip terminal vertices.
|
||||
// Skip terminal vertices (after checking if they are OOVs).
|
||||
if (vertex.incoming.empty()) {
|
||||
if (vertex.pvertex.span.GetStartPos() > 0 &&
|
||||
vertex.pvertex.span.GetEndPos() < m_sentenceLength-1 &&
|
||||
IsUnknownSourceWord(vertex.pvertex.symbol)) {
|
||||
m_oovs.insert(vertex.pvertex.symbol);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -189,6 +196,21 @@ void Manager<RuleMatcher>::InitializeStacks()
|
||||
}
|
||||
}
|
||||
|
||||
template<typename RuleMatcher>
|
||||
bool Manager<RuleMatcher>::IsUnknownSourceWord(const Word &w) const
|
||||
{
|
||||
const std::size_t factorId = w[0]->GetId();
|
||||
const std::vector<RuleTableFF*> &ffs = RuleTableFF::Instances();
|
||||
for (std::size_t i = 0; i < ffs.size(); ++i) {
|
||||
RuleTableFF *ff = ffs[i];
|
||||
const boost::unordered_set<std::size_t> &sourceTerms =
|
||||
ff->GetSourceTerminalSet();
|
||||
if (sourceTerms.find(factorId) != sourceTerms.end()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename RuleMatcher>
|
||||
const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const
|
||||
|
@ -51,10 +51,13 @@ private:
|
||||
|
||||
void InitializeStacks();
|
||||
|
||||
bool IsUnknownSourceWord(const Word &) const;
|
||||
|
||||
void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
|
||||
|
||||
boost::shared_ptr<const Forest> m_forest;
|
||||
const Forest::Vertex *m_rootVertex;
|
||||
std::size_t m_sentenceLength; // Includes <s> and </s>
|
||||
PVertexToStackMap m_stackMap;
|
||||
boost::shared_ptr<HyperTree> m_glueRuleTrie;
|
||||
std::vector<boost::shared_ptr<RuleMatcher> > m_mainRuleMatchers;
|
||||
|
@ -35,7 +35,8 @@ void RuleTableFF::Load()
|
||||
staticData.GetSearchAlgorithm() == SyntaxT2S) {
|
||||
F2S::HyperTree *trie = new F2S::HyperTree(this);
|
||||
F2S::HyperTreeLoader loader;
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie);
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie,
|
||||
m_sourceTerminalSet);
|
||||
m_table = trie;
|
||||
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
|
||||
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
|
||||
|
@ -43,10 +43,17 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Get the source terminal vocabulary for this table's grammar (as a set of
|
||||
// factor IDs)
|
||||
const boost::unordered_set<std::size_t> &GetSourceTerminalSet() const {
|
||||
return m_sourceTerminalSet;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<RuleTableFF*> s_instances;
|
||||
|
||||
const RuleTable *m_table;
|
||||
boost::unordered_set<std::size_t> m_sourceTerminalSet;
|
||||
};
|
||||
|
||||
} // Syntax
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
use FindBin qw($RealBin);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use Getopt::Std;
|
||||
getopts('q');
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $file = shift(@ARGV);
|
||||
open(MYFILE, $file);
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# Input: Hindi word, Urdu word. Deletes all entries that have a number on either side.
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Getopt::Std;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use utf8;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use utf8;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use utf8;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use utf8;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use utf8;
|
||||
require Encode;
|
||||
use IO::Handle;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use utf8;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
@ -14,6 +14,7 @@ use utf8;
|
||||
# 23.01.2010: added NIST p-value and interval computation
|
||||
###############################################
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
#constants
|
||||
|
@ -4,6 +4,7 @@
|
||||
#sentence-by-sentence: take in a system output, with any number of factors, and a reference translation, also maybe with factors, and show each sentence and its errors
|
||||
#usage: sentence-by-sentence SYSOUT [REFERENCE]+ > sentences.html
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
# Script to convert MOSES searchgraph to DOT format
|
||||
#
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Path;
|
||||
use File::Basename;
|
||||
|
@ -5,7 +5,9 @@
|
||||
#usage: show-phrases-used DECODER_OUTFILE > output.html
|
||||
# where DECODER_OUTFILE is the output of moses with the -T (show alignments) option
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
BEGIN
|
||||
{
|
||||
my $wd= `pawd 2>/dev/null`;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#similar function to filter-model-given-input.pl, but only operates
|
||||
#on the phrase table and doesn't require that any subdirectories exist
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $MAX_LENGTH = 10;
|
||||
|
@ -3,6 +3,7 @@
|
||||
# Experiment Management System
|
||||
# Documentation at http://www.statmt.org/moses/?n=FactoredTraining.EMS
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
use FindBin qw($RealBin);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($file,$step) = @ARGV;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
# Create domain file from corpora
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
# Build necessary files for sparse lexical features
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($in,$out,$consolidated,@PART) = @ARGV;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $cores = 8;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $jobs = 20;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use IPC::Open3;
|
||||
use File::Temp qw/tempdir/;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
#
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($indomain_source,,$indomain_target,$outdomain_source,$outdomain_target,$lm_training,$lm_binarizer,$order,$lm_settings,$line_count,$model);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($source_file,$target_file,$alignment_factors) = @ARGV;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
die("ERROR syntax: reference-from-sgm.perl ref src out")
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
$|++;
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
# $Id: report-experiment-scores.perl 407 2008-11-10 14:43:31Z philipp $
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $email;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
die("ERROR: syntax: run-command-on-multiple-refsets.perl cmd in out")
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Temp qw/ tempfile tempdir /;
|
||||
|
||||
|
@ -6,6 +6,7 @@ binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
binmode(STDERR, ":utf8");
|
||||
|
||||
use warnings;
|
||||
use FindBin qw($RealBin);
|
||||
use strict;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Cwd;
|
||||
use FindBin qw($RealBin);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
use FindBin qw($RealBin);
|
||||
|
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
|
||||
# experiment.perl support script
|
||||
# get filtered rule and reordering tables and place them into a configuration file
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
|
||||
# experiment.perl support script
|
||||
# get filtered rule and reordering tables and place them into a configuration file
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
die("ERROR: syntax is fastalign2bal.perl direct-alignment inverse-alignment source-file target-file out-stem symmetrization-method symal\n") unless scalar(@ARGV) == 7;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($language,$src,$system) = @ARGV;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Date::Parse;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
binmode( STDIN, ":utf8" );
|
||||
binmode( STDOUT, ":utf8" );
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
use File::Basename;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#factor indices start at 0
|
||||
#factor indices too large ought to be ignored
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my ($filename, @factors) = @ARGV;
|
||||
|
@ -3,6 +3,7 @@
|
||||
# example
|
||||
# ./extract-parallel.perl 8 ./coreutils-8.9/src/split "./coreutils-8.9/src/sort --batch-size=253" ./extract ./corpus.5.en ./corpus.5.ar ./align.ar-en.grow-diag-final-and ./extracted 7 --NoFileLimit orientation --GZOutput
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Basename;
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
# Some rudimentary sanity checks are done on the fly.
|
||||
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $errs = 0;
|
||||
|
@ -8,6 +8,7 @@
|
||||
# Note that the output format may not contain any spaces.
|
||||
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
# A very simple script that converts fsal back to fsa format (openfst lattices)
|
||||
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
while (<>) {
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use utf8;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
# example
|
||||
# ~/giza-parallel.perl 10 split ~/workspace/sourceforge/trunk/scripts/training/train-model.perl ar en train align
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Basename;
|
||||
|
||||
|
@ -4,6 +4,8 @@
|
||||
#lopar2pos: extract POSs from LOPAR output
|
||||
#usage: lopar2pos.pl CORPUS.lopar > CORPUS.pos
|
||||
|
||||
use warnings;
|
||||
|
||||
my $infilename = shift @ARGV;
|
||||
open(INFILE, "<$infilename") or die "couldn't open '$infilename' for read: $!\n";
|
||||
while(my $line = <INFILE>)
|
||||
|
@ -15,6 +15,7 @@
|
||||
# added checks for existence of decoder and configuration file
|
||||
# 26 Jul 2006 fix a bug related to the use of absolute path for srcfile and nbestfile
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
#######################
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use utf8;
|
||||
use Encode;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# $Id$
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $lowercase = 0;
|
||||
|
@ -7,6 +7,7 @@ package ph_numbers;
|
||||
#
|
||||
# (c) 2013 TAUS
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
run() unless caller();
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# $Id$
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
#######################
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $line;
|
||||
|
@ -4,6 +4,7 @@
|
||||
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.sorted.gz ./lex.2.f2e ./phrase-table.2.half.f2e --GoodTuring ./phrase-table.2.coc 0
|
||||
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.inv.sorted.gz ./lex.2.e2f ./phrase-table.2.half.e2f --Inverse 1
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Basename;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
while (my $line = <STDIN>) {
|
||||
|
@ -10,6 +10,7 @@
|
||||
# irst-dir = /Users/hieu/workspace/irstlm/trunk/bin
|
||||
# Set smoothing method in settings, if different from modified Kneser-Ney
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
use Getopt::Long;
|
||||
|
@ -9,6 +9,7 @@
|
||||
# It should point to the binary file
|
||||
# lmplz = /home/waziz/workspace/github/moses/bin/lmplz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
use Getopt::Long qw/GetOptionsFromArray/;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use File::Basename;
|
||||
use FindBin qw($RealBin);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#
|
||||
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use CGI;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# $Id$
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# $Id$
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($Bin);
|
||||
use Getopt::Long "GetOptions";
|
||||
|
@ -8,6 +8,7 @@
|
||||
# --possiblyUseFirstToken : boolean option; the default behaviour (when this option is not provided) is that the first token of a sentence is ignored, on the basis that the first word of a sentence is always capitalized; if this option is provided then: a) if a sentence-initial token is *not* capitalized, then it is counted, and b) if a capitalized sentence-initial token is the only token of the segment, then it is counted, but with only 10% of the weight of a normal token.
|
||||
#
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# $Id: train-recaser.perl 1326 2007-03-26 05:44:27Z bojar $
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long "GetOptions";
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
my ($results, $truth) = @ARGV;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
|
||||
use MosesScriptsRegressionTesting;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $argv=join(" ",@ARGV);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my %opt = ();
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
|
||||
use MosesScriptsRegressionTesting;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
|
||||
use Getopt::Long;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
while(<STDIN>) {
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
while(<STDIN>) {
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use utf8; # tell perl this script file is in UTF-8 (see all funny punct below)
|
||||
|
||||
@ -36,7 +38,7 @@ if ($HELP) {
|
||||
exit;
|
||||
}
|
||||
|
||||
if ($language !~ /^(cs|en|fr|it)$/) {
|
||||
if ($language !~ /^(cs|en|fr|it|fi)$/) {
|
||||
print STDERR "Warning: No built-in rules for language $language.\n"
|
||||
}
|
||||
|
||||
@ -176,6 +178,11 @@ sub detokenize {
|
||||
|
||||
}
|
||||
|
||||
} elsif (($language eq "fi") && ($words[$i-1] =~ /:$/) && ($words[$i] =~ /^(N|n|A|a|Ä|ä|ssa|Ssa|ssä|Ssä|sta|stä|Sta|Stä|hun|Hun|hyn|Hyn|han|Han|hän|Hän|hön|Hön|un|Un|yn|Yn|an|An|än|Än|ön|Ön|seen|Seen|lla|Lla|llä|Llä|lta|Lta|ltä|Ltä|lle|Lle|ksi|Ksi|kse|Kse|tta|Tta|ine|Ine)(ni|si|mme|nne|nsa)?(ko|kö|han|hän|pa|pä|kaan|kään|kin)?$/)) {
|
||||
# Finnish : without intervening space if followed by case suffix
|
||||
# EU:N EU:n EU:ssa EU:sta EU:hun EU:iin ...
|
||||
$text=$text. lc $words[$i];
|
||||
$prependSpace = " ";
|
||||
} else {
|
||||
$text=$text.$prependSpace.$words[$i];
|
||||
$prependSpace = " ";
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
while(<STDIN>) {
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
binmode(STDIN, ":utf8");
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $language = "en";
|
||||
|
@ -4,6 +4,7 @@
|
||||
# Start by Ulrich Germann, after noticing systematic preprocessing errors
|
||||
# in some of the English Europarl data.
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Std;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
binmode(STDIN, ":utf8");
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
#binmode(STDIN, ":utf8");
|
||||
|
@ -16,6 +16,7 @@ use warnings;
|
||||
binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
use warnings;
|
||||
use FindBin qw($RealBin);
|
||||
use strict;
|
||||
use Time::HiRes;
|
||||
|
@ -14,6 +14,7 @@
|
||||
binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
use warnings;
|
||||
use FindBin qw($RealBin);
|
||||
use strict;
|
||||
use Time::HiRes;
|
||||
|
@ -6,6 +6,8 @@
|
||||
#
|
||||
# Ondrej Bojar.
|
||||
|
||||
use warnings;
|
||||
|
||||
my $ini = shift;
|
||||
die "usage: absolutize_moses_model.pl path-to-moses.ini > moses.abs.ini"
|
||||
if !defined $ini;
|
||||
|
@ -4,6 +4,7 @@
|
||||
# Binarize a Moses model
|
||||
#
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use Getopt::Long "GetOptions";
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user