mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 14:05:29 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
abe90b5af7
1
Jamroot
1
Jamroot
@ -115,6 +115,7 @@ requirements += <os>MACOSX:<library>iconv ;
|
||||
project : requirements
|
||||
<threading>multi:<define>WITH_THREADS
|
||||
<threading>multi:<library>boost_thread
|
||||
<library>boost_system
|
||||
<define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES
|
||||
$(requirements)
|
||||
<include>.
|
||||
|
@ -143,7 +143,7 @@ rule boost-lib ( name macro : deps * ) {
|
||||
alias boost_$(name) : inner_boost_$(name) : <link>shared ;
|
||||
requirements += <define>BOOST_$(macro) ;
|
||||
} else {
|
||||
alias boost_$(name) : inner_boost_$(name) : <link>static ;
|
||||
alias boost_$(name) : inner_boost_$(name) : : : <link>shared:<define>BOOST_$(macro) ;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@ lib mira_lib :
|
||||
[ glob *.cpp : *Test.cpp Main.cpp ]
|
||||
../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
|
||||
|
||||
exe mira : Main.cpp mira_lib ;
|
||||
exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
|
||||
|
||||
alias programs : mira ;
|
||||
|
||||
|
@ -4,11 +4,12 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
ControlRecombination::ControlRecombination(const std::string &line)
|
||||
:StatefulFeatureFunction("ControlRecombination", 0, line)
|
||||
,m_type(Output)
|
||||
:StatefulFeatureFunction("ControlRecombination", 0, line)
|
||||
,m_type(Output)
|
||||
{
|
||||
}
|
||||
|
||||
@ -44,12 +45,12 @@ const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input
|
||||
}
|
||||
|
||||
ControlRecombinationState::ControlRecombinationState()
|
||||
:m_hypo(NULL)
|
||||
:m_hypo(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo)
|
||||
:m_hypo(hypo)
|
||||
:m_hypo(hypo)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,8 @@
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class ControlRecombinationState;
|
||||
|
||||
@ -12,17 +13,16 @@ class ControlRecombinationState;
|
||||
class ControlRecombination : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
enum Type
|
||||
{
|
||||
None,
|
||||
Output,
|
||||
Segmentation
|
||||
};
|
||||
enum Type {
|
||||
None,
|
||||
Output,
|
||||
Segmentation
|
||||
};
|
||||
|
||||
ControlRecombination(const std::string &line);
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual FFState* Evaluate(
|
||||
|
@ -42,7 +42,8 @@ LanguageModelRandLM::LanguageModelRandLM(const std::string &line)
|
||||
{
|
||||
}
|
||||
|
||||
LanguageModelRandLM::~LanguageModelRandLM() {
|
||||
LanguageModelRandLM::~LanguageModelRandLM()
|
||||
{
|
||||
delete m_lm;
|
||||
}
|
||||
|
||||
@ -100,7 +101,8 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
|
||||
return m_lm->getWordID(str);
|
||||
}
|
||||
|
||||
randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const {
|
||||
randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const
|
||||
{
|
||||
size_t factorId = factor->GetId();
|
||||
return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
|
||||
}
|
||||
@ -127,10 +129,12 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void LanguageModelRandLM::InitializeForInput(InputType const& source) {
|
||||
void LanguageModelRandLM::InitializeForInput(InputType const& source)
|
||||
{
|
||||
m_lm->initThreadSpecificData(); // Creates thread specific data iff // compiled with multithreading.
|
||||
}
|
||||
void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{
|
||||
m_lm->clearCaches(); // clear caches
|
||||
}
|
||||
|
||||
|
@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
namespace randlm
|
||||
{
|
||||
class RandLM;
|
||||
class RandLM;
|
||||
}
|
||||
|
||||
namespace Moses
|
||||
|
@ -174,12 +174,12 @@ Parameter::Parameter()
|
||||
AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
|
||||
AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
|
||||
|
||||
#ifdef HAVE_SYNLM
|
||||
#ifdef HAVE_SYNLM
|
||||
AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
|
||||
AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
|
||||
AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
|
||||
#endif
|
||||
AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
|
||||
AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
|
||||
AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
|
||||
AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
|
||||
AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
|
||||
@ -312,21 +312,21 @@ bool Parameter::LoadParam(int argc, char* argv[])
|
||||
|
||||
// don't mix old and new format
|
||||
if ((isParamSpecified("feature") || isParamSpecified("weight"))
|
||||
&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
|
||||
isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
|
||||
isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
|
||||
isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
|
||||
isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
|
||||
isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
|
||||
isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
|
||||
isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
|
||||
isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
|
||||
isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
|
||||
isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
|
||||
isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
|
||||
)
|
||||
) {
|
||||
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
|
||||
&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
|
||||
isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
|
||||
isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
|
||||
isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
|
||||
isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
|
||||
isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
|
||||
isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
|
||||
isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
|
||||
isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
|
||||
isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
|
||||
isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
|
||||
isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
|
||||
)
|
||||
) {
|
||||
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
|
||||
}
|
||||
|
||||
// convert old weights args to new format
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
const ChartCellCollectionBase &);
|
||||
|
||||
private:
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
|
||||
// const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
|
@ -391,7 +391,6 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
|
||||
|
||||
// Workaround for extract-parallel issue.
|
||||
if (options.sentenceOffset > 0) {
|
||||
options.glueGrammarFile.clear();
|
||||
options.unknownWordFile.clear();
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ sub run {
|
||||
}
|
||||
my $sourceLocale = $opts{s} || "";
|
||||
my $targetLocale = $opts{t} || "";
|
||||
my $numberSymbol = $opts{m} || '@NUM@';
|
||||
my $numberSymbol = $opts{m} || '@num@';
|
||||
while(<>) {
|
||||
chomp;
|
||||
print mark_numbers($_,$opts{c},$opts{l},$numberSymbol,$_),"\n";
|
||||
@ -32,7 +32,7 @@ sub mark_numbers {
|
||||
my $input = shift;
|
||||
my $corpusMode = shift;
|
||||
my $legacyMode = shift;
|
||||
my $numberSymbol = shift || '@NUM@';
|
||||
my $numberSymbol = shift || '@num@';
|
||||
|
||||
my $numref = recognize($input);
|
||||
my $input_length = length($input);
|
||||
@ -46,7 +46,7 @@ sub mark_numbers {
|
||||
}
|
||||
my $number = substr($input,$numstart,$numend-$numstart);
|
||||
if($corpusMode) {
|
||||
$output .= $number;
|
||||
$output .= $numberSymbol;
|
||||
}
|
||||
else {
|
||||
if($legacyMode) {
|
||||
|
@ -28,6 +28,9 @@ sub Beautify($)
|
||||
next if ($name eq "util");
|
||||
next if ($name eq "lm");
|
||||
next if ($name eq "search");
|
||||
next if ($name eq "randlm");
|
||||
next if ($name eq "srilm");
|
||||
next if ($name eq "irstlm");
|
||||
|
||||
$name = $path ."/" .$name;
|
||||
if (-d $name) {
|
||||
|
66
scripts/other/convert-pt.perl
Executable file
66
scripts/other/convert-pt.perl
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# $Id$
|
||||
# convert a phrase-table with alignment in Moses' dead-end format
|
||||
# a . ||| A . ||| (0) (0,1) ||| (0,1) (1) ||| 1 0.0626124 1 0.032119 2.718
|
||||
# to
|
||||
# a . ||| A . ||| 1 0.0626124 1 0.032119 2.718 ||| 0-0 1-0 1-1
|
||||
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Getopt::Long;
|
||||
use IO::File;
|
||||
use File::Basename;
|
||||
|
||||
sub ConvertAlignment($);
|
||||
|
||||
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

# Read phrase-table entries in the old format from STDIN, one per line, and
# write the converted entries to STDOUT.
#
# Input fields, separated by "|||":
#   source ||| target ||| source alignment ||| target alignment ||| scores
# Output fields:
#   source ||| target ||| scores ||| "posS-posT" alignment pairs
#
# Only the source-side alignment (third field) is converted; the target-side
# alignment (fourth field) is dropped.
my $lineNum = 0;
while (my $line = <STDIN>) {
  chomp($line);
  ++$lineNum;

  # Split on the full "|||" delimiter instead of on every single "|", so that
  # a lone "|" inside a token (e.g. a factor separator) cannot shift the
  # field positions. The limit of -1 keeps trailing empty fields.
  my @fields = split(/\|\|\|/, $line, -1);
  if (scalar @fields < 5) {
    # Report the problem and carry on instead of printing undefined fields.
    print STDERR "Skipping malformed line $lineNum: expected at least 5 '|||'-separated fields\n";
    next;
  }

  my ($source, $target, $alignS, $scores) = @fields[0, 1, 2, 4];
  my $align = ConvertAlignment($alignS);

  # Surrounding whitespace of each field is preserved as-is from the input.
  print "$source|||$target|||$scores ||| $align\n";
}
|
||||
|
||||
# Convert a source-side alignment in the old parenthesised notation into a
# list of "posS-posT" pairs.
#
# Argument:
#   a whitespace-separated list with one parenthesised group per source
#   position, each group holding the comma-separated target positions that
#   source position is aligned to, e.g. "(0) (0,1)". An empty group "()"
#   contributes no pairs but still counts as a source position.
#
# Returns:
#   the pairs as a string, each pair followed by a single space,
#   e.g. "0-0 1-0 1-1 ". Returns "" for undefined or empty input.
#
# The prototype is ($) so that it agrees with the forward declaration near the
# top of the script and with the single-argument call.
sub ConvertAlignment($)
{
  my $alignS = shift;
  return "" unless defined $alignS;

  my $ret = "";
  my $posS = 0;

  # split(' ') ignores leading/trailing whitespace and treats any run of
  # whitespace as one separator, so stray extra spaces cannot create empty
  # tokens that would shift the source position counter.
  foreach my $tok (split(' ', $alignS)) {
    # Drop the surrounding parentheses: "(0,1)" -> "0,1".
    my $inner = substr($tok, 1, length($tok) - 2);

    foreach my $posT (split(/,/, $inner)) {
      $ret .= "$posS-$posT ";
    }
    ++$posS;
  }

  return $ret;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user