Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2013-07-27 04:19:16 +01:00
commit abe90b5af7
13 changed files with 116 additions and 42 deletions

View File

@ -115,6 +115,7 @@ requirements += <os>MACOSX:<library>iconv ;
project : requirements
<threading>multi:<define>WITH_THREADS
<threading>multi:<library>boost_thread
<library>boost_system
<define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES
$(requirements)
<include>.

View File

@ -143,7 +143,7 @@ rule boost-lib ( name macro : deps * ) {
alias boost_$(name) : inner_boost_$(name) : <link>shared ;
requirements += <define>BOOST_$(macro) ;
} else {
alias boost_$(name) : inner_boost_$(name) : <link>static ;
alias boost_$(name) : inner_boost_$(name) : : : <link>shared:<define>BOOST_$(macro) ;
}
}

View File

@ -2,7 +2,7 @@ lib mira_lib :
[ glob *.cpp : *Test.cpp Main.cpp ]
../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
exe mira : Main.cpp mira_lib ;
exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
alias programs : mira ;

View File

@ -4,11 +4,12 @@
using namespace std;
namespace Moses {
namespace Moses
{
ControlRecombination::ControlRecombination(const std::string &line)
:StatefulFeatureFunction("ControlRecombination", 0, line)
,m_type(Output)
:StatefulFeatureFunction("ControlRecombination", 0, line)
,m_type(Output)
{
}
@ -44,12 +45,12 @@ const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input
}
ControlRecombinationState::ControlRecombinationState()
:m_hypo(NULL)
:m_hypo(NULL)
{
}
ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo)
:m_hypo(hypo)
:m_hypo(hypo)
{
}

View File

@ -4,7 +4,8 @@
#include "StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
namespace Moses {
namespace Moses
{
class ControlRecombinationState;
@ -12,17 +13,16 @@ class ControlRecombinationState;
class ControlRecombination : public StatefulFeatureFunction
{
public:
enum Type
{
None,
Output,
Segmentation
};
enum Type {
None,
Output,
Segmentation
};
ControlRecombination(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
return true;
}
virtual FFState* Evaluate(

View File

@ -42,7 +42,8 @@ LanguageModelRandLM::LanguageModelRandLM(const std::string &line)
{
}
LanguageModelRandLM::~LanguageModelRandLM() {
LanguageModelRandLM::~LanguageModelRandLM()
{
delete m_lm;
}
@ -100,7 +101,8 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
return m_lm->getWordID(str);
}
randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const {
randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const
{
size_t factorId = factor->GetId();
return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
}
@ -127,10 +129,12 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
return ret;
}
void LanguageModelRandLM::InitializeForInput(InputType const& source) {
void LanguageModelRandLM::InitializeForInput(InputType const& source)
{
m_lm->initThreadSpecificData(); // Creates thread specific data iff // compiled with multithreading.
}
void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) {
void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source)
{
m_lm->clearCaches(); // clear caches
}

View File

@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace randlm
{
class RandLM;
class RandLM;
}
namespace Moses

View File

@ -174,12 +174,12 @@ Parameter::Parameter()
AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
#ifdef HAVE_SYNLM
#ifdef HAVE_SYNLM
AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
#endif
AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
@ -312,21 +312,21 @@ bool Parameter::LoadParam(int argc, char* argv[])
// don't mix old and new format
if ((isParamSpecified("feature") || isParamSpecified("weight"))
&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
)
) {
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
)
) {
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
}
// convert old weights args to new format

View File

@ -56,7 +56,7 @@ public:
const ChartCellCollectionBase &);
private:
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
// const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);

View File

@ -391,7 +391,6 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
// Workaround for extract-parallel issue.
if (options.sentenceOffset > 0) {
options.glueGrammarFile.clear();
options.unknownWordFile.clear();
}
}

View File

@ -21,7 +21,7 @@ sub run {
}
my $sourceLocale = $opts{s} || "";
my $targetLocale = $opts{t} || "";
my $numberSymbol = $opts{m} || '@NUM@';
my $numberSymbol = $opts{m} || '@num@';
while(<>) {
chomp;
print mark_numbers($_,$opts{c},$opts{l},$numberSymbol,$_),"\n";
@ -32,7 +32,7 @@ sub mark_numbers {
my $input = shift;
my $corpusMode = shift;
my $legacyMode = shift;
my $numberSymbol = shift || '@NUM@';
my $numberSymbol = shift || '@num@';
my $numref = recognize($input);
my $input_length = length($input);
@ -46,7 +46,7 @@ sub mark_numbers {
}
my $number = substr($input,$numstart,$numend-$numstart);
if($corpusMode) {
$output .= $number;
$output .= $numberSymbol;
}
else {
if($legacyMode) {

View File

@ -28,6 +28,9 @@ sub Beautify($)
next if ($name eq "util");
next if ($name eq "lm");
next if ($name eq "search");
next if ($name eq "randlm");
next if ($name eq "srilm");
next if ($name eq "irstlm");
$name = $path ."/" .$name;
if (-d $name) {

66
scripts/other/convert-pt.perl Executable file
View File

@ -0,0 +1,66 @@
#!/usr/bin/perl
# $Id$
# Convert a phrase-table with alignment in Moses' dead-end format
#   a . ||| A . ||| (0) (0,1) ||| (0,1) (1) ||| 1 0.0626124 1 0.032119 2.718
# to
#   a . ||| A . ||| 1 0.0626124 1 0.032119 2.718 ||| 0-0 1-0 1-1
#
# Reads the phrase table from STDIN and writes the converted table to STDOUT.
# Only the source-to-target alignment field (3rd field) is converted; the
# target-to-source alignment field (4th field) is dropped.
use strict;
use warnings;
use Getopt::Long;
use IO::File;
use File::Basename;

sub ConvertAlignment($);

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $lineNum = 0;
while (my $line = <STDIN>) {
  chomp($line);
  ++$lineNum;

  # Split on the full "|||" field delimiter rather than on a single "|":
  # a lone "|" can appear inside tokens (it is the factor separator), and
  # splitting on it would shift every field index.
  my @toks = split(/\|\|\|/, $line);
  if (scalar(@toks) < 5) {
    # Not a 5-field record: report it and move on instead of printing a
    # record built from undefined fields.
    print STDERR "WARNING: skipping malformed line $lineNum (expected at least 5 fields, found "
                 . scalar(@toks) . ")\n";
    next;
  }

  # Fields keep their surrounding spaces, so re-joining them with "|||"
  # below reproduces the usual " ||| " separation.
  my $source = $toks[0];
  my $target = $toks[1];
  my $alignS = $toks[2];
  my $scores = $toks[4];

  my $align = ConvertAlignment($alignS);

  print "$source|||$target|||$scores ||| $align\n";
}
# Convert a per-source-word alignment string such as "(0) (0,1)" into
# Moses' pairwise "source-target" form, here "0-0 1-0 1-1 ".
#
# Argument: the alignment field; one parenthesised, comma-separated list of
#           target positions per source word, separated by whitespace.
#           An empty list "()" marks an unaligned source word.
# Returns:  a string of "posS-posT " pairs (each pair, including the last,
#           is followed by a single space); the empty string if there are
#           no alignment points.
#
# The prototype is ($) to agree with the forward declaration and the
# single-argument call; a differing prototype triggers Perl's
# "Prototype mismatch" warning.
sub ConvertAlignment($)
{
  my $alignS = shift;
  return "" unless defined $alignS;

  $alignS =~ s/^\s+//;
  $alignS =~ s/\s+$//;

  my $ret = "";

  # Split on any run of whitespace so repeated spaces or tabs do not yield
  # empty tokens (which would make the substr below go out of range).
  my @toks = split(/\s+/, $alignS);
  for (my $posS = 0; $posS < scalar @toks; ++$posS) {
    my $tok = $toks[$posS];

    # Strip the enclosing parentheses: "(0,1)" -> "0,1".
    $tok = substr($tok, 1, length($tok) - 2);

    foreach my $posT (split(/,/, $tok)) {
      $ret .= "$posS-$posT ";
    }
  }

  return $ret;
}