diff --git a/Jamroot b/Jamroot index e6db1d207..1f782261c 100644 --- a/Jamroot +++ b/Jamroot @@ -115,6 +115,7 @@ requirements += MACOSX:iconv ; project : requirements multi:WITH_THREADS multi:boost_thread + boost_system _FILE_OFFSET_BITS=64 _LARGE_FILES $(requirements) . diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 672502ba0..891bf2eb3 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -143,7 +143,7 @@ rule boost-lib ( name macro : deps * ) { alias boost_$(name) : inner_boost_$(name) : shared ; requirements += BOOST_$(macro) ; } else { - alias boost_$(name) : inner_boost_$(name) : static ; + alias boost_$(name) : inner_boost_$(name) : : : shared:BOOST_$(macro) ; } } diff --git a/mira/Jamfile b/mira/Jamfile index 1f750d168..3862cb172 100644 --- a/mira/Jamfile +++ b/mira/Jamfile @@ -2,7 +2,7 @@ lib mira_lib : [ glob *.cpp : *Test.cpp Main.cpp ] ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ; -exe mira : Main.cpp mira_lib ; +exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ; alias programs : mira ; diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp index f191ece85..a9664f8c4 100644 --- a/moses/FF/ControlRecombination.cpp +++ b/moses/FF/ControlRecombination.cpp @@ -4,11 +4,12 @@ using namespace std; -namespace Moses { +namespace Moses +{ ControlRecombination::ControlRecombination(const std::string &line) -:StatefulFeatureFunction("ControlRecombination", 0, line) -,m_type(Output) + :StatefulFeatureFunction("ControlRecombination", 0, line) + ,m_type(Output) { } @@ -44,12 +45,12 @@ const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input } ControlRecombinationState::ControlRecombinationState() -:m_hypo(NULL) + :m_hypo(NULL) { } ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo) -:m_hypo(hypo) + :m_hypo(hypo) { } diff --git a/moses/FF/ControlRecombination.h 
b/moses/FF/ControlRecombination.h index ce55546fd..b61bbcf41 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -4,7 +4,8 @@ #include "StatefulFeatureFunction.h" #include "moses/FF/FFState.h" -namespace Moses { +namespace Moses +{ class ControlRecombinationState; @@ -12,17 +13,16 @@ class ControlRecombinationState; class ControlRecombination : public StatefulFeatureFunction { public: - enum Type - { - None, - Output, - Segmentation - }; + enum Type { + None, + Output, + Segmentation + }; ControlRecombination(const std::string &line); bool IsUseable(const FactorMask &mask) const { - return true; + return true; } virtual FFState* Evaluate( diff --git a/moses/LM/Rand.cpp b/moses/LM/Rand.cpp index 5568319da..96ca511b6 100644 --- a/moses/LM/Rand.cpp +++ b/moses/LM/Rand.cpp @@ -42,7 +42,8 @@ LanguageModelRandLM::LanguageModelRandLM(const std::string &line) { } -LanguageModelRandLM::~LanguageModelRandLM() { +LanguageModelRandLM::~LanguageModelRandLM() +{ delete m_lm; } @@ -100,7 +101,8 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const return m_lm->getWordID(str); } -randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const { +randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const +{ size_t factorId = factor->GetId(); return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId]; } @@ -127,10 +129,12 @@ LMResult LanguageModelRandLM::GetValue(const vector &contextFactor, return ret; } -void LanguageModelRandLM::InitializeForInput(InputType const& source) { +void LanguageModelRandLM::InitializeForInput(InputType const& source) +{ m_lm->initThreadSpecificData(); // Creates thread specific data iff // compiled with multithreading. 
} -void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) { +void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) +{ m_lm->clearCaches(); // clear caches } diff --git a/moses/LM/Rand.h b/moses/LM/Rand.h index 55d52df1f..11aa5156d 100644 --- a/moses/LM/Rand.h +++ b/moses/LM/Rand.h @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA namespace randlm { - class RandLM; +class RandLM; } namespace Moses diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index 9ee31f8df..2daf84824 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -174,12 +174,12 @@ Parameter::Parameter() AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models"); AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models"); - #ifdef HAVE_SYNLM +#ifdef HAVE_SYNLM AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)"); AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model"); AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser"); #endif -AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables"); + AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables"); AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature"); AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature"); AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. 
Target factors for phrase boundary feature"); @@ -312,21 +312,21 @@ bool Parameter::LoadParam(int argc, char* argv[]) // don't mix old and new format if ((isParamSpecified("feature") || isParamSpecified("weight")) - && (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") || - isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") || - isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") || - isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") || - isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") || - isParamSpecified("weight-u") || isParamSpecified("weight-e") || - isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") || - isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") || - isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") || - isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") || - isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") || - isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature") - ) - ) { - UTIL_THROW(util::Exception, "Don't mix old and new ini file format"); + && (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") || + isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") || + isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") || + isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || 
isParamSpecified("weight-pp") || + isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") || + isParamSpecified("weight-u") || isParamSpecified("weight-e") || + isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") || + isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") || + isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") || + isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") || + isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") || + isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature") + ) + ) { + UTIL_THROW(util::Exception, "Don't mix old and new ini file format"); } // convert old weights args to new format diff --git a/moses/TranslationModel/RuleTable/UTrie.h b/moses/TranslationModel/RuleTable/UTrie.h index 63bf801e8..c0e9b885e 100644 --- a/moses/TranslationModel/RuleTable/UTrie.h +++ b/moses/TranslationModel/RuleTable/UTrie.h @@ -56,7 +56,7 @@ public: const ChartCellCollectionBase &); private: - const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const; +// const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const; TargetPhraseCollection &GetOrCreateTargetPhraseCollection( const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp index b0102e8f2..c03e02830 100644 --- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp +++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp @@ -391,7 +391,6 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[], // Workaround for extract-parallel issue. 
if (options.sentenceOffset > 0) { - options.glueGrammarFile.clear(); options.unknownWordFile.clear(); } } diff --git a/scripts/generic/ph_numbers.perl b/scripts/generic/ph_numbers.perl index fcd732606..4e26e9107 100755 --- a/scripts/generic/ph_numbers.perl +++ b/scripts/generic/ph_numbers.perl @@ -21,7 +21,7 @@ sub run { } my $sourceLocale = $opts{s} || ""; my $targetLocale = $opts{t} || ""; - my $numberSymbol = $opts{m} || '@NUM@'; + my $numberSymbol = $opts{m} || '@num@'; while(<>) { chomp; print mark_numbers($_,$opts{c},$opts{l},$numberSymbol,$_),"\n"; @@ -32,7 +32,7 @@ sub mark_numbers { my $input = shift; my $corpusMode = shift; my $legacyMode = shift; - my $numberSymbol = shift || '@NUM@'; + my $numberSymbol = shift || '@num@'; my $numref = recognize($input); my $input_length = length($input); @@ -46,7 +46,7 @@ sub mark_numbers { } my $number = substr($input,$numstart,$numend-$numstart); if($corpusMode) { - $output .= $number; + $output .= $numberSymbol; } else { if($legacyMode) { diff --git a/scripts/other/beautify.perl b/scripts/other/beautify.perl index e23f7e2a4..4929c4e4e 100755 --- a/scripts/other/beautify.perl +++ b/scripts/other/beautify.perl @@ -28,6 +28,9 @@ sub Beautify($) next if ($name eq "util"); next if ($name eq "lm"); next if ($name eq "search"); + next if ($name eq "randlm"); + next if ($name eq "srilm"); + next if ($name eq "irstlm"); $name = $path ."/" .$name; if (-d $name) { diff --git a/scripts/other/convert-pt.perl b/scripts/other/convert-pt.perl new file mode 100755 index 000000000..fa35b4490 --- /dev/null +++ b/scripts/other/convert-pt.perl @@ -0,0 +1,66 @@ +#!/usr/bin/perl + +# $Id$ +# convert a phrase-table with alignment in Moses' dead-end format +# a . ||| A . ||| (0) (0,1) ||| (0,1) (1) ||| 1 0.0626124 1 0.032119 2.718 +# to +# a . ||| A . 
||| 1 0.0626124 1 0.032119 2.718 ||| 0-0 1-0 1-1 + + +use strict; +use warnings; +use Getopt::Long; +use IO::File; +use File::Basename; + +sub ConvertAlignment($); + +binmode(STDIN, ":utf8"); +binmode(STDOUT, ":utf8"); +binmode(STDERR, ":utf8"); + + +my $lineNum = 0; +while (my $line = <STDIN>) { + chomp($line); + ++$lineNum; + + my @toks = split(/\|/, $line); + my $source = $toks[0]; + my $target = $toks[3]; + my $scores = $toks[12]; + + my $alignS = $toks[6]; + my $align = ConvertAlignment($alignS); + + print "$source|||$target|||$scores ||| $align\n"; + +} + +sub ConvertAlignment($ $) +{ + my $ret = ""; + my $alignS = shift; + $alignS =~ s/^\s+//; + $alignS =~ s/\s+$//; + + #print "alignS=$alignS\n"; + + my @toks = split(/ /, $alignS); + for (my $posS = 0; $posS < scalar @toks; ++$posS) { + my $tok = $toks[$posS]; + $tok = substr($tok, 1, length($tok) - 2); + #print "tok=$tok\n"; + + my @posTvec = split(/,/, $tok); + for (my $j = 0; $j < scalar @posTvec; ++$j) { + my $posT = $posTvec[$j]; + $ret .= "$posS-$posT "; + } + } + + #print "ret=$ret \n"; + return $ret; +} + +