Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2011-11-18 22:13:46 +07:00
commit ce0065ff2a
16 changed files with 329 additions and 82 deletions

2
.gitignore vendored
View File

@ -71,3 +71,5 @@ m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
dist
bin

1
CreateOnDisk/src/Jamfile Normal file
View File

@ -0,0 +1 @@
# Builds the CreateOnDisk executable from Main.cpp and links it against the
# moses and OnDiskPt library targets declared in their own Jamfiles.
exe CreateOnDisk : Main.cpp ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt ;

39
Jamroot Normal file
View File

@ -0,0 +1,39 @@
# Top-level Boost.Build project file: sets the build defaults and the global
# requirements, builds every sub-project and installs the resulting programs
# into dist/.

# option.get is called below by its qualified name, so the option module is
# imported explicitly (moses/src/LM/Jamfile does the same).
import option ;

# Properties used when the command line does not specify them.
project : default-build
  <threading>multi
  <warnings>on
  <variant>release
  <link>static
  ;

# Boost.Thread is located by name and requested with <link>shared.
lib boost_thread : : <link>shared ;

# --without-trace leaves $(trace) empty; otherwise TRACE_ENABLE is defined to 1.
if [ option.get "without-trace" : no : yes ] = yes
{
  trace = ;
} else {
  trace = <define>TRACE_ENABLE=1 ;
}

# Project-wide requirements: multi-threaded builds define WITH_THREADS and
# link boost_thread; the large-file defines and $(trace) are always applied.
project : requirements
  <threading>multi:<define>WITH_THREADS
  <threading>multi:<library>boost_thread
  <define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES $(trace)
  ;

# TOP refers to the directory that contains this file.
path-constant TOP : . ;

# Sub-projects, each with its own Jamfile.
build-project lm ;
build-project util ;
build-project moses/src ;
build-project OnDiskPt/src ;
build-project CreateOnDisk/src ;
build-project moses-chart-cmd/src ;
build-project moses-cmd/src ;

# Copies the listed executables, together with their dependencies, into dist/.
# Libraries are installed as well when linking is shared, and <dll-path> points
# the installed binaries at $(TOP)/dist.
install dist :
  lm//query
  lm//build_binary
  moses-chart-cmd/src//moses_chart
  moses-cmd/src//programs
  CreateOnDisk/src//CreateOnDisk
  : <location>dist <install-type>EXE <install-dependencies>on <dll-path>$(TOP)/dist <link>shared:<install-type>LIB ;

1
OnDiskPt/src/Jamfile Normal file
View File

@ -0,0 +1 @@
# Builds the OnDiskPt library from the sources in this directory; it depends on
# the moses library target.
lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../../moses/src//moses ;

10
lm/Jamfile Normal file
View File

@ -0,0 +1,10 @@
# Language model library. The parent directory is added to the include path
# both for building this target (requirements) and for every target that uses
# it (usage requirements, the last field).
lib lm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc virtual_interface.cc vocab.cc ../util//util : <include>.. : : <include>.. ;
# The testing module provides the "run" rule used below.
import testing ;
# Boost.Test located by name and requested as a shared library; users of the
# target get BOOST_TEST_DYN_LINK defined when linking is shared.
lib boost_unit_test_framework : : <link>shared : : <link>shared:<define>BOOST_TEST_DYN_LINK ;
# Tests: no command-line arguments (empty second field); the .arpa files are
# passed as input files.
run left_test.cc lm boost_unit_test_framework : : test.arpa ;
run model_test.cc lm boost_unit_test_framework : : test.arpa test_nounk.arpa ;
# Command-line programs built on top of the lm library.
exe query : ngram_query.cc lm ;
exe build_binary : build_binary.cc lm ;

View File

@ -0,0 +1 @@
# Builds the moses_chart executable from the sources in this directory and
# links it against the moses and OnDiskPt library targets.
exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt ;

View File

@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <fstream>
#include <ostream>
#include <vector>
#include <cassert>
#include "util/check.hh"
#include "TypeDef.h"
#include "Sentence.h"

7
moses-cmd/src/Jamfile Normal file
View File

@ -0,0 +1,7 @@
# Library targets shared by every executable declared in this Jamfile.
alias deps : ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt ;
exe checkplf : checkplf.cpp deps ;
exe moses : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp deps ;
exe lmbrgrid : LatticeMBRGrid.cpp LatticeMBR.cpp IOWrapper.cpp deps ;
# Single name for all three executables; the top-level install rule refers to
# it as moses-cmd/src//programs.
alias programs : checkplf moses lmbrgrid ;

124
moses/src/Jamfile Normal file
View File

@ -0,0 +1,124 @@
# zlib, located by name by the linker.
lib z ;
# InputFileStream.cpp is always used together with zlib, so both are grouped
# under one alias.
alias InputFileStream : InputFileStream.cpp z ;
# Optional syntactic language model, enabled with --with-synlm.
# option.get is called by its qualified name, so the option module is imported
# explicitly (moses/src/LM/Jamfile does the same).
import option ;
if [ option.get "with-synlm" : no : yes ] = yes
{
  # Math library, located by name by the linker.
  lib m ;
  # Compiled separately because it needs the extra synlm include directories.
  obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp ../../util//util : <include>$(TOP)/synlm/hhmm/rvtl/include <include>$(TOP)/synlm/hhmm/wsjparse/include ;
  # Targets that use synlm get HAVE_SYNLM defined (usage requirement).
  alias synlm : SyntacticLanguageModel.o m : : : <define>HAVE_SYNLM ;
} else {
  # Empty alias so that "synlm" can be listed unconditionally as a source.
  alias synlm ;
}
# The moses library: every source file listed below, plus the synlm and
# InputFileStream aliases and the LM and util library targets. The current
# directory is added to the include path both for building this target and
# for every target that uses it.
lib moses :
AlignmentInfo.cpp
AlignmentInfoCollection.cpp
BilingualDynSuffixArray.cpp
BitmapContainer.cpp
ChartCell.cpp
ChartCellCollection.cpp
ChartHypothesis.cpp
ChartHypothesisCollection.cpp
ChartManager.cpp
ChartRuleLookupManager.cpp
ChartRuleLookupManagerMemory.cpp
ChartRuleLookupManagerOnDisk.cpp
ChartTranslationOption.cpp
ChartTranslationOptionCollection.cpp
ChartTranslationOptionList.cpp
ChartTrellisDetour.cpp
ChartTrellisDetourQueue.cpp
ChartTrellisNode.cpp
ChartTrellisPath.cpp
ConfusionNet.cpp
DecodeFeature.cpp
DecodeGraph.cpp
DecodeStep.cpp
DecodeStepGeneration.cpp
DecodeStepTranslation.cpp
Dictionary.cpp
DotChart.cpp
DotChartInMemory.cpp
DotChartOnDisk.cpp
DummyScoreProducers.cpp
DynSAInclude/file.cpp
DynSAInclude/vocab.cpp
DynSuffixArray.cpp
FFState.cpp
Factor.cpp
FactorCollection.cpp
FactorTypeSet.cpp
FeatureFunction.cpp
FloydWarshall.cpp
GenerationDictionary.cpp
GlobalLexicalModel.cpp
hash.cpp
Hypothesis.cpp
HypothesisStack.cpp
HypothesisStackCubePruning.cpp
HypothesisStackNormal.cpp
InputType.cpp
LMList.cpp
LVoc.cpp
LexicalReordering.cpp
LexicalReorderingState.cpp
LexicalReorderingTable.cpp
Manager.cpp
PCNTools.cpp
Parameter.cpp
PartialTranslOptColl.cpp
Phrase.cpp
PhraseDictionary.cpp
PhraseDictionaryALSuffixArray.cpp
PhraseDictionaryDynSuffixArray.cpp
PhraseDictionaryHiero.cpp
PhraseDictionaryMemory.cpp
PhraseDictionarySCFG.cpp
PhraseDictionaryNode.cpp
PhraseDictionaryNodeSCFG.cpp
PhraseDictionaryOnDisk.cpp
PhraseDictionaryTree.cpp
PhraseDictionaryTreeAdaptor.cpp
PrefixTreeMap.cpp
ReorderingConstraint.cpp
ReorderingStack.cpp
RuleCube.cpp
RuleCubeItem.cpp
RuleCubeQueue.cpp
RuleTableLoaderCompact.cpp
RuleTableLoaderFactory.cpp
RuleTableLoaderHiero.cpp
RuleTableLoaderStandard.cpp
ScoreComponentCollection.cpp
ScoreIndexManager.cpp
ScoreProducer.cpp
Search.cpp
SearchCubePruning.cpp
SearchNormal.cpp
Sentence.cpp
SentenceStats.cpp
SquareMatrix.cpp
StaticData.cpp
TargetPhrase.cpp
TargetPhraseCollection.cpp
ThreadPool.cpp
Timer.cpp
TranslationOption.cpp
TranslationOptionCollection.cpp
TranslationOptionCollectionConfusionNet.cpp
TranslationOptionCollectionText.cpp
TranslationOptionList.cpp
TranslationSystem.cpp
TreeInput.cpp
TrellisPath.cpp
TrellisPathCollection.cpp
UserMessage.cpp
Util.cpp
Word.cpp
WordLattice.cpp
WordsBitmap.cpp
WordsRange.cpp
XmlOption.cpp
synlm
InputFileStream LM//LM ../../util//util : <include>. : : <include>. ;

View File

@ -50,8 +50,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# define LM_ORLM 1
# endif
# define LM_KEN 1
# ifdef HAVE_DMAPLM
# define LM_DMAP
# endif
@ -74,9 +72,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef LM_REMOTE
# include "LM/Remote.h"
#endif
#ifdef LM_KEN
# include "LM/Ken.h"
#endif
#include "LM/Ken.h"
#ifdef LM_DMAP
# include "LM/DMapLM.h"
#endif
@ -100,12 +96,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
, int dub )
{
if (lmImplementation == Ken || lmImplementation == LazyKen) {
#ifdef LM_KEN
return ConstructKenLM(languageModelFile, scoreIndexManager, factorTypes[0], lmImplementation == LazyKen);
#else
UserMessage::Add("KenLM isn't compiled in but your config asked for it");
return NULL;
#endif
}
LanguageModelImplementation *lm = NULL;
switch (lmImplementation) {
@ -142,7 +133,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
break;
case ParallelBackoff:
#ifdef LM_SRI
lm = new LanguageModelParallelBackoff();
lm = NewParallelBackoff();
#endif
break;
case DMapLM:

58
moses/src/LM/Jamfile Normal file
View File

@ -0,0 +1,58 @@
import option ;
# Shell output with the trailing newline removed; see http://lists.boost.org/boost-build/2007/08/17051.php
# trim-nl returns the part of str that precedes its first newline character.
# The newline inside the quoted pattern below is literal and must be kept.
rule trim-nl ( str )
{
return [ MATCH "([^
]*)" : $(str) ] ;
}
# Runs cmd through the SHELL builtin and returns its output as trimmed by
# trim-nl.
rule _shell ( cmd )
{
return [ trim-nl [ SHELL $(cmd) ] ] ;
}
# Optional IRSTLM support: --with-irstlm=<prefix> gives the installation
# prefix whose lib/ and include/ directories are used below.
with-irstlm = [ option.get "with-irstlm" ] ;
if $(with-irstlm) != ""
{
# The irstlm library is searched for in <prefix>/lib.
lib irstlm : : <search>$(with-irstlm)/lib ;
# IRST.cpp is compiled separately because it needs the IRSTLM headers.
obj IRST.o : IRST.cpp ../../../util//util : <include>$(with-irstlm)/include <include>.. ;
# Targets that use irst get LM_IRST defined (usage requirement).
alias irst : IRST.o irstlm : : : <define>LM_IRST ;
# Banner printed while the Jamfile is loaded.
echo "" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "" ;
} else {
# Empty alias so that "irst" can be listed unconditionally as a source.
alias irst ;
}
# Optional SRILM support: --with-srilm=<prefix> gives the SRILM directory
# whose include/, lib/ and flm/obj/ sub-directories are used below.
with-srilm = [ option.get "with-srilm" ] ;
if $(with-srilm) != ""
{
  if [ option.get "with-srilm-dynamic" ] != ""
  {
    # Single SRILM library. The target must be declared before it can be
    # aliased; it is located by name by the linker.
    # NOTE(review): no <search> path is given here - confirm that the library
    # is on the default linker path or add one.
    lib srilm ;
    alias sri-libs : srilm ;
  } else {
    # Static SRILM libraries live in a machine-type specific sub-directory.
    # The machine type comes from --with-srilm-arch or, when that is not
    # given, from SRILM's own sbin/machine-type script.
    sri-arch = [ option.get "with-srilm-arch" ] ;
    sri-arch ?= [ _shell $(with-srilm)/sbin/machine-type ] ;
    sri-lib = <search>$(with-srilm)/lib/$(sri-arch) <search>$(with-srilm)/flm/obj/$(sri-arch) ;

    # Each library lists the libraries it depends on as sources.
    lib flm : : $(sri-lib) ;
    lib misc : flm : $(sri-lib) ;
    lib dstruct : misc flm : $(sri-lib) ;
    lib oolm : dstruct misc flm : $(sri-lib) ;
    alias sri-libs : oolm dstruct misc flm ;
  }

  # The SRILM wrappers are compiled separately because they need the SRILM
  # headers.
  obj SRI.o : SRI.cpp ../../../util//util : <include>$(with-srilm)/include <include>.. ;
  obj ParallelBackoff.o : ParallelBackoff.cpp ../../../util//util : <include>$(with-srilm)/include <include>.. ;
  # Targets that use sri get LM_SRI defined (usage requirement).
  alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI ;
} else {
  # Empty alias so that "sri" can be listed unconditionally as a source.
  alias sri ;
}
# The LM library: the language model wrappers that are always built, the lm
# library target and the optional irst and sri aliases. The parent directory
# is added to the include path both for building this target and for every
# target that uses it.
# NOTE(review): $(requirements) is not assigned anywhere in the visible part
# of this Jamfile; an unset variable expands to nothing - confirm that this
# is intended.
lib LM : Base.cpp Factory.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp
../../../lm//lm
irst sri
: <include>.. $(requirements) : : <include>.. ;

View File

@ -20,21 +20,71 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LM/ParallelBackoff.h"
#include "File.h"
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include "LM/MultiFactor.h"
#include "Word.h"
#include "Factor.h"
#include "FactorTypeSet.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Util.h"
#include "FNgramSpecs.h"
#include "FNgramStats.h"
#include "FactoredVocab.h"
#include "FNgram.h"
#include "wmatrix.h"
#include "Vocab.h"
#include "File.h"
using namespace std;
namespace Moses
{
namespace
{
/** LM of multiple factors. A simple extension of single factor LM - factors backoff together.
 * Rather slow as this uses string concatenation/split.
 *
 * The class is declared inside an anonymous namespace, so it is only visible
 * in this translation unit.
 */
class LanguageModelParallelBackoff : public LanguageModelMultiFactor
{
private:
std::vector<FactorType> m_factorTypesOrdered;
// Vocabulary and model objects, held by raw pointer.
// NOTE(review): the types presumably come from the SRILM headers included
// above; where the objects are allocated and released is not visible in this
// declaration - check Load() and the destructor.
FactoredVocab *m_srilmVocab;
FNgram *m_srilmModel;
// Cached vocabulary indices.
// NOTE(review): m_unknownId looks like the unknown-word index; the meaning of
// the m_wt*id members is not evident from this declaration - confirm in Load().
VocabIndex m_unknownId;
VocabIndex m_wtid;
VocabIndex m_wtbid;
VocabIndex m_wteid;
FNgramSpecs<FNgramCount>* fnSpecs;
//std::vector<VocabIndex> m_lmIdLookup;
// Map from a size_t key to a vocabulary index.
// NOTE(review): what the key identifies is not visible here - confirm in
// CreateFactors()/GetLmID().
std::map<size_t, VocabIndex>* lmIdMap;
std::fstream* debugStream;
WidMatrix *widMatrix;
public:
~LanguageModelParallelBackoff();
// Loads the model found at filePath for the given factor types and n-gram order.
bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder);
// Vocabulary index lookup, by string or by factor and factor type.
VocabIndex GetLmID( const std::string &str ) const;
VocabIndex GetLmID( const Factor *factor, FactorType ft ) const;
void CreateFactors();
LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
// Feature-function state accessors.
const FFState *GetNullContextState() const;
const FFState *GetBeginSentenceState() const;
FFState *NewState(const FFState *from) const;
};
LanguageModelParallelBackoff::~LanguageModelParallelBackoff()
{
///
@ -294,5 +344,12 @@ const FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const
{
return NULL;
}
}
/// Factory for the parallel-backoff language model. The concrete class is
/// local to this translation unit, so callers only see the
/// LanguageModelMultiFactor interface. The returned object is heap-allocated
/// with new.
LanguageModelMultiFactor *NewParallelBackoff()
{
  LanguageModelMultiFactor *model = new LanguageModelParallelBackoff();
  return model;
}
}

View File

@ -21,71 +21,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include "LM/MultiFactor.h"
#include "Word.h"
#include "Factor.h"
#include "FactorTypeSet.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "FNgramStats.h"
#include "FactoredVocab.h"
#include "FNgram.h"
#include "wmatrix.h"
#include "Vocab.h"
using namespace std;
//class FactoredVocab;
//class FNgram;
//class WidMatrix;
namespace Moses
{
/** LM of multiple factors. A simple extension of single factor LM - factors backoff together.
* Rather slow as this uses string concatenation/split
*/
class LanguageModelParallelBackoff : public LanguageModelMultiFactor
{
private:
std::vector<FactorType> m_factorTypesOrdered;
class LanguageModelMultiFactor;
FactoredVocab *m_srilmVocab;
FNgram *m_srilmModel;
VocabIndex m_unknownId;
VocabIndex m_wtid;
VocabIndex m_wtbid;
VocabIndex m_wteid;
FNgramSpecs<FNgramCount>* fnSpecs;
//std::vector<VocabIndex> m_lmIdLookup;
std::map<size_t, VocabIndex>* lmIdMap;
std::fstream* debugStream;
WidMatrix *widMatrix;
public:
~LanguageModelParallelBackoff();
bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder);
VocabIndex GetLmID( const std::string &str ) const;
VocabIndex GetLmID( const Factor *factor, FactorType ft ) const;
void CreateFactors();
LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
const FFState *GetNullContextState() const;
const FFState *GetBeginSentenceState() const;
FFState *NewState(const FFState *from) const;
};
LanguageModelMultiFactor *NewParallelBackoff();
}

View File

@ -31,6 +31,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h"
#include "StaticData.h"
#include "Vocab.h"
#include "Ngram.h"
using namespace std;
namespace Moses

View File

@ -26,12 +26,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include "Factor.h"
#include "TypeDef.h"
#include "Vocab.h"
#include "Ngram.h"
#include "LM/SingleFactor.h"
class Factor;
class Phrase;
class Vocab;
class Ngram;
namespace Moses
{
@ -39,15 +39,15 @@ namespace Moses
class LanguageModelSRI : public LanguageModelPointerState
{
protected:
std::vector<VocabIndex> m_lmIdLookup;
std::vector<unsigned int> m_lmIdLookup;
::Vocab *m_srilmVocab;
Ngram *m_srilmModel;
VocabIndex m_unknownId;
unsigned int m_unknownId;
LMResult GetValue(VocabIndex wordId, VocabIndex *context) const;
LMResult GetValue(unsigned int wordId, unsigned int *context) const;
void CreateFactors();
VocabIndex GetLmID( const std::string &str ) const;
VocabIndex GetLmID( const Factor *factor ) const;
unsigned int GetLmID( const std::string &str ) const;
unsigned int GetLmID( const Factor *factor ) const;
public:
LanguageModelSRI();

13
util/Jamfile Normal file
View File

@ -0,0 +1,13 @@
lib z ;
lib util : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc z : <include>.. : : <include>.. ;
import testing ;
lib boost_unit_test_framework : : <link>shared : : <link>shared:<define>BOOST_TEST_DYN_LINK ;
unit-test bit_packing_test : bit_packing_test.cc util boost_unit_test_framework ;
run file_piece_test.cc util boost_unit_test_framework : : file_piece.cc ;
unit-test joint_sort_test : joint_sort_test.cc util boost_unit_test_framework ;
unit-test probing_hash_table_test : probing_hash_table_test.cc util boost_unit_test_framework ;
unit-test sorted_uniform_test : sorted_uniform_test.cc util boost_unit_test_framework ;
unit-test tokenize_piece_test : tokenize_piece_test.cc util boost_unit_test_framework ;