diff --git a/misc/.cproject b/misc/.cproject
index 653abad65..405dd3c13 100644
--- a/misc/.cproject
+++ b/misc/.cproject
@@ -19,35 +19,37 @@
-
+
-
-
-
+
+
-
-
+
+
-
-
-
@@ -59,15 +61,15 @@
-
-
+
+
-
-
+
+
@@ -76,6 +78,8 @@
+
+
@@ -93,35 +97,37 @@
-
+
-
-
-
+
+
-
-
+
+
-
-
-
@@ -133,22 +139,22 @@
-
-
+
+
-
-
+
+
-
-
+
+
@@ -157,6 +163,8 @@
+
+
diff --git a/misc/.project b/misc/.project
index 350898510..e67d03462 100644
--- a/misc/.project
+++ b/misc/.project
@@ -11,14 +11,6 @@
org.eclipse.cdt.managedbuilder.core.genmakebuilder
clean,full,incremental,
-
- org.eclipse.cdt.make.core.cleanBuildTarget
- clean
-
-
- org.eclipse.cdt.make.core.enableCleanBuild
- true
-
?name?
@@ -28,44 +20,52 @@
true
- org.eclipse.cdt.make.core.stopOnError
- true
-
-
- org.eclipse.cdt.make.core.buildCommand
- make
-
-
- org.eclipse.cdt.make.core.contents
- org.eclipse.cdt.make.core.activeConfigSettings
-
-
- org.eclipse.cdt.make.core.buildLocation
- ${workspace_loc:/misc/Debug}
-
-
- org.eclipse.cdt.make.core.useDefaultBuildCmd
- true
-
-
- org.eclipse.cdt.make.core.enableAutoBuild
- false
-
-
- org.eclipse.cdt.make.core.enableFullBuild
- true
+ org.eclipse.cdt.make.core.autoBuildTarget
+ all
org.eclipse.cdt.make.core.buildArguments
+
+ org.eclipse.cdt.make.core.buildCommand
+ make
+
+
+ org.eclipse.cdt.make.core.buildLocation
+ ${workspace_loc:/misc/Release}
+
+
+ org.eclipse.cdt.make.core.cleanBuildTarget
+ clean
+
+
+ org.eclipse.cdt.make.core.contents
+ org.eclipse.cdt.make.core.activeConfigSettings
+
+
+ org.eclipse.cdt.make.core.enableAutoBuild
+ false
+
+
+ org.eclipse.cdt.make.core.enableCleanBuild
+ true
+
+
+ org.eclipse.cdt.make.core.enableFullBuild
+ true
+
org.eclipse.cdt.make.core.fullBuildTarget
all
- org.eclipse.cdt.make.core.autoBuildTarget
- all
+ org.eclipse.cdt.make.core.stopOnError
+ true
+
+
+ org.eclipse.cdt.make.core.useDefaultBuildCmd
+ true
diff --git a/moses-cmd/.cproject b/moses-cmd/.cproject
index 6da3c27a6..cc8bee3fd 100644
--- a/moses-cmd/.cproject
+++ b/moses-cmd/.cproject
@@ -36,6 +36,7 @@
+
+
+
+
@@ -84,33 +90,35 @@
-
+
-
-
+
+
-
+
+
-
+
+
-
-
-
-
-
+
+
+
+
+
@@ -123,6 +131,10 @@
+
+
+
+
diff --git a/moses/src/LanguageModelFactory.cpp b/moses/src/LanguageModelFactory.cpp
index b259eef5f..4684d6d43 100644
--- a/moses/src/LanguageModelFactory.cpp
+++ b/moses/src/LanguageModelFactory.cpp
@@ -32,6 +32,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef LM_IRST
# include "LanguageModelIRST.h"
#endif
+#ifdef LM_RAND
+# include "LanguageModelRandLM.h"
+#endif
#include "LanguageModelInternal.h"
#include "LanguageModelSkip.h"
@@ -44,7 +47,7 @@ namespace LanguageModelFactory
{
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
- , const std::vector &factorTypes
+ , const std::vector &factorTypes
, size_t nGramOrder
, const std::string &languageModelFile
, float weight
@@ -54,6 +57,13 @@ namespace LanguageModelFactory
LanguageModel *lm = NULL;
switch (lmImplementation)
{
+ case RandLM:
+ #ifdef LM_RAND
+ lm = new LanguageModelRandLM(true,
+ scoreIndexManager);
+ #endif
+ break;
+
case SRI:
#ifdef LM_SRI
lm = new LanguageModelSRI(true, scoreIndexManager);
@@ -94,7 +104,7 @@ namespace LanguageModelFactory
#endif
break;
}
-
+
if (lm == NULL)
{
UserMessage::Add("Language model type unknown. Probably not compiled into library");
@@ -109,7 +119,7 @@ namespace LanguageModelFactory
delete lm;
lm = NULL;
}
- break;
+ break;
case MultiFactor:
if (! static_cast(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
{
@@ -119,7 +129,7 @@ namespace LanguageModelFactory
break;
}
}
-
+
return lm;
}
}
diff --git a/moses/src/LanguageModelRandLM.cpp b/moses/src/LanguageModelRandLM.cpp
new file mode 100644
index 000000000..805878e0c
--- /dev/null
+++ b/moses/src/LanguageModelRandLM.cpp
@@ -0,0 +1,114 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include
+#include
+#include
+#include
+
+#include "LanguageModelRandLM.h"
+#include "FactorCollection.h"
+#include "Phrase.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+
+namespace Moses
+{
+// Load the RandLM model at filePath, remember the LM settings and register the model vocabulary as Output factors; returns false if no model could be created, true otherwise.
+bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, float weight,
+ size_t nGramOrder) {
+ cerr << "Loading LanguageModelRandLM..." << endl;
+ FactorCollection &factorCollection = FactorCollection::Instance();
+ m_filePath = filePath;
+ m_factorType = factorType;
+ m_weight = weight;
+ m_nGramOrder = nGramOrder;
+ int cache_MB = 50; // cache size (in MB, per the variable name) handed to initRandLM
+ m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);
+ if (m_lm == NULL) return false; // runtime check: an assert() disappears under NDEBUG and the next line would dereference NULL
+ // get special word ids
+ m_oov_id = m_lm->getWordID(m_lm->getOOV());
+ CreateFactors(factorCollection);
+ return true;
+}
+// Register every word of the RandLM vocabulary (plus BOS/EOS) as an Output factor and fill m_randlm_ids_vec, a table indexed by factor id that yields the RandLM word id.
+void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) { // add factors which have randlm id
+ // TODO: this code was copied and pasted from the SRI LM class; it should become a shared template function
+ // first collect the whole RandLM vocabulary in a temporary map
+ std::map randlm_ids_map; // map from factor id -> randlm id
+ size_t maxFactorId = 0; // largest factor id seen; determines the size of the lookup vector below
+ for(std::map::const_iterator vIter = m_lm->vocabStart();
+ vIter != m_lm->vocabEnd(); vIter++){
+ // get word from randlm vocab (vIter->first) and associate it with a (possibly new) factor id
+ size_t factorId=factorCollection.AddFactor(Output,m_factorType,vIter->first)->GetId();
+ randlm_ids_map[factorId] = vIter->second; // vIter->second is the RandLM id of that word
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
+ }
+ // add factors for BOS and EOS; only the Factor pointers are kept here, no RandLM id is written to the map for them
+ size_t factorId;
+ m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS());
+ factorId = m_sentenceStart->GetId();
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; // make sure the lookup vector also covers the BOS factor id
+ m_sentenceStartArray[m_factorType] = m_sentenceStart;
+ 
+ m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS());
+ factorId = m_sentenceEnd->GetId();
+ maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; // same for the EOS factor id
+ m_sentenceEndArray[m_factorType] = m_sentenceEnd;
+ 
+ // size the lookup vector held by this object so that every factor id seen above is a valid index
+ m_randlm_ids_vec.resize(maxFactorId+1);
+ // fill with OOV code, so ids without a vocabulary entry resolve to m_oov_id
+ fill(m_randlm_ids_vec.begin(), m_randlm_ids_vec.end(), m_oov_id);
+ 
+ for (map::const_iterator iter = randlm_ids_map.begin();
+ iter != randlm_ids_map.end() ; ++iter)
+ m_randlm_ids_vec[iter->first] = iter->second; // overwrite the OOV default with the real RandLM id
+
+}
+// Return the RandLM word id for a surface string by asking the model directly (the factor-id lookup table is not used).
+randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const {
+ return m_lm->getWordID(str);
+}
+// Score the n-gram in contextFactor: map each word's factor to a RandLM id, query the model and return the transformed, floored score; *len (if given) is always set to 0.
+float LanguageModelRandLM::GetValue(const vector &contextFactor,
+ State* finalState, unsigned int* len) const {
+ unsigned int dummy; // needed: *len is written unconditionally below, so a null len is redirected here
+ if (!len) { len = &dummy; }
+ FactorType factorType = GetFactorType();
+ assert(contextFactor.size() <= MAX_NGRAM_SIZE); // set up context: ngram[] below has only MAX_NGRAM_SIZE slots
+ randlm::WordID ngram[MAX_NGRAM_SIZE];
+ int count = contextFactor.size();
+ for (int i = 0 ; i < count ; i++) {
+ ngram[i] = GetLmID((*contextFactor[i])[factorType]);
+ //std::cerr << m_lm->getWord(ngram[i]) << " ";
+ }
+ int found = 0; // out-parameter of getProb; its value is not used afterwards
+ float logprob = FloorScore(TransformSRIScore(m_lm->getProb(&ngram[0], count, &found, finalState)));
+ *len = 0; // not available
+ //if (finalState)
+ // std::cerr << " = " << logprob << "(" << *finalState << ", " << *len <<")"<< std::endl;
+ //else
+ // std::cerr << " = " << logprob << std::endl;
+ return logprob;
+}
+
+}
+
+
diff --git a/moses/src/LanguageModelRandLM.h b/moses/src/LanguageModelRandLM.h
new file mode 100644
index 000000000..015e1aad4
--- /dev/null
+++ b/moses/src/LanguageModelRandLM.h
@@ -0,0 +1,65 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include
+#include
+#include "Factor.h"
+#include "Util.h"
+#include "LanguageModelSingleFactor.h"
+#include "RandLM.h"
+
+class randlm::RandLM;
+
+namespace Moses
+{
+class Factor;
+class Phrase;
+
+// Moses wrapper around randlm::RandLM, exposed as a single-factor language model.
+// Holds the randlm::RandLM instance (deleted in the destructor) and a factor-id -> RandLM-word-id lookup table.
+class LanguageModelRandLM : public LanguageModelSingleFactor {
+public:
+ LanguageModelRandLM(bool registerScore, ScoreIndexManager &scoreIndexManager)
+ : LanguageModelSingleFactor(registerScore, scoreIndexManager), m_lm(0) {} // m_lm stays null until Load() assigns it
+ bool Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder); // creates m_lm from filePath and builds the lookup table
+ virtual float GetValue(const std::vector &contextFactor, State* finalState = NULL, unsigned int* len=0) const; // score of the n-gram given in contextFactor
+ ~LanguageModelRandLM() {
+ delete m_lm; // safe when Load() was never called: m_lm is then still null
+ }
+ void CleanUpAfterSentenceProcessing() {
+ m_lm->clearCaches(); // clear caches; NOTE(review): dereferences m_lm without a null check, so Load() must have succeeded first
+ }
+ void InitializeBeforeSentenceProcessing() {} // nothing to do
+ protected:
+ std::vector m_randlm_ids_vec; // indexed by factor id, holds the matching RandLM word id (filled by CreateFactors)
+ randlm::RandLM* m_lm; // the wrapped model; null until Load()
+ randlm::WordID m_oov_id; // RandLM id returned for unknown words/factors
+ void CreateFactors(FactorCollection &factorCollection); // registers the model vocabulary as factors and fills m_randlm_ids_vec
+ randlm::WordID GetLmID( const std::string &str ) const; // lookup by surface string
+ randlm::WordID GetLmID( const Factor *factor ) const{ // lookup by factor via the table
+ size_t factorId = factor->GetId();
+ return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId]; // ids beyond the table are treated as OOV
+ };
+ 
+};
+
+}
+
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 52c630f0e..70cf94b67 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -104,6 +104,10 @@ if IRST_LM
libmoses_a_SOURCES += LanguageModelIRST.cpp
endif
+if RAND_LM
+libmoses_a_SOURCES += LanguageModelRandLM.cpp
+endif
+
if INTERNAL_LM
libmoses_a_SOURCES += LanguageModelInternal.cpp \
NGramCollection.cpp \
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index da704ec58..e986301d9 100644
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -34,7 +34,7 @@ namespace Moses
#ifndef BOS_
#define BOS_ "" //Beginning of sentence symbol
#endif
-#ifndef EOS_
+#ifndef EOS_
#define EOS_ "" //End of sentence symbol
#endif
@@ -55,7 +55,7 @@ const float LOWEST_SCORE = -100.0f;
const float DEFAULT_BEAM_WIDTH = 0.00001f;
const size_t DEFAULT_VERBOSE_LEVEL = 1;
-/////////////////////////////////////////////////
+/////////////////////////////////////////////////
// for those using autoconf/automake
#if HAVE_CONFIG_H
#include "config.h"
@@ -72,10 +72,14 @@ const size_t DEFAULT_VERBOSE_LEVEL = 1;
# define LM_IRST 1
# endif
-#endif
-/////////////////////////////////////////////////
+# ifdef HAVE_RANDLM
+# define LM_RAND 1
+# endif
-// enums.
+#endif
+/////////////////////////////////////////////////
+
+// enums.
// must be 0, 1, 2, ..., unless otherwise stated
// can only be 2 at the moment
@@ -84,7 +88,7 @@ const int NUM_LANGUAGES = 2;
const size_t MAX_NUM_FACTORS = 4;
enum FactorDirection
-{
+{
Input, //! Source factors
Output //! Target factors
};
@@ -114,21 +118,23 @@ namespace DistortionOrientationType
{
Monotone, //distinguish only between monotone and non-monotone as possible orientations
Msd //further separate non-monotone into swapped and discontinuous
- };
+ };
}
enum LMType
-{
+{
SingleFactor
,MultiFactor
};
enum LMImplementation
-{
- SRI = 0
- ,IRST = 1
- ,Skip = 2
+{
+ SRI = 0
+ ,IRST = 1
+ ,Skip = 2
,Joint = 3
,Internal = 4
+ ,RandLM = 5 //! RandLM wrapper (LanguageModelRandLM); LanguageModelFactory only builds it when LM_RAND is defined
+
};
@@ -148,13 +154,13 @@ enum XmlInputType
};
enum DictionaryFind
-{
+{
Best = 0
,All = 1
};
enum SearchAlgorithm
-{
+{
Normal = 0
,CubePruning = 1
,CubeGrowing = 2