Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2025-01-06 19:49:41 +03:00)
uint -> size_t
This commit is contained in:
parent 9ec1bef6fb
commit 9861ecbbe5
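The commit replaces the non-standard `uint` with `size_t` in mert's Data::remove_duplicates() (and normalizes brace style across many files). A minimal sketch of why the type change matters — hypothetical example, not from this commit: `uint` is not a standard C++ type, and STL containers report sizes as `size_t`, so indexing with `uint` can truncate on platforms where `size_t` is wider than `unsigned int`.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> feats(3, 0.5f);
  // std::vector<T>::size_type is size_t; using size_t for the index
  // matches the container's size type on every platform.
  for (std::size_t i = 0; i < feats.size(); ++i)
    std::cout << feats[i] << '\n';
  return 0;
}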
@@ -38,8 +38,7 @@
 
 typedef struct _cmd CMD;
 
-struct _cmd
-{
+struct _cmd {
     CMD * next;
     CMD * tail;    /* valid on in head */
     RULE * rule;   /* rule->actions contains shell script */
@@ -10,8 +10,7 @@
 #include <time.h>
 
 
-struct profile_info
-{
+struct profile_info {
     /* name of rule being called */
     char* name;
     /* cumulative time spent in rule */
@@ -27,8 +26,7 @@ struct profile_info
 };
 typedef struct profile_info profile_info;
 
-struct profile_frame
-{
+struct profile_frame {
     /* permanent storage where data accumulates */
     profile_info* info;
     /* overhead for profiling in this call */
@@ -18,8 +18,7 @@
 
 #include <time.h>
 
-typedef struct timing_info
-{
+typedef struct timing_info {
     double system;
     double user;
     time_t start;
@@ -33,8 +33,7 @@ int file_is_file(char* filename);
 int file_mkdir(char *pathname);
 
 typedef struct file_info_t file_info_t ;
-struct file_info_t
-{
+struct file_info_t {
     char * name;
     short is_file;
     short is_dir;
@@ -12,8 +12,7 @@
 typedef struct _PARSE PARSE;
 typedef struct frame FRAME;
 
-struct frame
-{
+struct frame {
     FRAME * prev;
     /* The nearest enclosing frame for which module->user_module is true. */
     FRAME * prev_user;
@@ -526,8 +526,7 @@
 #define DEBUG_MAX 14
 
 
-struct globs
-{
+struct globs {
     int noexec;
     int jobs;
     int quitquick;
@@ -8,8 +8,7 @@
 
 #include "lists.h"
 
-struct module_t
-{
+struct module_t {
     char* name;
     struct hash* rules;
     struct hash* variables;
@@ -7,8 +7,7 @@
 
 #include "rules.h"
 
-struct native_rule_t
-{
+struct native_rule_t {
    char* name;
    argument_list* arguments;
    PARSE* procedure;
@@ -11,8 +11,7 @@
 * \ -) "Command line option."
 */
 
-typedef struct bjam_option
-{
+typedef struct bjam_option {
    char flag;    /* filled in by getoption() */
    char *val;    /* set to random address if true */
 } bjam_option;
@@ -28,14 +28,12 @@
 typedef struct _pathname PATHNAME;
 typedef struct _pathpart PATHPART;
 
-struct _pathpart
-{
+struct _pathpart {
    char * ptr;
    int len;
 };
 
-struct _pathname
-{
+struct _pathname {
    PATHPART part[6];
 #ifdef OS_VMS
    int parent;
@@ -53,15 +53,13 @@ typedef struct _settings SETTINGS ;
 /* RULE - a generic jam rule, the product of RULE and ACTIONS. */
 
 /* A rule's argument list. */
-struct argument_list
-{
+struct argument_list {
    int reference_count;
    LOL data[1];
 };
 
 /* Build actions corresponding to a rule. */
-struct rule_actions
-{
+struct rule_actions {
    int reference_count;
    char * command;    /* command string from ACTIONS */
    LIST * bindlist;
@@ -78,8 +76,7 @@ struct rule_actions
 typedef struct rule_actions rule_actions;
 typedef struct argument_list argument_list;
 
-struct _rule
-{
+struct _rule {
    char * name;
    PARSE * procedure;    /* parse tree from RULE */
    argument_list * arguments;    /* argument checking info, or NULL for unchecked
@@ -96,16 +93,14 @@ struct _rule
 };
 
 /* ACTIONS - a chain of ACTIONs. */
-struct _actions
-{
+struct _actions {
    ACTIONS * next;
    ACTIONS * tail;    /* valid only for head */
    ACTION * action;
 };
 
 /* ACTION - a RULE instance with targets and sources. */
-struct _action
-{
+struct _action {
    RULE * rule;
    TARGETS * targets;
    TARGETS * sources;    /* aka $(>) */
@@ -114,8 +109,7 @@ struct _action
 };
 
 /* SETTINGS - variables to set when executing a TARGET's ACTIONS. */
-struct _settings
-{
+struct _settings {
    SETTINGS * next;
    char * symbol;    /* symbol name for var_set() */
    LIST * value;     /* symbol value for var_set() */
@@ -123,16 +117,14 @@ struct _settings
 };
 
 /* TARGETS - a chain of TARGETs. */
-struct _targets
-{
+struct _targets {
    TARGETS * next;
    TARGETS * tail;    /* valid only for head */
    TARGET * target;
 };
 
 /* TARGET - an entity (e.g. a file) that can be built. */
-struct _target
-{
+struct _target {
    char * name;
    char * boundname;    /* if search() relocates target */
    ACTIONS * actions;   /* rules to execute, if any */
@@ -29,8 +29,7 @@
 
 #define YYSTYPE YYSYMBOL
 
-typedef struct _YYSTYPE
-{
+typedef struct _YYSTYPE {
    int type;
    char * string;
    PARSE * parse;
@@ -7,8 +7,7 @@
 
 # include <stddef.h>
 
-typedef struct string
-{
+typedef struct string {
    char* value;
    unsigned long size;
    unsigned long capacity;
@@ -50,10 +50,10 @@ Data::~Data() {
 //ADDED BY TS
 void Data::remove_duplicates() {
 
-  uint nSentences = featdata->size();
+  size_t nSentences = featdata->size();
   assert(scoredata->size() == nSentences);
 
-  for (uint s=0; s < nSentences; s++) {
+  for (size_t s=0; s < nSentences; s++) {
 
     FeatureArray& feat_array = featdata->get(s);
     ScoreArray& score_array = scoredata->get(s);
@@ -61,29 +61,29 @@ void Data::remove_duplicates() {
     assert(feat_array.size() == score_array.size());
 
     //serves as a hash-map:
-    std::map<double, std::vector<uint> > lookup;
+    std::map<double, std::vector<size_t> > lookup;
 
-    uint end_pos = feat_array.size() - 1;
+    size_t end_pos = feat_array.size() - 1;
 
-    uint nRemoved = 0;
-    for (uint k=0; k <= end_pos; k++) {
+    size_t nRemoved = 0;
+    for (size_t k=0; k <= end_pos; k++) {
 
       const FeatureStats& cur_feats = feat_array.get(k);
 
       double sum = 0.0;
-      for (uint l=0; l < cur_feats.size(); l++)
+      for (size_t l=0; l < cur_feats.size(); l++)
         sum += cur_feats.get(l);
 
       if (lookup.find(sum) != lookup.end()) {
 
         //std::cerr << "hit" << std::endl;
 
-        std::vector<uint>& cur_list = lookup[sum];
+        std::vector<size_t>& cur_list = lookup[sum];
 
-        uint l=0;
+        size_t l=0;
         for (l=0; l < cur_list.size(); l++) {
 
-          uint j=cur_list[l];
+          size_t j=cur_list[l];
 
           if (cur_feats == feat_array.get(j)
               && score_array.get(k) == score_array.get(j)) {
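The remove_duplicates() hunks above key a std::map<double, std::vector<size_t> > on the sum of a candidate's feature values, so only candidates with an equal sum are compared element-wise. A minimal standalone sketch of the same idea (hypothetical names, not the mert code):

#include <cstddef>
#include <map>
#include <numeric>
#include <vector>

// Return indices of rows that duplicate an earlier row. The feature
// sum acts as a cheap hash; exact equality is only checked inside a
// bucket of rows sharing the same sum.
std::vector<std::size_t> findDuplicates(
    const std::vector<std::vector<float> >& rows) {
  std::map<double, std::vector<std::size_t> > lookup;
  std::vector<std::size_t> dups;
  for (std::size_t k = 0; k < rows.size(); ++k) {
    double sum = std::accumulate(rows[k].begin(), rows[k].end(), 0.0);
    std::vector<std::size_t>& bucket = lookup[sum];
    bool isDup = false;
    for (std::size_t l = 0; l < bucket.size(); ++l)
      if (rows[bucket[l]] == rows[k]) { isDup = true; break; }
    if (isDup) dups.push_back(k);
    else bucket.push_back(k);
  }
  return dups;
}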
@@ -129,7 +129,8 @@ IOWrapper::~IOWrapper()
   delete m_singleBestOutputCollector;
 }
 
-void IOWrapper::ResetTranslationId() {
+void IOWrapper::ResetTranslationId()
+{
   m_translationId = StaticData::Instance().GetStartTranslationId();
 }
 
@@ -145,8 +145,7 @@ public:
 
     // MAP decoding: best hypothesis
     const Hypothesis* bestHypo = NULL;
-    if (!staticData.UseMBR())
-    {
+    if (!staticData.UseMBR()) {
       bestHypo = manager.GetBestHypothesis();
       if (bestHypo) {
         if (staticData.IsPathRecoveryEnabled()) {
@@ -168,8 +167,7 @@ public:
     }
 
     // MBR decoding (n-best MBR, lattice MBR, consensus)
-    else
-    {
+    else {
       // we first need the n-best translations
       size_t nBestSize = staticData.GetMBRSize();
       if (nBestSize <= 0) {
@@ -45,7 +45,8 @@ void AlignmentInfo::BuildNonTermIndexMap()
 
 }
 
-bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b) {
+bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b)
+{
  if(a->second < b->second) return true;
  if(a->second == b->second) return (a->first < b->first);
  return false;
@@ -57,8 +58,7 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
  std::vector< const std::pair<size_t,size_t>* > ret;
 
  CollType::const_iterator iter;
- for (iter = m_collection.begin(); iter != m_collection.end(); ++iter)
- {
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
    const std::pair<size_t,size_t> &alignPair = *iter;
    ret.push_back(&alignPair);
  }
@@ -66,8 +66,7 @@ std::vector< const std::pair<size_t,size_t>* > AlignmentInfo::GetSortedAlignment
  const StaticData &staticData = StaticData::Instance();
  WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort();
 
- switch (wordAlignmentSort)
- {
+ switch (wordAlignmentSort) {
  case NoSort:
    break;
 
@@ -41,8 +41,12 @@ class AlignmentInfo
  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;
 
-  const_iterator begin() const { return m_collection.begin(); }
-  const_iterator end() const { return m_collection.end(); }
+  const_iterator begin() const {
+    return m_collection.begin();
+  }
+  const_iterator end() const {
+    return m_collection.end();
+  }
 
  // Provides a map from target-side to source-side non-terminal indices.
  // The target-side index should be the rule symbol index (counting terminals).
@@ -56,8 +60,7 @@ class AlignmentInfo
 private:
  // AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
-    : m_collection(pairs)
-  {
+    : m_collection(pairs) {
    BuildNonTermIndexMap();
  }
 
@@ -69,8 +72,7 @@ class AlignmentInfo
 
 // Define an arbitrary strict weak ordering between AlignmentInfo objects
 // for use by AlignmentInfoCollection.
-struct AlignmentInfoOrderer
-{
+struct AlignmentInfoOrderer {
  bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
    return a.m_collection < b.m_collection;
  }
@@ -30,7 +30,9 @@ namespace Moses
 class AlignmentInfoCollection
 {
 public:
-  static AlignmentInfoCollection &Instance() { return s_instance; }
+  static AlignmentInfoCollection &Instance() {
+    return s_instance;
+  }
 
  // Returns a pointer to an AlignmentInfo object with the same source-target
  // alignment pairs as given in the argument. If the collection already
@@ -7,7 +7,8 @@
 
 using namespace std;
 
-namespace Moses {
+namespace Moses
+{
 
 BilingualDynSuffixArray::BilingualDynSuffixArray():
  m_maxPhraseLength(StaticData::Instance().GetMaxPhraseLength()),
@@ -86,7 +87,8 @@ int BilingualDynSuffixArray::LoadRawAlignments(InputFileStream& align)
  }
  return m_rawAlignments.size();
 }
-int BilingualDynSuffixArray::LoadRawAlignments(string& align) {
+int BilingualDynSuffixArray::LoadRawAlignments(string& align)
+{
  // stores the alignments in the raw file format
  vector<int> vtmp;
  Utils::splitToInt(align, vtmp, "- ");
@@ -145,8 +147,7 @@ SentenceAlignment BilingualDynSuffixArray::GetSentenceAlignment(const int sntInd
    if(trg2Src) {
      curSnt.alignedList[targetPos].push_back(sourcePos); // list of target nodes for each source word
      curSnt.numberAligned[sourcePos]++; // cnt of how many source words connect to this target word
-    }
-    else {
+    } else {
      curSnt.alignedList[sourcePos].push_back(targetPos); // list of target nodes for each source word
      curSnt.numberAligned[targetPos]++; // cnt of how many source words connect to this target word
    }
@@ -206,12 +207,10 @@ bool BilingualDynSuffixArray::GetLocalVocabIDs(const Phrase& src, SAPhrase &outp
  for (size_t pos = 0; pos < phraseSize; ++pos) {
    const Word &word = src.GetWord(pos);
    wordID_t arrayId = m_srcVocab->GetWordID(word);
-    if (arrayId == m_srcVocab->GetkOOVWordID())
-    { // oov
+    if (arrayId == m_srcVocab->GetkOOVWordID()) {
+      // oov
      return false;
-    }
-    else
-    {
+    } else {
      output.SetId(pos, arrayId);
      //cerr << arrayId << " ";
    }
@@ -243,8 +242,7 @@ pair<float, float> BilingualDynSuffixArray::GetLexicalWeight(const PhrasePair& p
      CHECK(itrCache != m_wordPairCache.end());
      srcSumPairProbs += itrCache->second.first;
      targetProbs[wordpair] = itrCache->second.second;
-    }
-    else { // extract p(trg|src)
+    } else { // extract p(trg|src)
      for(size_t i = 0; i < srcWordAlignments.size(); ++i) { // for each aligned word
        int trgIdx = srcWordAlignments[i];
        wordID_t trgWord = m_trgCorpus->at(trgIdx + m_trgSntBreaks[phrasepair.m_sntIndex]);
@@ -279,7 +277,8 @@ pair<float, float> BilingualDynSuffixArray::GetLexicalWeight(const PhrasePair& p
  // TODO::Need to get p(NULL|trg)
  return pair<float, float>(srcLexWeight, trgLexWeight);
 }
-void BilingualDynSuffixArray::CacheFreqWords() const {
+void BilingualDynSuffixArray::CacheFreqWords() const
+{
  std::multimap<int, wordID_t> wordCnts;
  // for each source word in vocab
  Vocab::Word2Id::const_iterator it;
@@ -318,8 +317,7 @@ void BilingualDynSuffixArray::CacheWordProbs(wordID_t srcWord) const
    if(srcAlg.size() == 0) {
      ++counts[m_srcVocab->GetkOOVWordID()]; // if not alligned then align to NULL word
      ++denom;
-    }
-    else { //get target words aligned to srcword in this sentence
+    } else { //get target words aligned to srcword in this sentence
      for(size_t i=0; i < srcAlg.size(); ++i) {
        wordID_t trgWord = m_trgCorpus->at(srcAlg[i] + m_trgSntBreaks[sntIdx]);
        ++counts[trgWord];
@@ -452,7 +450,8 @@ int BilingualDynSuffixArray::SampleSelection(std::vector<unsigned>& sample,
  return sample.size();
 }
 
-void BilingualDynSuffixArray::addSntPair(string& source, string& target, string& alignment) {
+void BilingualDynSuffixArray::addSntPair(string& source, string& target, string& alignment)
+{
  vuint_t srcFactor, trgFactor;
  cerr << "source, target, alignment = " << source << ", " << target << ", " << alignment << endl;
  const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
@@ -497,7 +496,8 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
  //ClearWordInCache(sIDs[i]);
 
 }
-void BilingualDynSuffixArray::ClearWordInCache(wordID_t srcWord) {
+void BilingualDynSuffixArray::ClearWordInCache(wordID_t srcWord)
+{
  if(m_freqWordsCached.find(srcWord) != m_freqWordsCached.end())
    return;
  std::map<std::pair<wordID_t, wordID_t>, std::pair<float, float> >::iterator it,
@@ -533,14 +533,16 @@ bool SentenceAlignment::Extract(int maxPhraseLength, std::vector<PhrasePair*> &r
  int minTarget = 9999;
  int maxTarget = -1;
  std::vector< int > usedTarget = numberAligned;
-  for(int sourcePos = startSource; sourcePos <= endSource; sourcePos++)
-  {
-    for(int ind=0; ind < (int)alignedList[sourcePos].size();ind++)
-    {
+  for(int sourcePos = startSource; sourcePos <= endSource; sourcePos++) {
+    for(int ind=0; ind < (int)alignedList[sourcePos].size(); ind++) {
      int targetPos = alignedList[sourcePos][ind];
      // cout << "point (" << targetPos << ", " << sourcePos << ")\n";
-      if (targetPos<minTarget) { minTarget = targetPos; }
-      if (targetPos>maxTarget) { maxTarget = targetPos; }
+      if (targetPos<minTarget) {
+        minTarget = targetPos;
+      }
+      if (targetPos>maxTarget) {
+        maxTarget = targetPos;
+      }
      usedTarget[ targetPos ]--;
    } // for(int ind=0;ind<sentence
  } // for(int sourcePos=startSource
@@ -548,37 +550,32 @@ bool SentenceAlignment::Extract(int maxPhraseLength, std::vector<PhrasePair*> &r
  // cout << "f projected ( " << minTarget << "-" << maxTarget << ", " << startSource << "," << endSource << ")\n";
 
  if (maxTarget >= 0 && // aligned to any foreign words at all
-      maxTarget-minTarget < maxPhraseLength)
-  { // foreign phrase within limits
+      maxTarget-minTarget < maxPhraseLength) {
+    // foreign phrase within limits
 
    // check if foreign words are aligned to out of bound english words
    bool out_of_bounds = false;
-    for(int targetPos=minTarget; targetPos <= maxTarget && !out_of_bounds; targetPos++)
-    {
-      if (usedTarget[targetPos]>0)
-      {
+    for(int targetPos=minTarget; targetPos <= maxTarget && !out_of_bounds; targetPos++) {
+      if (usedTarget[targetPos]>0) {
        // cout << "ouf of bounds: " << targetPos << "\n";
        out_of_bounds = true;
      }
    }
 
    // cout << "doing if for ( " << minTarget << "-" << maxTarget << ", " << startSource << "," << endSource << ")\n";
-    if (!out_of_bounds)
-    {
+    if (!out_of_bounds) {
      // start point of foreign phrase may retreat over unaligned
      for(int startTarget = minTarget;
          (startTarget >= 0 &&
           startTarget > maxTarget-maxPhraseLength && // within length limit
           (startTarget==minTarget || numberAligned[startTarget]==0)); // unaligned
-          startTarget--)
-      {
+          startTarget--) {
        // end point of foreign phrase may advance over unaligned
        for (int endTarget=maxTarget;
            (endTarget<countTarget &&
             endTarget<startTarget+maxPhraseLength && // within length limit
             (endTarget==maxTarget || numberAligned[endTarget]==0)); // unaligned
-            endTarget++)
-        {
+            endTarget++) {
          PhrasePair *phrasePair = new PhrasePair(startTarget,endTarget,startSource,endSource, m_sntIndex);
          ret.push_back(phrasePair);
        } // for (int endTarget=maxTarget;
@@ -9,7 +9,8 @@
 #include "InputFileStream.h"
 #include "FactorTypeSet.h"
 
-namespace Moses {
+namespace Moses
+{
 
 class SAPhrase
 {
@@ -20,13 +21,13 @@ public:
    :words(phraseSize)
  {}
 
-  void SetId(size_t pos, wordID_t id)
-  {
+  void SetId(size_t pos, wordID_t id) {
    CHECK(pos < words.size());
    words[pos] = id;
  }
-  bool operator<(const SAPhrase& phr2) const
-  { return words < phr2.words; }
+  bool operator<(const SAPhrase& phr2) const {
+    return words < phr2.words;
+  }
 };
 
 class PhrasePair
@@ -41,8 +42,9 @@ public:
    , m_sntIndex(sntIndex)
  {}
 
-  size_t GetTargetSize() const
-  { return m_endTarget - m_startTarget + 1; }
+  size_t GetTargetSize() const {
+    return m_endTarget - m_startTarget + 1;
+  }
 };
 
 class SentenceAlignment
@@ -56,7 +58,8 @@ public:
  std::vector< std::vector<int> > alignedList;
  bool Extract(int maxPhraseLength, std::vector<PhrasePair*> &ret, int startSource, int endSource) const;
 };
-class ScoresComp {
+class ScoresComp
+{
 public:
  ScoresComp(const std::vector<float>& weights): m_weights(weights) {}
  bool operator()(const Scores& s1, const Scores& s2) const {
@@ -77,7 +80,8 @@ private:
  const std::vector<float>& m_weights;
 };
 
-class BilingualDynSuffixArray {
+class BilingualDynSuffixArray
+{
 public:
  BilingualDynSuffixArray();
  ~BilingualDynSuffixArray();
@@ -128,14 +132,12 @@ private:
  void ClearWordInCache(wordID_t);
  std::pair<float, float> GetLexicalWeight(const PhrasePair&) const;
 
-  int GetSourceSentenceSize(size_t sentenceId) const
-  {
+  int GetSourceSentenceSize(size_t sentenceId) const {
    return (sentenceId==m_srcSntBreaks.size()-1) ?
      m_srcCorpus->size() - m_srcSntBreaks.at(sentenceId) :
      m_srcSntBreaks.at(sentenceId+1) - m_srcSntBreaks.at(sentenceId);
  }
-  int GetTargetSentenceSize(size_t sentenceId) const
-  {
+  int GetTargetSentenceSize(size_t sentenceId) const {
    return (sentenceId==m_trgSntBreaks.size()-1) ?
      m_trgCorpus->size() - m_trgSntBreaks.at(sentenceId) :
      m_trgSntBreaks.at(sentenceId+1) - m_trgSntBreaks.at(sentenceId);
@@ -98,8 +98,7 @@ void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
 
  // add all trans opt into queue. using only 1st child node.
  ChartTranslationOptionList::const_iterator iterList;
-  for (iterList = transOptList.begin(); iterList != transOptList.end(); ++iterList)
-  {
+  for (iterList = transOptList.begin(); iterList != transOptList.end(); ++iterList) {
    const ChartTranslationOption &transOpt = **iterList;
    RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
    queue.Add(ruleCube);
@@ -107,8 +106,7 @@ void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
 
  // pluck things out of queue and add to hypo collection
  const size_t popLimit = staticData.GetCubePruningPopLimit();
-  for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops)
-  {
+  for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
    ChartHypothesis *hypo = queue.Pop();
    AddHypothesis(hypo);
  }
@@ -42,12 +42,17 @@ class ChartCellLabel
    , m_stack(stack)
  {}
 
-  const WordsRange &GetCoverage() const { return m_coverage; }
-  const Word &GetLabel() const { return m_label; }
-  const ChartHypothesisCollection *GetStack() const { return m_stack; }
+  const WordsRange &GetCoverage() const {
+    return m_coverage;
+  }
+  const Word &GetLabel() const {
+    return m_label;
+  }
+  const ChartHypothesisCollection *GetStack() const {
+    return m_stack;
+  }
 
-  bool operator<(const ChartCellLabel &other) const
-  {
+  bool operator<(const ChartCellLabel &other) const {
    // m_coverage and m_label uniquely identify a ChartCellLabel, so don't
    // need to compare m_stack.
    if (m_coverage == other.m_coverage) {
@@ -42,27 +42,32 @@ class ChartCellLabelSet
 
  ChartCellLabelSet(const WordsRange &coverage) : m_coverage(coverage) {}
 
-  const_iterator begin() const { return m_set.begin(); }
-  const_iterator end() const { return m_set.end(); }
+  const_iterator begin() const {
+    return m_set.begin();
+  }
+  const_iterator end() const {
+    return m_set.end();
+  }
 
-  void AddWord(const Word &w)
-  {
+  void AddWord(const Word &w) {
    ChartCellLabel cellLabel(m_coverage, w);
    m_set.insert(cellLabel);
  }
 
-  void AddConstituent(const Word &w, const ChartHypothesisCollection &stack)
-  {
+  void AddConstituent(const Word &w, const ChartHypothesisCollection &stack) {
    ChartCellLabel cellLabel(m_coverage, w, &stack);
    m_set.insert(cellLabel);
  }
 
-  bool Empty() const { return m_set.empty(); }
+  bool Empty() const {
+    return m_set.empty();
+  }
 
-  size_t GetSize() const { return m_set.size(); }
+  size_t GetSize() const {
+    return m_set.size();
+  }
 
-  const ChartCellLabel *Find(const Word &w) const
-  {
+  const ChartCellLabel *Find(const Word &w) const {
    SetType::const_iterator p = m_set.find(ChartCellLabel(m_coverage, w));
    return p == m_set.end() ? 0 : &(*p);
  }
@@ -57,8 +57,7 @@ ChartHypothesis::ChartHypothesis(const ChartTranslationOption &transOpt,
  const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
  m_prevHypos.reserve(childEntries.size());
  std::vector<HypothesisDimension>::const_iterator iter;
-  for (iter = childEntries.begin(); iter != childEntries.end(); ++iter)
-  {
+  for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
    m_prevHypos.push_back(iter->GetHypothesis());
  }
 }
@@ -98,8 +97,7 @@ void ChartHypothesis::CreateOutputPhrase(Phrase &outPhrase) const
      size_t nonTermInd = nonTermIndexMap[pos];
      const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
      prevHypo->CreateOutputPhrase(outPhrase);
-    }
-    else {
+    } else {
      outPhrase.AddWord(word);
    }
  }
@@ -125,8 +123,7 @@ int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
  // +1 = this > compare
  // 0 = this ==compare
 
-  for (unsigned i = 0; i < m_ffStates.size(); ++i)
-  {
+  for (unsigned i = 0; i < m_ffStates.size(); ++i) {
    if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL)
      comp = m_ffStates[i] - compare.m_ffStates[i];
    else
@@ -261,8 +258,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
 
  // recombination
  if (hypo.GetWinningHypothesis() != NULL &&
-      hypo.GetWinningHypothesis() != &hypo)
-  {
+      hypo.GetWinningHypothesis() != &hypo) {
    out << "->" << hypo.GetWinningHypothesis()->GetId();
  }
 
@@ -94,7 +94,9 @@ public:
 
  ~ChartHypothesis();
 
-  unsigned GetId() const { return m_id; }
+  unsigned GetId() const {
+    return m_id;
+  }
 
  const ChartTranslationOption &GetTranslationOption()const {
    return m_transOpt;
@@ -111,7 +113,9 @@ public:
  inline const FFState* GetFFState( size_t featureID ) const {
    return m_ffStates[ featureID ];
  }
-  inline const ChartManager& GetManager() const { return m_manager; }
+  inline const ChartManager& GetManager() const {
+    return m_manager;
+  }
 
  void CreateOutputPhrase(Phrase &outPhrase) const;
  Phrase GetOutputPhrase() const;
@@ -101,8 +101,7 @@ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManage
    VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
    if (m_nBestIsEnabled) {
      hypoExisting->AddArc(hypo);
-    }
-    else {
+    } else {
      ChartHypothesis::Delete(hypo);
    }
    return false;
@@ -113,7 +113,9 @@ public:
    return m_hyposOrdered;
  }
 
-  float GetBestScore() const { return m_bestScore; }
+  float GetBestScore() const {
+    return m_bestScore;
+  }
 
  void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned,bool> &reachable) const;
 
@@ -258,16 +258,14 @@ void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearch
 void ChartManager::FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const
 {
  // do not recurse, if already visited
-  if (reachable.find(hypo->GetId()) != reachable.end())
-  {
+  if (reachable.find(hypo->GetId()) != reachable.end()) {
    return;
  }
 
  // recurse
  reachable[ hypo->GetId() ] = true;
  const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
-  for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i)
-  {
+  for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
    FindReachableHypotheses( *i, reachable );
  }
 
@@ -89,7 +89,9 @@ public:
    m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
  }
 
-  unsigned GetNextHypoId() { return m_hypothesisId++; }
+  unsigned GetNextHypoId() {
+    return m_hypothesisId++;
+  }
 };
 
 }
@@ -136,9 +136,7 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
      // word.
      endPos = absEndPos - 1;
      stackInd = relEndPos;
-    }
-    else
-    {
+    } else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }
@@ -247,9 +245,7 @@ void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication(
        dottedRuleColl.Add(stackInd, rule);
      }
    }
-  }
-  else
-  {
+  } else {
    // loop over possible expansions of the rule
    PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator p;
    PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator end =
@@ -45,14 +45,15 @@ class ChartTranslationOption
    : m_dottedRule(dottedRule)
    , m_targetPhraseCollection(targetPhraseColl)
    , m_wordsRange(wordsRange)
-    , m_estimateOfBestScore(0)
-  {
+    , m_estimateOfBestScore(0) {
    CalcEstimateOfBestScore(allChartCells);
  }
 
  ~ChartTranslationOption() {}
 
-  const DottedRule &GetDottedRule() const { return m_dottedRule; }
+  const DottedRule &GetDottedRule() const {
+    return m_dottedRule;
+  }
 
  const TargetPhraseCollection &GetTargetPhraseCollection() const {
    return m_targetPhraseCollection;
@@ -65,7 +66,9 @@ class ChartTranslationOption
  // return an estimate of the best score possible with this translation option.
  // the estimate is the sum of the top target phrase's estimated score plus the
  // scores of the best child hypotheses.
-  inline float GetEstimateOfBestScore() const { return m_estimateOfBestScore; }
+  inline float GetEstimateOfBestScore() const {
+    return m_estimateOfBestScore;
+  }
 
 private:
  // not implemented
@@ -106,8 +106,8 @@ void ChartTranslationOptionCollection::ProcessUnknownWord(size_t startPos, size_
    return;
  }
 
-  if (startPos == 0 || startPos == m_source.GetSize() - 1)
-  { // don't create unknown words for <S> or </S> tags. Otherwise they can be moved. Should only be translated by glue rules
+  if (startPos == 0 || startPos == m_source.GetSize() - 1) {
+    // don't create unknown words for <S> or </S> tags. Otherwise they can be moved. Should only be translated by glue rules
    return;
  }
 
@@ -70,8 +70,7 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &targetPhraseC
    m_collection.push_back(option);
    float score = option->GetEstimateOfBestScore();
    m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
-  }
-  else {
+  } else {
    // full but not bursting. add if better than worst score
    ChartTranslationOption option(targetPhraseCollection, dottedRule,
                                  m_range, chartCellColl);
@@ -33,14 +33,18 @@ class ChartTrellisDetour
  ChartTrellisDetour(boost::shared_ptr<const ChartTrellisPath>,
                     const ChartTrellisNode &, const ChartHypothesis &);
 
-  const ChartTrellisPath &GetBasePath() const { return *m_basePath; }
+  const ChartTrellisPath &GetBasePath() const {
+    return *m_basePath;
+  }
  const ChartTrellisNode &GetSubstitutedNode() const {
    return m_substitutedNode;
  }
  const ChartHypothesis &GetReplacementHypo() const {
    return m_replacementHypo;
  }
-  float GetTotalScore() const { return m_totalScore; }
+  float GetTotalScore() const {
+    return m_totalScore;
+  }
 
 private:
  boost::shared_ptr<const ChartTrellisPath> m_basePath;
@@ -21,13 +21,16 @@
 
 #include "Util.h"
 
-namespace Moses {
+namespace Moses
+{
 
-ChartTrellisDetourQueue::~ChartTrellisDetourQueue() {
+ChartTrellisDetourQueue::~ChartTrellisDetourQueue()
+{
  RemoveAllInColl(m_queue);
 }
 
-void ChartTrellisDetourQueue::Push(const ChartTrellisDetour *detour) {
+void ChartTrellisDetourQueue::Push(const ChartTrellisDetour *detour)
+{
  if (m_capacity == 0 || m_queue.size() < m_capacity) {
    m_queue.insert(detour);
  } else if (detour->GetTotalScore() > (*m_queue.rbegin())->GetTotalScore()) {
@@ -43,7 +46,8 @@ void ChartTrellisDetourQueue::Push(const ChartTrellisDetour *detour) {
  }
 }
 
-const ChartTrellisDetour *ChartTrellisDetourQueue::Pop() {
+const ChartTrellisDetour *ChartTrellisDetourQueue::Pop()
+{
  QueueType::iterator p = m_queue.begin();
  const ChartTrellisDetour *top = *p;
  m_queue.erase(p);
@@ -23,19 +23,23 @@
 
 #include <set>
 
-namespace Moses {
+namespace Moses
+{
 
 // A bounded priority queue of ChartTrellisDetour pointers. The top item is
 // the best scoring detour. The queue assumes ownership of pushed items and
 // relinquishes ownership when they are popped. Any remaining items at the
 // time of the queue's destruction are deleted.
-class ChartTrellisDetourQueue {
+class ChartTrellisDetourQueue
+{
 public:
  // Create empty queue with fixed capacity of c. Capacity 0 means unbounded.
  ChartTrellisDetourQueue(size_t c) : m_capacity(c) {}
  ~ChartTrellisDetourQueue();
 
-  bool Empty() const { return m_queue.empty(); }
+  bool Empty() const {
+    return m_queue.empty();
+  }
 
 // Add the detour to the queue or delete it if the queue is full and the
 // score is no better than the queue's worst score.
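The header comment above spells out ChartTrellisDetourQueue's contract: a size-capped collection ordered by score, where a full queue either drops a no-better item or evicts its current worst. A rough sketch of that pattern with a std::multiset (assumed simplification; the real class stores ChartTrellisDetour pointers and owns its items):

#include <cstddef>
#include <set>

// Keeps at most 'capacity' best scores; capacity 0 means unbounded.
class BoundedScoreQueue {
public:
  explicit BoundedScoreQueue(std::size_t capacity) : m_capacity(capacity) {}

  void Push(float score) {
    if (m_capacity == 0 || m_scores.size() < m_capacity) {
      m_scores.insert(score);
    } else if (score > *m_scores.begin()) {
      // Full: evict the current worst, then insert the better item.
      m_scores.erase(m_scores.begin());
      m_scores.insert(score);
    } // else: no better than the worst, so it is dropped
  }

  bool Empty() const { return m_scores.empty(); }

private:
  std::size_t m_capacity;
  std::multiset<float> m_scores; // *begin() is the worst retained score
};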
@@ -40,11 +40,17 @@ class ChartTrellisNode
 
  ~ChartTrellisNode();
 
-  const ChartHypothesis &GetHypothesis() const { return m_hypo; }
+  const ChartHypothesis &GetHypothesis() const {
+    return m_hypo;
+  }
 
-  const NodeChildren &GetChildren() const { return m_children; }
+  const NodeChildren &GetChildren() const {
+    return m_children;
+  }
 
-  const ChartTrellisNode &GetChild(size_t i) const { return *m_children[i]; }
+  const ChartTrellisNode &GetChild(size_t i) const {
+    return *m_children[i];
+  }
 
  Phrase GetOutputPhrase() const;
 
@@ -42,12 +42,18 @@ class ChartTrellisPath
 
  ~ChartTrellisPath();
 
-  const ChartTrellisNode &GetFinalNode() const { return *m_finalNode; }
+  const ChartTrellisNode &GetFinalNode() const {
+    return *m_finalNode;
+  }
 
-  const ChartTrellisNode *GetDeviationPoint() const { return m_deviationPoint; }
+  const ChartTrellisNode *GetDeviationPoint() const {
+    return m_deviationPoint;
+  }
 
  //! get score for this path throught trellis
-  float GetTotalScore() const { return m_totalScore; }
+  float GetTotalScore() const {
+    return m_totalScore;
+  }
 
  Phrase GetOutputPhrase() const;
 
@@ -42,12 +42,24 @@ class DottedRule
    : m_cellLabel(&ccl)
    , m_prev(&prev) {}
 
-  const WordsRange &GetWordsRange() const { return m_cellLabel->GetCoverage(); }
-  const Word &GetSourceWord() const { return m_cellLabel->GetLabel(); }
-  bool IsNonTerminal() const { return m_cellLabel->GetLabel().IsNonTerminal(); }
-  const DottedRule *GetPrev() const { return m_prev; }
-  bool IsRoot() const { return m_prev == NULL; }
-  const ChartCellLabel &GetChartCellLabel() const { return *m_cellLabel; }
+  const WordsRange &GetWordsRange() const {
+    return m_cellLabel->GetCoverage();
+  }
+  const Word &GetSourceWord() const {
+    return m_cellLabel->GetLabel();
+  }
+  bool IsNonTerminal() const {
+    return m_cellLabel->GetLabel().IsNonTerminal();
+  }
+  const DottedRule *GetPrev() const {
+    return m_prev;
+  }
+  bool IsRoot() const {
+    return m_prev == NULL;
+  }
+  const ChartCellLabel &GetChartCellLabel() const {
+    return *m_cellLabel;
+  }
 
 private:
  const ChartCellLabel *m_cellLabel; // usually contains something, unless
@@ -46,7 +46,9 @@ class DottedRuleInMemory : public DottedRule
    : DottedRule(cellLabel, prev)
    , m_node(node) {}
 
-  const PhraseDictionaryNodeSCFG &GetLastNode() const { return m_node; }
+  const PhraseDictionaryNodeSCFG &GetLastNode() const {
+    return m_node;
+  }
 
 private:
  const PhraseDictionaryNodeSCFG &m_node;
@@ -48,10 +48,16 @@ class DottedRuleOnDisk : public DottedRule
    , m_lastNode(lastNode)
    , m_done(false) {}
 
-  const OnDiskPt::PhraseNode &GetLastNode() const { return m_lastNode; }
+  const OnDiskPt::PhraseNode &GetLastNode() const {
+    return m_lastNode;
+  }
 
-  bool Done() const { return m_done; }
-  void Done(bool value) const { m_done = value; }
+  bool Done() const {
+    return m_done;
+  }
+  void Done(bool value) const {
+    m_done = value;
+  }
 
 private:
  const OnDiskPt::PhraseNode &m_lastNode;
@@ -22,10 +22,12 @@
 #include <ctime>
 #include <iostream>
 
-namespace randlm {
+namespace randlm
+{
 
 template<typename T>
-class CacheNode {
+class CacheNode
+{
 public:
  typedef std::map<wordID_t, CacheNode<T>* > childMap;
  // initialise value to 'unknown' (i.e. not yet queried or cached).
@@ -36,7 +38,8 @@ namespace randlm {
 };
 
 template<typename T>
-class Cache {
+class Cache
+{
 public:
  typedef typename std::map<wordID_t, CacheNode<T>* >::iterator childPtr;
  // unknown_value is used to indicate the ngram was not queried (yet)
@@ -20,7 +20,8 @@
 #include <cmath>
 #include "file.h"
 
-namespace randlm {
+namespace randlm
+{
 
 // Class Filter wraps a contiguous array of data. Filter and its subclasses
 // implement read/write/increment functionality on arrays with arbitrary sized addresses
@@ -31,7 +32,8 @@ namespace randlm {
 // and store in a uint16 in positions 0000 0001 111111 where the first 7 bits have
 // been masked out.
 template<typename T>
-class Filter {
+class Filter
+{
 public:
  Filter(uint64_t addresses, int width) : addresses_(addresses), width_(width), data_(NULL) {
    // number of bits in T
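The Filter comment above describes packing values of arbitrary bit width into a contiguous byte array, e.g. a 9-bit value spanning two bytes. A toy sketch of reading one such field at a bit offset (hypothetical helper, far simpler than the templated Filter class):

#include <cstdint>

// Read a 'width'-bit value (width <= 16) starting at absolute bit
// position 'bitPos' in a byte array, MSB-first. The caller must
// guarantee that the two bytes after bitPos/8 are readable.
uint16_t readBits(const uint8_t* data, uint64_t bitPos, int width) {
  uint32_t window = 0;
  // Gather the up-to-three bytes that can overlap the field.
  for (int i = 0; i < 3; ++i)
    window = (window << 8) | data[bitPos / 8 + i];
  int shift = 24 - int(bitPos % 8) - width;  // align field to the low bits
  return uint16_t((window >> shift) & ((1u << width) - 1));
}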
@@ -202,10 +204,18 @@ namespace randlm {
    std::cout << std::endl;
  }
  // i/o
-  uint64_t getAddresses() { return addresses_; }
-  int getWidth() { return width_; }
-  int getCellWidth() { return cell_width_; }
-  uint32_t getCells() { return cells_; }
+  uint64_t getAddresses() {
+    return addresses_;
+  }
+  int getWidth() {
+    return width_;
+  }
+  int getCellWidth() {
+    return cell_width_;
+  }
+  uint32_t getCells() {
+    return cells_;
+  }
  virtual bool save(FileHandler* out) {
    CHECK(out != NULL);
    CHECK(out->write((char*)&cells_, sizeof(cells_)));
@@ -265,7 +275,8 @@ namespace randlm {
 };
 
 // Extension with bit test/setter methods added
-class BitFilter : public Filter<uint8_t> {
+class BitFilter : public Filter<uint8_t>
+{
 public:
  BitFilter(uint64_t bits) : Filter<uint8_t>(bits, 1) {}
  BitFilter(FileHandler* fin, bool loaddata = true)
@@ -10,7 +10,8 @@ using namespace Moses;
 typedef uint64_t P; // largest input range is 2^64
 
 template <typename T>
-class HashBase {
+class HashBase
+{
 protected:
  T m_; // range of hash output
  count_t H_; // number of hash functions to instantiate
@@ -26,7 +27,9 @@ class HashBase {
  virtual ~HashBase() {}
  virtual T hash(const char*s, count_t h)=0; // string hashing
  virtual T hash(const wordID_t* id, const int len, count_t h)=0; // vocab mapped hashing
-  count_t size() { return H_;}
+  count_t size() {
+    return H_;
+  }
  virtual void save(FileHandler* fout) {
    CHECK(fout != 0);
    fout->write((char*)&m_, sizeof(m_));
@@ -39,7 +42,8 @@ class HashBase {
  }
 };
 template <typename T>
-class UnivHash_linear: public HashBase<T> {
+class UnivHash_linear: public HashBase<T>
+{
 public:
  UnivHash_linear(float m, count_t H, P pr):
    HashBase<T>(m, H), pr_(pr) {
@@ -50,8 +54,12 @@ class UnivHash_linear: public HashBase<T> {
    HashBase<T>(fin) {
    load(fin);
  }
-  ~UnivHash_linear() {freeSeeds();}
-  T hash(const char* s, count_t h){return 0;} //not implemented
+  ~UnivHash_linear() {
+    freeSeeds();
+  }
+  T hash(const char* s, count_t h) {
+    return 0; //not implemented
+  }
  T hash(const wordID_t* id, const int len, count_t h);
  T hash(const wordID_t id, const count_t pos,
         const T prevValue, count_t h);
@@ -71,7 +79,8 @@ class UnivHash_linear: public HashBase<T> {
 * # of hash function = 2^(l-1)
 */
 template <typename T>
-class UnivHash_noPrimes: public HashBase<T> {
+class UnivHash_noPrimes: public HashBase<T>
+{
 public:
  UnivHash_noPrimes(float k, float l):
    HashBase<T>(k, 100), d_(count_t((l-k))) {
@@ -83,7 +92,9 @@ class UnivHash_noPrimes: public HashBase<T> {
    HashBase<T>(fin) {
    load(fin);
  }
-  ~UnivHash_noPrimes() {freeSeeds();}
+  ~UnivHash_noPrimes() {
+    freeSeeds();
+  }
  T hash(const char* s, count_t h);
  T hash(const wordID_t* id, const int len, count_t h);
  T hash(const P x, count_t h);
@@ -93,34 +104,44 @@ class UnivHash_noPrimes: public HashBase<T> {
  count_t d_; // l-k
  P p_, *a_;  // real-valued input range, storage
  void initSeeds();
-  void freeSeeds() {delete[] a_;}
+  void freeSeeds() {
+    delete[] a_;
+  }
 };
 
 template <typename T>
-class Hash_shiftAddXOR: public HashBase<T> {
+class Hash_shiftAddXOR: public HashBase<T>
+{
 public:
  Hash_shiftAddXOR(float m, count_t H=5): HashBase<T>(m,H),
    l_(5), r_(2) {
    initSeeds();
  }
-  ~Hash_shiftAddXOR() {freeSeeds();}
+  ~Hash_shiftAddXOR() {
+    freeSeeds();
+  }
  T hash(const char* s, count_t h);
  T hash(const wordID_t* id, const int len, count_t h) {} // empty
 private:
  T* v_; // random seed storage
  const unsigned short l_, r_; // left-shift bits, right-shift bits
  void initSeeds();
-  void freeSeeds() {delete[] v_;}
+  void freeSeeds() {
+    delete[] v_;
+  }
 };
 
 template <typename T>
-class UnivHash_tableXOR: public HashBase<T> {
+class UnivHash_tableXOR: public HashBase<T>
+{
 public:
  UnivHash_tableXOR(float m, count_t H=5): HashBase<T>(m, H),
    table_(NULL), tblLen_(255*MAX_STR_LEN) {
    initSeeds();
  }
-  ~UnivHash_tableXOR() {freeSeeds();}
+  ~UnivHash_tableXOR() {
+    freeSeeds();
+  }
  T hash(const char* s, count_t h);
  T hash(const wordID_t* id, const int len, count_t h) {}
 private:
@@ -132,13 +153,15 @@ class UnivHash_tableXOR: public HashBase<T> {
 
 // ShiftAddXor
 template <typename T>
-void Hash_shiftAddXOR<T>::initSeeds() {
+void Hash_shiftAddXOR<T>::initSeeds()
+{
  v_ = new T[this->H_];
  for(count_t i=0; i < this->H_; i++)
    v_[i] = Utils::rand<T>() + 1;
 }
 template <typename T>
-T Hash_shiftAddXOR<T>::hash(const char* s, count_t h=0) {
+T Hash_shiftAddXOR<T>::hash(const char* s, count_t h=0)
+{
  T value = v_[h];
  int pos(0);
  unsigned char c;
@@ -150,7 +173,8 @@ T Hash_shiftAddXOR<T>::hash(const char* s, count_t h=0) {
 
 // UnivHash_tableXOR
 template <typename T>
-void UnivHash_tableXOR<T>::initSeeds() {
+void UnivHash_tableXOR<T>::initSeeds()
+{
  // delete any values in table
  if(table_) freeSeeds();
  // instance of new table
@@ -164,14 +188,16 @@ void UnivHash_tableXOR<T>::initSeeds() {
  }
 }
 template <typename T>
-void UnivHash_tableXOR<T>::freeSeeds() {
+void UnivHash_tableXOR<T>::freeSeeds()
+{
  for(count_t j = 0; j < this->H_; j++)
    delete[] table_[j];
  delete[] table_;
  table_ = NULL;
 }
 template <typename T>
-T UnivHash_tableXOR<T>::hash(const char* s, count_t h = 0) {
+T UnivHash_tableXOR<T>::hash(const char* s, count_t h = 0)
+{
  T value = 0;
  count_t pos = 0, idx = 0;
  unsigned char c;
@@ -183,7 +209,8 @@ T UnivHash_tableXOR<T>::hash(const char* s, count_t h = 0) {
 
 // UnivHash_noPrimes
 template <typename T>
-void UnivHash_noPrimes<T>::initSeeds() {
+void UnivHash_noPrimes<T>::initSeeds()
+{
  a_ = new P[this->H_];
  for(T i=0; i < this->H_; i++) {
    a_[i] = Utils::rand<P>();
@@ -191,14 +218,16 @@ void UnivHash_noPrimes<T>::initSeeds() {
  }
 }
 template <typename T>
-T UnivHash_noPrimes<T>::hash(const P x, count_t h=0) {
+T UnivHash_noPrimes<T>::hash(const P x, count_t h=0)
+{
  // h_a(x) = (ax mod 2^l) div 2^(l-k)
  T value = ((a_[h] * x) % p_) >> d_;
  return value % this->m_;
 }
 template <typename T>
 T UnivHash_noPrimes<T>::hash(const wordID_t* id, const int len,
-                             count_t h=0) {
+                             count_t h=0)
+{
  T value = 0;
  int pos(0);
  while(pos < len) {
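UnivHash_noPrimes above implements the multiply-shift scheme stated in its comment, h_a(x) = (ax mod 2^l) div 2^(l-k): multiply by a random seed, let the mod happen through fixed-width overflow, and keep the top k bits by shifting. A self-contained sketch of the same idea (assumed parameters, mirroring the scheme rather than the exact class):

#include <cstdint>

// Multiply-shift hash with l = 64: the multiplication wraps mod 2^64,
// and the top k bits of the product are the hash value, i.e.
// (a*x mod 2^64) div 2^(64-k). 'a' is a random 64-bit seed.
uint64_t multiplyShift(uint64_t x, uint64_t a, int k) {
  return (a * x) >> (64 - k);
}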
@ -208,7 +237,8 @@ T UnivHash_noPrimes<T>::hash(const wordID_t* id, const int len,
|
||||
return value % this->m_;
|
||||
}
|
||||
template <typename T>
|
||||
T UnivHash_noPrimes<T>::hash(const char* s, count_t h=0) {
|
||||
T UnivHash_noPrimes<T>::hash(const char* s, count_t h=0)
|
||||
{
|
||||
T value = 0;
|
||||
int pos(0);
|
||||
unsigned char c;
|
||||
@ -218,7 +248,8 @@ T UnivHash_noPrimes<T>::hash(const char* s, count_t h=0) {
|
||||
return value % this->m_;
|
||||
}
|
||||
template <typename T>
|
||||
void UnivHash_noPrimes<T>::save(FileHandler* fout) {
|
||||
void UnivHash_noPrimes<T>::save(FileHandler* fout)
|
||||
{
|
||||
HashBase<T>::save(fout);
|
||||
fout->write((char*)&p_, sizeof(p_));
|
||||
fout->write((char*)&d_, sizeof(d_));
|
||||
@ -227,20 +258,21 @@ void UnivHash_noPrimes<T>::save(FileHandler* fout) {
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
void UnivHash_noPrimes<T>::load(FileHandler* fin) {
|
||||
void UnivHash_noPrimes<T>::load(FileHandler* fin)
|
||||
{
|
||||
a_ = new P[this->H_];
|
||||
// HashBase<T>::load(fin) already done in constructor
|
||||
fin->read((char*)&p_, sizeof(p_));
|
||||
fin->read((char*)&d_, sizeof(d_));
|
||||
for(T i=0; i < this->H_; i++)
|
||||
{
|
||||
for(T i=0; i < this->H_; i++) {
|
||||
fin->read((char*)&a_[i], sizeof(a_[i]));
|
||||
}
|
||||
}
|
||||
|
||||
//UnivHash_linear
|
||||
template <typename T>
|
||||
void UnivHash_linear<T>::initSeeds() {
|
||||
void UnivHash_linear<T>::initSeeds()
|
||||
{
|
||||
a_ = new T*[this->H_];
|
||||
b_ = new T*[this->H_];
|
||||
for(count_t i=0; i < this->H_; i++) {
|
||||
@ -253,7 +285,8 @@ void UnivHash_linear<T>::initSeeds() {
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
void UnivHash_linear<T>::freeSeeds() {
|
||||
void UnivHash_linear<T>::freeSeeds()
|
||||
{
|
||||
for(count_t i=0; i < this->H_; i++) {
|
||||
delete[] a_[i];
|
||||
delete[] b_[i];
|
||||
@ -264,7 +297,8 @@ void UnivHash_linear<T>::freeSeeds() {
|
||||
}
|
||||
template <typename T>
|
||||
inline T UnivHash_linear<T>::hash(const wordID_t* id, const int len,
|
||||
count_t h=0) {
|
||||
count_t h=0)
|
||||
{
|
||||
CHECK(h < this->H_);
|
||||
T value = 0;
|
||||
int pos(0);
|
||||
@ -276,13 +310,15 @@ inline T UnivHash_linear<T>::hash(const wordID_t* id, const int len,
|
||||
}
|
||||
template <typename T>
|
||||
inline T UnivHash_linear<T>::hash(const wordID_t id, const count_t pos,
|
||||
const T prevValue, count_t h=0) {
|
||||
const T prevValue, count_t h=0)
|
||||
{
|
||||
CHECK(h < this->H_);
|
||||
T value = prevValue + ((a_[h][pos] * id) + b_[h][pos]); // % pr_;
|
||||
return value % this->m_;
|
||||
}
|
||||
template <typename T>
|
||||
void UnivHash_linear<T>::save(FileHandler* fout) {
|
||||
void UnivHash_linear<T>::save(FileHandler* fout)
|
||||
{
|
||||
// int bytes = sizeof(a_[0][0]);
|
||||
HashBase<T>::save(fout);
|
||||
fout->write((char*)&pr_, sizeof(pr_));
|
||||
@ -296,7 +332,8 @@ void UnivHash_linear<T>::save(FileHandler* fout) {
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
void UnivHash_linear<T>::load(FileHandler* fin) {
|
||||
void UnivHash_linear<T>::load(FileHandler* fin)
|
||||
{
|
||||
// HashBase<T>::load(fin) already done in constructor
|
||||
fin->read((char*)&pr_, sizeof(pr_));
|
||||
a_ = new T*[this->H_];
|
||||
|
@ -16,7 +16,8 @@ using randlm::Cache;
|
||||
const bool strict_checks_ = false;
|
||||
|
||||
template<typename T>
|
||||
class OnlineRLM: public PerfectHash<T> {
|
||||
class OnlineRLM: public PerfectHash<T>
|
||||
{
|
||||
public:
|
||||
OnlineRLM(uint16_t MBs, int width, int bucketRange, count_t order,
|
||||
Vocab* v, float qBase = 8): PerfectHash<T>(MBs, width, bucketRange, qBase),
|
||||
@ -58,8 +59,12 @@ public:
|
||||
bool bStrict = false);
|
||||
void remove(const std::vector<string>& ngram);
|
||||
count_t heurDelete(count_t num2del, count_t order = 5);
|
||||
uint64_t corpusSize() {return corpusSize_;}
|
||||
void corpusSize(uint64_t c) {corpusSize_ = c;}
|
||||
uint64_t corpusSize() {
|
||||
return corpusSize_;
|
||||
}
|
||||
void corpusSize(uint64_t c) {
|
||||
corpusSize_ = c;
|
||||
}
|
||||
void clearCache() {
|
||||
if(cache_) cache_->clear();
|
||||
}
|
||||
@ -87,7 +92,8 @@ private:
|
||||
BitFilter* bHit_;
|
||||
};
|
||||
template<typename T>
|
||||
bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value) {
|
||||
bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value)
|
||||
{
|
||||
int len = ngram.size();
|
||||
wordID_t wrdIDs[len];
|
||||
uint64_t index(this->cells_ + 1);
|
||||
@ -104,7 +110,8 @@ bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value) {
|
||||
return true;
|
||||
}
|
||||
template<typename T>
|
||||
bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
|
||||
bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value)
|
||||
{
|
||||
int len = ngram.size();
|
||||
wordID_t wrdIDs[len];
|
||||
uint64_t index(this->cells_ + 1);
|
||||
@ -120,13 +127,13 @@ bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
|
||||
bIncluded = PerfectHash<T>::update2(wrdIDs, len, value, hpdItr, index);
|
||||
if(index < this->cells_) {
|
||||
markQueried(index);
|
||||
}
|
||||
else if(hpdItr != this->dict_.end()) markQueried(hpdItr);
|
||||
} else if(hpdItr != this->dict_.end()) markQueried(hpdItr);
|
||||
}
|
||||
return bIncluded;
|
||||
}
|
||||
template<typename T>
|
||||
int OnlineRLM<T>::query(const wordID_t* IDs, int len) {
|
||||
int OnlineRLM<T>::query(const wordID_t* IDs, int len)
|
||||
{
|
||||
uint64_t filterIdx = 0;
|
||||
hpdEntry_t hpdItr;
|
||||
int value(0);
|
||||
@ -135,8 +142,7 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len) {
|
||||
if(hpdItr != this->dict_.end()) {
|
||||
//markQueried(hpdItr); // mark this event as "hit"
|
||||
value -= ((value & this->hitMask_) != 0) ? this->hitMask_ : 0; // check for previous hit marks
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
CHECK(filterIdx < this->cells_);
|
||||
//markQueried(filterIdx);
|
||||
}
|
||||
@ -144,7 +150,8 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len) {
|
||||
return value > 0 ? value : 0;
|
||||
}
|
||||
template<typename T>
|
||||
bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet) {
|
||||
bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet)
|
||||
{
|
||||
if(len <= 1) return true; // only do this for for ngrams with context
|
||||
static Cache<int> pfCache(-1, -1); // local prefix cache
|
||||
int code(0);
|
||||
@ -161,8 +168,7 @@ bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet) {
|
||||
CHECK(hpdItr == this->dict_.end());
|
||||
if(bSet) bPrefix_->setBit(filterIndex); // mark index
|
||||
else bPrefix_->clearBit(filterIndex); // unset index
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
CHECK(filterIndex == this->cells_ + 1);
|
||||
//how to handle hpd prefixes?
|
||||
}
|
||||
@ -172,24 +178,28 @@ bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet) {
|
||||
return true;
|
||||
}
|
||||
template<typename T>
|
||||
void OnlineRLM<T>::markQueried(const uint64_t& index) {
|
||||
void OnlineRLM<T>::markQueried(const uint64_t& index)
|
||||
{
|
||||
bHit_->setBit(index);
|
||||
//cerr << "filter[" << index << "] = " << this->filter_->read(index) << endl;
|
||||
}
|
||||
template<typename T>
|
||||
void OnlineRLM<T>::markQueried(hpdEntry_t& value) {
|
||||
void OnlineRLM<T>::markQueried(hpdEntry_t& value)
|
||||
{
|
||||
// set high bit of counter to indicate "hit" status
|
||||
value->second |= this->hitMask_;
|
||||
}
template<typename T>
void OnlineRLM<T>::remove(const std::vector<string>& ngram) {
void OnlineRLM<T>::remove(const std::vector<string>& ngram)
{
wordID_t IDs[ngram.size()];
for(count_t i = 0; i < ngram.size(); ++i)
IDs[i] = vocab_->GetWordID(ngram[i]);
PerfectHash<T>::remove(IDs, ngram.size());
}
template<typename T>
count_t OnlineRLM<T>::heurDelete(count_t num2del, count_t order) {
count_t OnlineRLM<T>::heurDelete(count_t num2del, count_t order)
{
count_t deleted = 0;
cout << "Deleting " << num2del << " of order "<< order << endl;
// delete from filter first
@ -220,7 +230,8 @@ count_t OnlineRLM<T>::heurDelete(count_t num2del, count_t order) {
}
template<typename T>
int OnlineRLM<T>::sbsqQuery(const std::vector<string>& ngram, int* codes,
bool bStrict) {
bool bStrict)
{
wordID_t IDs[ngram.size()];
for(count_t i = 0; i < ngram.size(); ++i)
IDs[i] = vocab_->GetWordID(ngram[i]);
@ -228,7 +239,8 @@ int OnlineRLM<T>::sbsqQuery(const std::vector<string>& ngram, int* codes,
}
template<typename T>
int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
bool bStrict) {
bool bStrict)
{
uint64_t filterIdx = 0;
int val(0), fnd(0);
hpdEntry_t hpdItr;
@ -240,8 +252,7 @@ int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
if(hpdItr != this->dict_.end()) {
val -= ((val & this->hitMask_) != 0) ? this->hitMask_ : 0; // account for previous hit marks
}
}
else if(bStrict) {
} else if(bStrict) {
break;
}
// add to value array
@ -256,7 +267,8 @@ int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
}
template<typename T>
float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
const void** state) {
const void** state)
{
static const float oovprob = log10(1.0 / (static_cast<float>(vocab_->Size()) - 1));
float logprob(0);
const void* context = (state) ? *state : 0;
@ -271,16 +283,14 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
in[i] = query(&ngram[i], len - i);
if(in[i] > 0) {
num_fnd = len - i;
}
else if(strict_checks_) break;
} else if(strict_checks_) break;
}
while(num_fnd > 1) { // get lower order count
//get sub-context of size one less than length found (excluding target)
if(((den_val = query(&ngram[len - num_fnd], num_fnd - 1)) > 0) &&
(den_val >= in[len - num_fnd]) && (in[len - num_fnd] > 0)) {
break;
}
else --num_fnd; // else backoff to lower ngram order
} else --num_fnd; // else backoff to lower ngram order
}
if(num_fnd == 1 && (in[len - 1] < 1)) // sanity check for unigrams
num_fnd = 0;
@ -310,7 +320,8 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
return logprob;
}
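
The getProb() routine above estimates log10 P(w | context) from raw n-gram counts, trying the longest matching suffix first and backing off to shorter contexts until a usable numerator/denominator pair is found, with an OOV floor when nothing matches. A simplified self-contained sketch of that control flow (toy counts only; the elided lines of this hunk also handle quantization and backoff weights):

    #include <cmath>
    #include <cstdio>

    // Toy count store: "a b" seen 4 times, "a b c" seen 3 times.
    static int toyQuery(const int *ids, int len) {
      if (len == 3 && ids[0] == 1 && ids[1] == 2 && ids[2] == 3) return 3;
      if (len == 2 && ids[0] == 1 && ids[1] == 2) return 4;
      return 0;
    }

    static float logProb(const int *ngram, int len, float oovLog10) {
      for (int i = 0; i < len; ++i) {                  // longest suffix first
        int num = toyQuery(&ngram[i], len - i);        // count of (context, w)
        if (num > 0) {
          int den = (len - i > 1) ? toyQuery(&ngram[i], len - i - 1) : num;
          if (den >= num && den > 0)
            return std::log10((float)num / (float)den);
        }
      }
      return oovLog10;                                 // nothing found: OOV
    }

    int main() {
      int ngram[3] = {1, 2, 3};                        // "a b c"
      std::printf("log10 P(c|a b) = %f\n", logProb(ngram, 3, -6.0f));
      return 0;
    }
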
template<typename T>
const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len) {
const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len)
{
int dummy(0);
float* addresses[len]; // only interested in addresses of cache
CHECK(cache_->getCache2(ngram, len, &addresses[0], &dummy) == len);
@ -318,7 +329,8 @@ const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len) {
return (const void*)addresses[0];
}
template<typename T>
void OnlineRLM<T>::randDelete(int num2del) {
void OnlineRLM<T>::randDelete(int num2del)
{
int deleted = 0;
for(uint64_t i = 0; i < this->cells_; i++) {
if(this->filter_->read(i) != 0) {
@ -329,7 +341,8 @@ void OnlineRLM<T>::randDelete(int num2del) {
}
}
template<typename T>
int OnlineRLM<T>::countHits() {
int OnlineRLM<T>::countHits()
{
int hit(0);
for(uint64_t i = 0; i < this->cells_; ++i)
if(bHit_->testBit(i)) ++hit;
@ -340,7 +353,8 @@ int OnlineRLM<T>::countHits() {
return hit;
}
template<typename T>
int OnlineRLM<T>::countPrefixes() {
int OnlineRLM<T>::countPrefixes()
{
int pfx(0);
for(uint64_t i = 0; i < this->cells_; ++i)
if(bPrefix_->testBit(i)) ++pfx;
@ -349,7 +363,8 @@ int OnlineRLM<T>::countPrefixes() {
return pfx;
}
template<typename T>
int OnlineRLM<T>::cleanUpHPD() {
int OnlineRLM<T>::cleanUpHPD()
{
cerr << "HPD size before = " << this->dict_.size() << endl;
std::vector<string> vDel, vtmp;
iterate(this->dict_, itr) {
@ -364,7 +379,8 @@ int OnlineRLM<T>::cleanUpHPD() {
return vDel.size();
}
template<typename T>
void OnlineRLM<T>::clearMarkings() {
void OnlineRLM<T>::clearMarkings()
{
cerr << "clearing all event hits\n";
bHit_->reset();
count_t* value(0);
@ -374,7 +390,8 @@ void OnlineRLM<T>::clearMarkings() {
}
}
template<typename T>
void OnlineRLM<T>::save(FileHandler* fout) {
void OnlineRLM<T>::save(FileHandler* fout)
{
cerr << "Saving ORLM...\n";
// save vocab
vocab_->Save(fout);
@ -387,7 +404,8 @@ void OnlineRLM<T>::save(FileHandler* fout) {
cerr << "Finished saving ORLM." << endl;
}
template<typename T>
void OnlineRLM<T>::load(FileHandler* fin) {
void OnlineRLM<T>::load(FileHandler* fin)
{
cerr << "Loading ORLM...\n";
// load vocab first
vocab_ = new Vocab(fin);
@ -402,7 +420,8 @@ void OnlineRLM<T>::load(FileHandler* fin) {
PerfectHash<T>::load(fin);
}
template<typename T>
void OnlineRLM<T>::removeNonMarked() {
void OnlineRLM<T>::removeNonMarked()
{
cerr << "deleting all unused events\n";
int deleted(0);
for(uint64_t i = 0; i < this->cells_; ++i) {

@ -1,6 +1,7 @@
#include "params.h"

namespace Moses {
namespace Moses
{
// parameter constants
const std::string Parameters::kNotSetValue = "__NOT_SET__";

@ -13,17 +14,20 @@ const int Parameters::kUndefinedValue = -1;
const std::string Parameters::kTrueValue = "1";
const std::string Parameters::kFalseValue = "0";

Parameters::Parameters(const ParamDefs * paramdefs, const count_t paramNum) {
Parameters::Parameters(const ParamDefs * paramdefs, const count_t paramNum)
{
initialize(paramdefs, paramNum);
}

Parameters::Parameters(int argc, char ** argv, const ParamDefs * paramdefs,
const count_t paramNum) {
const count_t paramNum)
{
initialize(paramdefs, paramNum);
loadParams(argc, argv);
}

void Parameters::initialize(const ParamDefs * paramdefs, const count_t paramNum) {
void Parameters::initialize(const ParamDefs * paramdefs, const count_t paramNum)
{
for( count_t i = 0; i < paramNum; i++ ) {
params_[paramdefs[i].name] = paramdefs[i]; // assign name
}
@ -32,7 +36,8 @@ void Parameters::initialize(const ParamDefs * paramdefs, const count_t paramNum)
cerr << "\t" << itr->first << " --> " << itr->second.value << endl;
}

bool Parameters::loadParams(int argc, char ** argv) {
bool Parameters::loadParams(int argc, char ** argv)
{
// load params from commandline args
//if( argc < 3 ) {
// fprintf(stderr, "ERROR: No parameters. Use \"-config\" or \"-f\" to specify configuration file.\n");
@ -80,7 +85,8 @@ bool Parameters::loadParams(int argc, char ** argv) {
return success;
}

std::string Parameters::normaliseParamName(const std::string & name) {
std::string Parameters::normaliseParamName(const std::string & name)
{
// Map valid abbreviations to long names. Retain other names.
if( params_.find(name) == params_.end() )
iterate(params_, i)
@ -89,17 +95,20 @@ std::string Parameters::normaliseParamName(const std::string & name) {
return name;
}

int Parameters::getValueType(const std::string& name) {
int Parameters::getValueType(const std::string& name)
{
if(params_.find(name) != params_.end())
return params_[name].type;
return Parameters::kUndefinedValue;
}

bool Parameters::isValidParamName(const std::string & name) {
bool Parameters::isValidParamName(const std::string & name)
{
return params_.find(name) != params_.end();
}

bool Parameters::setParamValue(const std::string& name, const std::string& val) {
bool Parameters::setParamValue(const std::string& name, const std::string& val)
{
// TODO: Add basic type checking w verifyValueType()
bool set = isValidParamName(name);
if(set) {
@ -108,7 +117,8 @@ bool Parameters::setParamValue(const std::string& name, const std::string& val)
}
return( set );
}
std::string Parameters::getParamValue(const std::string& name) {
std::string Parameters::getParamValue(const std::string& name)
{
std::string value = Parameters::kNotSetValue;
if(isValidParamName(name))
if(params_.find(name) != params_.end())
@ -117,7 +127,8 @@ std::string Parameters::getParamValue(const std::string& name) {
value = kFalseValue;
return value;
}
std::string Parameters::getParam(const std::string& name) {
std::string Parameters::getParam(const std::string& name)
{
return getParamValue(name);
/*void* Parameters::getParam(const std::string& name) {
void* paramVal = 0;
@ -148,12 +159,14 @@ std::string Parameters::getParam(const std::string& name) {
}
return paramVal;*/
}
bool Parameters::verifyValueType(const std::string& name, const std::string& val) {
bool Parameters::verifyValueType(const std::string& name, const std::string& val)
{
// Implement basic type checking
return true;
}

int Parameters::getParamCount() const {
int Parameters::getParamCount() const
{
return params_.size();
}

@ -161,7 +174,8 @@ int Parameters::getParamCount() const {
* HAVE TO CHANGE loadParams() from file to not overwrite command lines but
* override default if different*/
bool Parameters::loadParams(const std::string & file_path,
std::set<std::string>& setParams) {
std::set<std::string>& setParams)
{
// parameters loaded from file don't override cmd line parameters
/*std::set<std::string>::iterator end = setParams.end();
FileHandler file(file_path.c_str(), std::ios::in);

@ -12,7 +12,8 @@

#define NumOfParams(paramArray) (sizeof(paramArray)/sizeof((paramArray)[0]))

namespace Moses {
namespace Moses
{
typedef struct ParamDefs {
std::string name;
std::string value;
@ -21,7 +22,8 @@ typedef struct ParamDefs {
std::string description;
} ParamDefs;

class Parameters {
class Parameters
{
public:
static const std::string kNotSetValue;
static const int kBoolValue;
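
To make the table-driven setup above concrete: a static array of definitions seeds a name-to-definition map, and lookups go through that map, as in Parameters::initialize() earlier in this diff. A self-contained sketch with hypothetical fields and defaults (the real ParamDefs also carries a type tag used by getValueType()):

    #include <iostream>
    #include <map>
    #include <string>

    struct ParamDef {
      std::string name;
      std::string value;        // default value
      std::string description;
    };

    int main() {
      const ParamDef defs[] = {
        {"order",  "5",    "n-gram order"},
        {"memory", "1024", "filter size in MB"},
      };
      // sizeof(defs)/sizeof(defs[0]) mirrors the NumOfParams macro above.
      std::map<std::string, ParamDef> params;
      for (size_t i = 0; i < sizeof(defs) / sizeof(defs[0]); ++i)
        params[defs[i].name] = defs[i];           // mirrors initialize()
      std::cout << params["order"].value << "\n"; // prints the default: 5
      return 0;
    }
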
@ -18,7 +18,8 @@ typedef hpDict_t::iterator hpdEntry_t;
static count_t collisions_ = 0;
/* Based on Mortenson et al. 2006 */
template<typename T>
class PerfectHash {
class PerfectHash
{
public:
PerfectHash(uint16_t MBs, int width, int bucketRange, float qBase);
PerfectHash(FileHandler* fin) {
@ -63,7 +64,8 @@ private:
template<typename T>
PerfectHash<T>::PerfectHash(uint16_t MBs, int width, int bucketRange,
float qBase): hitMask_(1 << 31), memBound_(MBs * (1ULL << 20)),
cellWidth_(width) {
cellWidth_(width)
{
bucketRange_ = static_cast<uint8_t>(bucketRange);
if(bucketRange > 255) {
cerr << "ERROR: Max bucket range is > 2^8\n";
@ -85,7 +87,8 @@ PerfectHash<T>::PerfectHash(uint16_t MBs, int width, int bucketRange,
fingerHash_ = new UnivHash_linear<T>(pow(2.0f, cellWidth_), MAX_HASH_FUNCS, PRIME);
}
template<typename T>
PerfectHash<T>::~PerfectHash() {
PerfectHash<T>::~PerfectHash()
{
delete[] idxTracker_;
delete filter_;
filter_ = NULL;
@ -96,7 +99,8 @@ PerfectHash<T>::~PerfectHash() {
}
template<typename T>
uint64_t PerfectHash<T>::insert(const wordID_t* IDs, const int len,
const count_t value) {
const count_t value)
{
count_t bucket = (bucketHash_->size() > 1 ? bucketHash_->hash(IDs, len, len) : bucketHash_->hash(IDs, len));
if(idxTracker_[bucket] < (int)bucketRange_) { // if empty rows
// restriction on fprint value is non-zero
@ -108,8 +112,7 @@ uint64_t PerfectHash<T>::insert(const wordID_t* IDs, const int len,
T filterVal = filter_->read(index);
if((filterVal == 0) && (emptyidx == cells_ + 1)) { // record first empty row
emptyidx = index;
}
else if(filterVal == fp) {
} else if(filterVal == fp) {
++collisions_;
dict_[hpDictKeyValue(IDs, len)] = value; // store exact in hpd
return cells_ + 1; // finished
@ -122,15 +125,15 @@ uint64_t PerfectHash<T>::insert(const wordID_t* IDs, const int len,
values_->write(emptyidx, code);
++idxTracker_[bucket]; // keep track of bucket size
return emptyidx;
}
else { // bucket is full
} else { // bucket is full
dict_[hpDictKeyValue(IDs, len)] = value; // add to hpd
return cells_ + 1;
}
}
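
The insert() above is the core of the dynamic Bloomier filter: hash the key to a bucket, scan the bucket's cells for the first empty row while watching for a fingerprint clash, and on a clash or a full bucket fall back to the exact high-performance dictionary (dict_). A simplified single-bucket sketch of that decision logic (hypothetical types, no value quantization):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct MiniBloomier {
      std::vector<uint16_t> cells;       // fingerprint cells of one bucket
      std::map<std::string, int> exact;  // stand-in for the hpd (dict_)

      // Returns true if stored in the filter, false if sent to the exact map.
      bool insert(const std::string &key, uint16_t fp, int value) {
        size_t empty = cells.size();                      // "none yet" marker
        for (size_t i = 0; i < cells.size(); ++i) {
          if (cells[i] == fp) { exact[key] = value; return false; } // clash
          if (cells[i] == 0 && empty == cells.size()) empty = i;    // 1st empty
        }
        if (empty == cells.size()) { exact[key] = value; return false; } // full
        cells[empty] = fp;             // fp must be non-zero, as in the code
        return true;
      }
    };

    int main() {
      MiniBloomier b;
      b.cells.assign(4, 0);
      bool first = b.insert("a b", 17, 3);   // goes to the filter
      bool second = b.insert("c d", 17, 5);  // same fingerprint: exact map
      std::printf("%d %d\n", (int)first, (int)second);  // prints: 1 0
      return 0;
    }
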
template<typename T>
bool PerfectHash<T>::update(const wordID_t* IDs, const int len,
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx)
{
// check if key is in high perf. dictionary
filterIdx = cells_ + 1;
string skey = hpDictKeyValue(IDs, len);
@ -159,14 +162,14 @@ bool PerfectHash<T>::update(const wordID_t* IDs, const int len,
}
template<typename T>
int PerfectHash<T>::query(const wordID_t* IDs, const int len,
hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
hpdEntry_t& hpdAddr, uint64_t& filterIdx)
{
// check if key is in high perf. dictionary
string skey = hpDictKeyValue(IDs, len);
if((hpdAddr = dict_.find(skey)) != dict_.end()) {
filterIdx = cells_ + 1;
return(hpdAddr->second); // returns copy of value
}
else { // check if key is in filter
} else { // check if key is in filter
// get bucket
//count_t bucket = bucketHash_->hash(IDs, len);
count_t bucket = (bucketHash_->size() > 1 ? bucketHash_->hash(IDs, len, len) : bucketHash_->hash(IDs, len));
@ -188,7 +191,8 @@ int PerfectHash<T>::query(const wordID_t* IDs, const int len,
return -1;
}
template<typename T>
void PerfectHash<T>::remove(const wordID_t* IDs, const int len) {
void PerfectHash<T>::remove(const wordID_t* IDs, const int len)
{
// delete key if in high perf. dictionary
string skey = hpDictKeyValue(IDs, len);
if(dict_.find(skey) != dict_.end())
@ -213,7 +217,8 @@ void PerfectHash<T>::remove(const wordID_t* IDs, const int len) {
}
}
template<typename T> // clear filter index
void PerfectHash<T>::remove(uint64_t index) {
void PerfectHash<T>::remove(uint64_t index)
{
CHECK(index < cells_);
CHECK(filter_->read(index) != 0); // slow
filter_->write(index, 0);
@ -224,7 +229,8 @@ void PerfectHash<T>::remove(uint64_t index) {
}
template<typename T>
T PerfectHash<T>::nonZeroSignature(const wordID_t* IDs, const int len,
count_t bucket) {
count_t bucket)
{
count_t h = bucket;
T fingerprint(0);
do {
@ -236,7 +242,8 @@ T PerfectHash<T>::nonZeroSignature(const wordID_t* IDs, const int len,
return fingerprint;
}
template<typename T>
string PerfectHash<T>::hpDictKeyValue(const wordID_t* IDs, const int len) {
string PerfectHash<T>::hpDictKeyValue(const wordID_t* IDs, const int len)
{
string skey(" ");
for(int i = 0; i < len; ++i)
skey += Utils::IntToStr(IDs[i]) + "¬";
@ -244,17 +251,20 @@ string PerfectHash<T>::hpDictKeyValue(const wordID_t* IDs, const int len) {
return skey;
}
template<typename T>
count_t PerfectHash<T>::hpDictMemUse() {
count_t PerfectHash<T>::hpDictMemUse()
{
// return hpDict memory usage in MBs
return (count_t) sizeof(hpDict_t::value_type)* dict_.size() >> 20;
}
template<typename T>
count_t PerfectHash<T>::bucketsMemUse() {
count_t PerfectHash<T>::bucketsMemUse()
{
// return bucket memory usage in MBs
return (count_t) (filter_->size() + values_->size());
}
template<typename T>
void PerfectHash<T>::save(FileHandler* fout) {
void PerfectHash<T>::save(FileHandler* fout)
{
CHECK(fout != 0);
cerr << "\tSaving perfect hash parameters...\n";
fout->write((char*)&hitMask_, sizeof(hitMask_));
@ -279,7 +289,8 @@ void PerfectHash<T>::save(FileHandler* fout) {
*fout << t->first << "\t" << t->second << "\n";
}
template<typename T>
void PerfectHash<T>::load(FileHandler* fin) {
void PerfectHash<T>::load(FileHandler* fin)
{
CHECK(fin != 0);
cerr << "\tLoading perfect hash parameters...\n";
fin->read((char*)&hitMask_, sizeof(hitMask_));
@ -315,7 +326,8 @@ void PerfectHash<T>::load(FileHandler* fin) {
cerr << "Finished loading ORLM." << endl;
}
template<typename T>
void PerfectHash<T>::analyze() {
void PerfectHash<T>::analyze()
{
cerr << "Analyzing Dynamic Bloomier Filter...\n";
// see how many items in each bucket
uint8_t* bucketCnt = new uint8_t[totBuckets_];
@ -328,16 +340,14 @@ void PerfectHash<T>::analyze() {
if(filter_->read(i) != 0) {
++bucketCnt[curBucket];
++totalCellsSet;
}
else ++totalZeroes;
} else ++totalZeroes;
}
count_t bi = 0, si = 0;
for(int i = 0; i < totBuckets_; ++i) {
if(bucketCnt[i] > largestBucket) {
largestBucket = bucketCnt[i];
bi = i;
}
else if(bucketCnt[i] < smallestBucket) {
} else if(bucketCnt[i] < smallestBucket) {
smallestBucket = bucketCnt[i];
si = i;
}
@ -375,7 +385,8 @@ void PerfectHash<T>::analyze() {
}
template<typename T>
bool PerfectHash<T>::update2(const wordID_t* IDs, const int len,
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx)
{
// check if key is in high perf. dictionary
filterIdx = cells_ + 1;
string skey = hpDictKeyValue(IDs, len);

@ -8,7 +8,8 @@
#include "types.h"

static const float kFloatErr = 0.00001f;
class LogQtizer {
class LogQtizer
{
public:
LogQtizer(float i): base_(pow(2, 1 / i)) {
CHECK(base_ > 1);
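
LogQtizer above compresses counts by storing them on a logarithmic scale: with base b = 2^(1/i), a value x becomes an integer code near log_b(x) and decodes to b^code, so i codes cover each doubling and the relative error per value stays bounded. The exact rounding LogQtizer uses is not visible in this hunk, so the following is a sketch of the principle only:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double i = 8.0;                             // codes per doubling
      const double base = std::pow(2.0, 1.0 / i);       // matches base_ above
      const double x = 1234.5;
      const int code = (int)std::floor(std::log(x) / std::log(base) + 0.5);
      const double decoded = std::pow(base, code);      // within ~a factor of base
      std::printf("x=%.1f code=%d decoded=%.1f\n", x, code, decoded);
      return 0;
    }
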
@ -103,7 +103,8 @@ bool Vocab::Load(const std::string & vocab_path, const FactorDirection& directio
std::cerr << "Loading vocab from " << vocab_path << std::endl;
return Load(&vcbin, direction, factors, closed);
}
bool Vocab::Load(FileHandler* vcbin) {
bool Vocab::Load(FileHandler* vcbin)
{
FactorList factors;
factors.push_back(0);
return Load(vcbin, Input, factors);

@ -30,20 +30,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

using namespace std;

namespace Moses {
namespace Moses
{

LanguageModel::LanguageModel() {
LanguageModel::LanguageModel()
{
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
}

void LanguageModel::Init(ScoreIndexManager &scoreIndexManager) {
void LanguageModel::Init(ScoreIndexManager &scoreIndexManager)
{
scoreIndexManager.AddScoreProducer(this);
}

LanguageModel::~LanguageModel() {}

// don't inline virtual funcs...
size_t LanguageModel::GetNumScoreComponents() const {
size_t LanguageModel::GetNumScoreComponents() const
{
if (m_enableOOVFeature) {
return 2;
} else {
@ -51,13 +55,15 @@ size_t LanguageModel::GetNumScoreComponents() const {
}
}

float LanguageModel::GetWeight() const {
float LanguageModel::GetWeight() const
{
size_t lmIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[lmIndex];
}

float LanguageModel::GetOOVWeight() const {
float LanguageModel::GetOOVWeight() const
{
if (!m_enableOOVFeature) return 0;
size_t lmIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(GetScoreBookkeepingID());

@ -35,7 +35,8 @@ class Phrase;
class ScoreIndexManager;

//! Abstract base class which represents a language model on a contiguous phrase
class LanguageModel : public StatefulFeatureFunction {
class LanguageModel : public StatefulFeatureFunction
{
protected:
LanguageModel();

@ -10,10 +10,12 @@
namespace Moses
{

LanguageModelDMapLM::LanguageModelDMapLM() : m_lm(0) {
LanguageModelDMapLM::LanguageModelDMapLM() : m_lm(0)
{
}

LanguageModelDMapLM::~LanguageModelDMapLM() {
LanguageModelDMapLM::~LanguageModelDMapLM()
{
delete m_lm;
}

@ -97,38 +99,44 @@ float LanguageModelDMapLM::GetValue(
return score;
}

const FFState* LanguageModelDMapLM::GetNullContextState() const {
const FFState* LanguageModelDMapLM::GetNullContextState() const
{
DMapLMState* state = new DMapLMState();
state->m_last_succeeding_order = GetNGramOrder();
return state;
}

FFState* LanguageModelDMapLM::GetNewSentenceState() const {
FFState* LanguageModelDMapLM::GetNewSentenceState() const
{
DMapLMState* state = new DMapLMState();
state->m_last_succeeding_order = GetNGramOrder();
return state;
}

const FFState* LanguageModelDMapLM::GetBeginSentenceState() const {
const FFState* LanguageModelDMapLM::GetBeginSentenceState() const
{
DMapLMState* state = new DMapLMState();
state->m_last_succeeding_order = GetNGramOrder();
return state;
}

FFState* LanguageModelDMapLM::NewState(const FFState* state) const {
FFState* LanguageModelDMapLM::NewState(const FFState* state) const
{
DMapLMState* new_state = new DMapLMState();
const DMapLMState* cast_state = static_cast<const DMapLMState*>(state);
new_state->m_last_succeeding_order = cast_state->m_last_succeeding_order;
return new_state;
}

void LanguageModelDMapLM::CleanUpAfterSentenceProcessing() {
void LanguageModelDMapLM::CleanUpAfterSentenceProcessing()
{
m_lm->printStats();
m_lm->resetStats();
m_lm->clearCaches();
}

void LanguageModelDMapLM::InitializeBeforeSentenceProcessing() {
void LanguageModelDMapLM::InitializeBeforeSentenceProcessing()
{
}

} // namespace Moses

@ -12,9 +12,11 @@
#include "LM/SingleFactor.h"
#include "Util.h"

namespace Moses {
namespace Moses
{

class DMapLMState : public FFState {
class DMapLMState : public FFState
{
public:
int Compare(const FFState &o) const {
const DMapLMState& cast_other = static_cast<const DMapLMState&>(o);

@ -69,7 +69,8 @@ void LanguageModelImplementation::GetState(
}

// Calculate score of a phrase.
void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;

@ -116,7 +117,8 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
}
}

FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out, const LanguageModel *feature) const {
FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out, const LanguageModel *feature) const
{
// In this function, we only compute the LM scores of n-grams that overlap a
// phrase boundary. Phrase-internal scores are taken directly from the
// translation option.
@ -178,9 +180,7 @@ FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFS
contextFactor[i] = &hypo.GetWord((size_t)currPos);
}
lmScore += GetValueForgotState(contextFactor, *res).score;
}
else
{
} else {
if (endPos < currEndPos) {
//need to get the LM state (otherwise the last LM state is fine)
for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
@ -207,7 +207,8 @@ FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFS
return res;
}

namespace {
namespace
{

// This is the FFState used by LanguageModelImplementation::EvaluateChart.
// Though svn blame goes back to heafield, don't blame me. I just moved this from LanguageModelChartState.cpp and ChartHypothesis.cpp.
@ -227,8 +228,7 @@ private:
* \param ret prefix string
* \param size maximum size (typically max lm context window)
*/
size_t CalcPrefix(const ChartHypothesis &hypo, int featureID, Phrase &ret, size_t size) const
{
size_t CalcPrefix(const ChartHypothesis &hypo, int featureID, Phrase &ret, size_t size) const {
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignmentInfo().GetNonTermIndexMap();
@ -262,8 +262,7 @@ private:
* \param ret suffix phrase
* \param size maximum size of suffix
*/
size_t CalcSuffix(const ChartHypothesis &hypo, int featureID, Phrase &ret, size_t size) const
{
size_t CalcSuffix(const ChartHypothesis &hypo, int featureID, Phrase &ret, size_t size) const {
CHECK(m_contextPrefix.GetSize() <= m_numTargetTerminals);

// special handling for small hypotheses
@ -292,8 +291,7 @@ private:
size_t nonTermInd = nonTermIndexMap[pos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
size = static_cast<const LanguageModelChartState*>(prevHypo->GetFFState(featureID))->CalcSuffix(*prevHypo, featureID, ret, size);
}
else {
} else {
ret.PrependWord(hypo.GetCurrTargetPhrase().GetWord(pos));
size--;
}
@ -312,8 +310,7 @@ public:
:m_lmRightContext(NULL)
,m_contextPrefix(order - 1)
,m_contextSuffix( order - 1)
,m_hypo(hypo)
{
,m_hypo(hypo) {
m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();

for (std::vector<const ChartHypothesis*>::const_iterator i = hypo.GetPrevHypos().begin(); i != hypo.GetPrevHypos().end(); ++i) {
@ -334,8 +331,12 @@ public:
m_lmRightContext = rightState;
}

float GetPrefixScore() const { return m_prefixScore; }
FFState* GetRightContext() const { return m_lmRightContext; }
float GetPrefixScore() const {
return m_prefixScore;
}
FFState* GetRightContext() const {
return m_lmRightContext;
}

size_t GetNumTargetTerminals() const {
return m_numTargetTerminals;
@ -353,8 +354,7 @@ public:
dynamic_cast<const LanguageModelChartState &>( o );

// prefix
if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) // not for "<s> ..."
{
if (m_hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for "<s> ..."
int ret = GetPrefix().Compare(other.GetPrefix());
if (ret != 0)
return ret;
@ -362,8 +362,7 @@ public:

// suffix
size_t inputSize = m_hypo.GetManager().GetSource().GetSize();
if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1)// not for "... </s>"
{
if (m_hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... </s>"
int ret = other.GetRightContext()->Compare(*m_lmRightContext);
if (ret != 0)
return ret;
@ -374,7 +373,8 @@ public:

} // namespace

FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out, const LanguageModel *scorer) const {
FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out, const LanguageModel *scorer) const
{
LanguageModelChartState *ret = new LanguageModelChartState(hypo, featureID, GetNGramOrder());
// data structure for factored context phrase (history and predicted word)
vector<const Word*> contextFactor;
@ -394,33 +394,28 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
// loop over rule
for (size_t phrasePos = 0, wordPos = 0;
phrasePos < hypo.GetCurrTargetPhrase().GetSize();
phrasePos++)
{
phrasePos++) {
// consult rule for either word or non-terminal
const Word &word = hypo.GetCurrTargetPhrase().GetWord(phrasePos);

// regular word
if (!word.IsNonTerminal())
{
if (!word.IsNonTerminal()) {
ShiftOrPush(contextFactor, word);

// beginning of sentence symbol <s>? -> just update state
if (word == GetSentenceStartArray())
{
if (word == GetSentenceStartArray()) {
CHECK(phrasePos == 0);
delete lmState;
lmState = NewState( GetBeginSentenceState() );
}
// score a regular word added by the rule
else
{
else {
updateChartScore( &prefixScore, &finalizedScore, UntransformLMScore(GetValueGivenState(contextFactor, *lmState).score), ++wordPos );
}
}

// non-terminal, add phrase from underlying hypothesis
else
{
else {
// look up underlying hypothesis
size_t nonTermIndex = nonTermIndexMap[phrasePos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
@ -444,8 +439,7 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
// push suffix
int suffixPos = prevState->GetSuffix().GetSize() - (GetNGramOrder()-1);
if (suffixPos < 0) suffixPos = 0; // push all words if less than order
for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++)
{
for(; (size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) {
const Word &word = prevState->GetSuffix().GetWord(suffixPos);
ShiftOrPush(contextFactor, word);
wordPos++;
@ -453,22 +447,19 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
}

// internal non-terminal
else
{
else {
// score its prefix
for(size_t prefixPos = 0;
prefixPos < GetNGramOrder()-1 // up to LM order window
&& prefixPos < subPhraseLength; // up to length
prefixPos++)
{
prefixPos++) {
const Word &word = prevState->GetPrefix().GetWord(prefixPos);
ShiftOrPush(contextFactor, word);
updateChartScore( &prefixScore, &finalizedScore, UntransformLMScore(GetValueGivenState(contextFactor, *lmState).score), ++wordPos );
}

// check if we are dealing with a large sub-phrase
if (subPhraseLength > GetNGramOrder() - 1)
{
if (subPhraseLength > GetNGramOrder() - 1) {
// add its finalized language model score
finalizedScore +=
prevHypo->GetScoreBreakdown().GetScoresForProducer(scorer)[0] // full score
@ -503,11 +494,11 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
return ret;
}

void LanguageModelImplementation::updateChartScore(float *prefixScore, float *finalizedScore, float score, size_t wordPos) const {
void LanguageModelImplementation::updateChartScore(float *prefixScore, float *finalizedScore, float score, size_t wordPos) const
{
if (wordPos < GetNGramOrder()) {
*prefixScore += score;
}
else {
} else {
*finalizedScore += score;
}
}
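
updateChartScore() above is the heart of chart-based LM scoring: the first (order - 1) words of a hypothesis may still gain left context when the hypothesis is plugged into a larger rule, so their scores accumulate in a revisable prefix total, while everything later is final. A self-contained sketch of the split (wordPos is 1-based, as at the call sites above):

    #include <cstdio>

    static void updateChartScore(float *prefixScore, float *finalizedScore,
                                 float score, unsigned wordPos, unsigned order) {
      if (wordPos < order) *prefixScore += score;  // context may still grow
      else                 *finalizedScore += score;
    }

    int main() {
      float prefix = 0.0f, finalized = 0.0f;
      const unsigned order = 3;                    // trigram LM
      const float scores[5] = {-1.0f, -0.5f, -0.2f, -0.3f, -0.4f};
      for (unsigned pos = 1; pos <= 5; ++pos)
        updateChartScore(&prefix, &finalized, scores[pos - 1], pos, order);
      std::printf("prefix=%.2f finalized=%.2f\n", prefix, finalized);
      return 0;
    }

With order 3, only the first two words land in the prefix total (-1.50); the rest are finalized (-0.90).
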
@ -126,7 +126,8 @@ public:
virtual void CleanUpAfterSentenceProcessing() {};
};

class LMRefCount : public LanguageModel {
class LMRefCount : public LanguageModel
{
public:
LMRefCount(ScoreIndexManager &scoreIndexManager, LanguageModelImplementation *impl) : m_impl(impl) {
Init(scoreIndexManager);

@ -43,8 +43,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

using namespace std;

namespace Moses {
namespace {
namespace Moses
{
namespace
{

struct KenLMState : public FFState {
lm::ngram::State state;
@ -59,7 +61,8 @@ struct KenLMState : public FFState {
/*
* An implementation of single factor LM using Ken's code.
*/
template <class Model> class LanguageModelKen : public LanguageModel {
template <class Model> class LanguageModelKen : public LanguageModel
{
public:
LanguageModelKen(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy);

@ -119,7 +122,8 @@ template <class Model> class LanguageModelKen : public LanguageModel {
const Factor *m_beginSentenceFactor;
};

class MappingBuilder : public lm::EnumerateVocab {
class MappingBuilder : public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
: m_factorCollection(factorCollection), m_mapping(mapping) {}
@ -138,11 +142,13 @@ private:
std::vector<lm::WordIndex> &m_mapping;
};

template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy) : m_factorType(factorType) {
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy) : m_factorType(factorType)
{
lm::ngram::Config config;
IFVERBOSE(1) {
config.messages = &std::cerr;
} else {
}
else {
config.messages = NULL;
}
FactorCollection &collection = FactorCollection::Instance();
@ -156,7 +162,8 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
Init(manager);
}

template <class Model> LanguageModel *LanguageModelKen<Model>::Duplicate(ScoreIndexManager &manager) const {
template <class Model> LanguageModel *LanguageModelKen<Model>::Duplicate(ScoreIndexManager &manager) const
{
return new LanguageModelKen<Model>(manager, *this);
}

@ -165,11 +172,13 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(ScoreIndexManag
// TODO: don't copy this.
m_lmIdLookup(copy_from.m_lmIdLookup),
m_factorType(copy_from.m_factorType),
m_beginSentenceFactor(copy_from.m_beginSentenceFactor) {
m_beginSentenceFactor(copy_from.m_beginSentenceFactor)
{
Init(manager);
}

template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
@ -210,7 +219,8 @@ template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phr
}
}

template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;

std::auto_ptr<KenLMState> ret(new KenLMState());
@ -265,15 +275,19 @@ template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothes
return ret.release();
}

class LanguageModelChartStateKenLM : public FFState {
class LanguageModelChartStateKenLM : public FFState
{
public:
LanguageModelChartStateKenLM() {}

const lm::ngram::ChartState &GetChartState() const { return m_state; }
lm::ngram::ChartState &GetChartState() { return m_state; }
const lm::ngram::ChartState &GetChartState() const {
return m_state;
}
lm::ngram::ChartState &GetChartState() {
return m_state;
}

int Compare(const FFState& o) const
{
int Compare(const FFState& o) const {
const LanguageModelChartStateKenLM &other = static_cast<const LanguageModelChartStateKenLM&>(o);
int ret = m_state.Compare(other.m_state);
return ret;
@ -283,7 +297,8 @@ class LanguageModelChartStateKenLM : public FFState {
lm::ngram::ChartState m_state;
};

template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const {
template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
{
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
@ -323,7 +338,8 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha

} // namespace

LanguageModel *ConstructKenLM(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy) {
LanguageModel *ConstructKenLM(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy)
{
try {
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {

@ -26,7 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#include "TypeDef.h"

namespace Moses {
namespace Moses
{

class ScoreIndexManager;
class LanguageModel;

@ -12,7 +12,8 @@ using std::map;
namespace Moses
{
bool LanguageModelORLM::Load(const std::string &filePath, FactorType factorType,
size_t nGramOrder) {
size_t nGramOrder)
{
cerr << "Loading LanguageModelORLM..." << endl;
m_filePath = filePath;
m_factorType = factorType;
@ -26,7 +27,8 @@ bool LanguageModelORLM::Load(const std::string &filePath, FactorType factorType,
CreateFactors();
return true;
}
void LanguageModelORLM::CreateFactors() {
void LanguageModelORLM::CreateFactors()
{
FactorCollection &factorCollection = FactorCollection::Instance();
size_t maxFactorId = 0; // to create lookup vector later on
std::map<size_t, wordID_t> m_lmids_map; // map from factor id -> word id
@ -58,15 +60,18 @@ void LanguageModelORLM::CreateFactors() {
iter != m_lmids_map.end() ; ++iter)
lm_ids_vec_[iter->first] = iter->second;
}
wordID_t LanguageModelORLM::GetLmID(const std::string& str) const {
wordID_t LanguageModelORLM::GetLmID(const std::string& str) const
{
return m_lm->vocab_->GetWordID(str);
}
wordID_t LanguageModelORLM::GetLmID(const Factor* factor) const {
wordID_t LanguageModelORLM::GetLmID(const Factor* factor) const
{
size_t factorId = factor->GetId();
return (factorId >= lm_ids_vec_.size()) ? m_oov_id : lm_ids_vec_[factorId];
}
LMResult LanguageModelORLM::GetValue(const std::vector<const Word*> &contextFactor,
State* finalState) const {
State* finalState) const
{
FactorType factorType = GetFactorType();
// set up context
//std::vector<long unsigned int> factor(1,0);
@ -88,7 +93,8 @@ LMResult LanguageModelORLM::GetValue(const std::vector<const Word*> &contextFact
*/
return ret;
}
bool LanguageModelORLM::UpdateORLM(const std::vector<string>& ngram, const int value) {
bool LanguageModelORLM::UpdateORLM(const std::vector<string>& ngram, const int value)
{
/*cerr << "Inserting into ORLM: \"";
iterate(ngram, nit)
cerr << *nit << " ";

@ -15,7 +15,8 @@ namespace Moses
class Factor;
class Phrase;

class LanguageModelORLM : public LanguageModelPointerState {
class LanguageModelORLM : public LanguageModelPointerState
{
public:
typedef count_t T; // type for ORLM filter
LanguageModelORLM()
@ -30,7 +31,9 @@ public:
fout.close();
delete m_lm;
}
void CleanUpAfterSentenceProcessing() {m_lm->clearCache();} // clear caches
void CleanUpAfterSentenceProcessing() {
m_lm->clearCache(); // clear caches
}
void InitializeBeforeSentenceProcessing() { // nothing to do
//m_lm->initThreadSpecificData(); // Creates thread specific data iff
// compiled with multithreading.

@ -347,7 +347,8 @@ const FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const

}

LanguageModelMultiFactor *NewParallelBackoff() {
LanguageModelMultiFactor *NewParallelBackoff()
{
return new LanguageModelParallelBackoff();
}

@ -156,7 +156,8 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,

}

LanguageModelPointerState *NewRandLM() {
LanguageModelPointerState *NewRandLM()
{
return new LanguageModelRandLM();
}

@ -267,7 +267,8 @@ struct SGNReverseCompare {
/**
* Implements lattice sampling, as in Chatterjee & Cancedda, emnlp 2010
**/
void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const {
void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
{

vector<SearchGraphNode> searchGraph;
GetSearchGraph(searchGraph);

@ -278,8 +278,7 @@ bool Parameter::Validate()
for (iterParams = m_setting.begin(); iterParams != m_setting.end(); ++iterParams) {
const std::string &key = iterParams->first;

if (m_valid.find(key) == m_valid.end())
{
if (m_valid.find(key) == m_valid.end()) {
UserMessage::Add("Unknown parameter " + key);
noErrorFlag = false;
}
@ -457,8 +456,7 @@ bool Parameter::ReadConfigFile(const string &filePath )

if (line.size() == 0) {
// blank line. do nothing.
}
else if (line[0]=='[') {
} else if (line[0]=='[') {
// new parameter
for (size_t currPos = 0 ; currPos < line.size() ; currPos++) {
if (line[currPos] == ']') {

@ -11,7 +11,8 @@

#include "PhraseDictionarySCFG.h"

namespace Moses {
namespace Moses
{

class PhraseDictionaryALSuffixArray : public PhraseDictionarySCFG
{

@ -15,7 +15,8 @@

using namespace std;

namespace Moses {
namespace Moses
{

bool PhraseDictionaryHiero::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output

@ -11,7 +11,8 @@

#include "PhraseDictionarySCFG.h"

namespace Moses {
namespace Moses
{

class PhraseDictionaryHiero : public PhraseDictionarySCFG
{

@ -44,14 +44,17 @@ using namespace std;
namespace Moses
{

namespace {
void ParserDeath(const std::string &file, size_t line_num) {
namespace
{
void ParserDeath(const std::string &file, size_t line_num)
{
stringstream strme;
strme << "Syntax error at " << file << ":" << line_num;
UserMessage::Add(strme.str());
abort();
}
template <class It> StringPiece GrabOrDie(It &it, const std::string &file, size_t line_num) {
template <class It> StringPiece GrabOrDie(It &it, const std::string &file, size_t line_num)
{
if (!it) ParserDeath(file, line_num);
return *it++;
}

@ -135,8 +135,7 @@ ChartRuleLookupManager *PhraseDictionarySCFG::CreateRuleLookupManager(

void PhraseDictionarySCFG::SortAndPrune()
{
if (GetTableLimit())
{
if (GetTableLimit()) {
m_collection.Sort(GetTableLimit());
}
}

@ -50,12 +50,15 @@ class PhraseDictionarySCFG : public PhraseDictionary
, const LMList &languageModels
, const WordPenaltyProducer* wpProducer);

const std::string &GetFilePath() const { return m_filePath; }
const PhraseDictionaryNodeSCFG &GetRootNode() const { return m_collection; }
const std::string &GetFilePath() const {
return m_filePath;
}
const PhraseDictionaryNodeSCFG &GetRootNode() const {
return m_collection;
}

// Required by PhraseDictionary.
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const
{
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const {
CHECK(false);
return NULL;
}

@ -136,7 +136,8 @@ static WordVoc* ReadVoc(const std::string& filename)
}

class PDTimp {
class PDTimp
{
public:
typedef PrefixTreeF<LabelId,OFF_T> PTF;
typedef FilePtr<PTF> CPT;

@ -100,7 +100,8 @@ size_t PhraseDictionaryTreeAdaptor::GetNumInputScores() const
return imp->GetNumInputScores();
}

std::string PhraseDictionaryTreeAdaptor::GetScoreProducerDescription(unsigned idx) const{
std::string PhraseDictionaryTreeAdaptor::GetScoreProducerDescription(unsigned idx) const
{
if (idx < imp->GetNumInputScores()) {
return "InputScore";
} else {

@ -100,7 +100,9 @@ class RuleCube

RuleCubeItem *Pop(ChartManager &);

bool IsEmpty() const { return m_queue.empty(); }
bool IsEmpty() const {
return m_queue.empty();
}

const ChartTranslationOption &GetTranslationOption() const {
return m_transOpt;

@ -47,7 +47,9 @@ class TranslationDimension
, m_orderedTargetPhrases(&orderedTargetPhrases)
{}

std::size_t IncrementPos() { return m_pos++; }
std::size_t IncrementPos() {
return m_pos++;
}

bool HasMoreTranslations() const {
return m_pos+1 < m_orderedTargetPhrases->size();
@ -80,7 +82,9 @@ public:
, m_orderedHypos(&orderedHypos)
{}

std::size_t IncrementPos() { return m_pos++; }
std::size_t IncrementPos() {
return m_pos++;
}

bool HasMoreHypo() const {
return m_pos+1 < m_orderedHypos->size();
@ -120,7 +124,9 @@ class RuleCubeItem
return m_hypothesisDimensions;
}

float GetScore() const { return m_score; }
float GetScore() const {
return m_score;
}

void EstimateScore();

@ -53,7 +53,9 @@ class RuleCubeQueue

void Add(RuleCube *);
ChartHypothesis *Pop();
bool IsEmpty() const { return m_queue.empty(); }
bool IsEmpty() const {
return m_queue.empty();
}

private:
typedef std::priority_queue<RuleCube*, std::vector<RuleCube*>,

@ -85,8 +85,7 @@ class RuleTableLoaderCompact : public RuleTableLoader

// Like Tokenize() but records starting positions of tokens (instead of
// copying substrings) and assumes delimiter is ASCII space character.
void FindTokens(std::vector<size_t> &output, const std::string &str) const
{
void FindTokens(std::vector<size_t> &output, const std::string &str) const {
// Skip delimiters at beginning.
size_t lastPos = str.find_first_not_of(' ', 0);
// Find first "non-delimiter".

@ -49,8 +49,7 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create(
msg << "Unsupported compact rule table format: " << tokens[0];
UserMessage::Add(msg.str());
return std::auto_ptr<RuleTableLoader>();
}
else if (tokens[0] == "[X]" && tokens[1] == "|||") {
} else if (tokens[0] == "[X]" && tokens[1] == "|||") {
return std::auto_ptr<RuleTableLoader>(new
RuleTableLoaderHiero());

@ -11,7 +11,8 @@

using namespace std;

namespace Moses {
namespace Moses
{

bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,

@ -11,7 +11,8 @@

#include "RuleTableLoaderStandard.h"

namespace Moses {
namespace Moses
{

class RuleTableLoaderHiero : public RuleTableLoaderStandard
{

@ -63,12 +63,11 @@ void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t
vector<string> toks;
Tokenize(toks, phrase, " ");

for (size_t i = 0; i < toks.size(); ++i)
{
for (size_t i = 0; i < toks.size(); ++i) {
string &tok = toks[i];
size_t tokLen = tok.size();
if (tok.substr(0, 1) == "[" && tok.substr(tokLen - 1, 1) == "]")
{ // no-term
if (tok.substr(0, 1) == "[" && tok.substr(tokLen - 1, 1) == "]") {
// no-term
vector<string> split = Tokenize(tok, ",");
CHECK(split.size() == 2);

@ -76,12 +75,9 @@ void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t
size_t coIndex = Scan<size_t>(split[1]);

pair<size_t, size_t> &alignPoint = ntAlign[coIndex];
if (sourceTarget == 0)
{
if (sourceTarget == 0) {
alignPoint.first = i;
}
else
{
} else {
alignPoint.second = i;
}
}
@ -96,8 +92,7 @@ void ReformateHieroScore(string &scoreString)
vector<string> toks;
Tokenize(toks, scoreString, " ");

for (size_t i = 0; i < toks.size(); ++i)
{
for (size_t i = 0; i < toks.size(); ++i) {
string &tok = toks[i];

float score = Scan<float>(tok);
@ -126,8 +121,7 @@ string *ReformatHieroRule(const string &lineOrig)

stringstream align;
map<size_t, pair<size_t, size_t> >::const_iterator iterAlign;
for (iterAlign = ntAlign.begin(); iterAlign != ntAlign.end(); ++iterAlign)
{
for (iterAlign = ntAlign.begin(); iterAlign != ntAlign.end(); ++iterAlign) {
const pair<size_t, size_t> &alignPoint = iterAlign->second;
align << alignPoint.first << "-" << alignPoint.second << " ";
}
@ -164,9 +158,8 @@ bool RuleTableLoaderStandard::Load(FormatType format
const string *line;
if (format == HieroFormat) { // reformat line
line = ReformatHieroRule(lineOrig);
}
else
{ // do nothing to format of line
} else {
// do nothing to format of line
line = &lineOrig;
}

@ -235,9 +228,8 @@ bool RuleTableLoaderStandard::Load(FormatType format

if (format == HieroFormat) { // reformat line
delete line;
}
else
{ // do nothing
} else {
// do nothing
}

}

@ -24,8 +24,7 @@
namespace Moses
{

enum FormatType
{
enum FormatType {
MosesFormat
,HieroFormat
};

@ -650,7 +650,8 @@ StaticData::~StaticData()
}

#ifdef HAVE_SYNLM
bool StaticData::LoadSyntacticLanguageModel() {
bool StaticData::LoadSyntacticLanguageModel()
{
cerr << "Loading syntactic language models..." << std::endl;

const vector<float> weights = Scan<float>(m_parameter->GetParam("weight-slm"));
@ -1010,8 +1011,7 @@ bool StaticData::LoadPhraseTables()

m_numInputScores=m_parameter->GetParam("weight-i").size();

if (implementation == Binary)
{
if (implementation == Binary) {
for(unsigned k=0; k<m_numInputScores; ++k)
weight.push_back(Scan<float>(m_parameter->GetParam("weight-i")[k]));
}
@ -1327,7 +1327,8 @@ void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Ph
m_transOptCache[key] = make_pair( storedTransOptList, clock() );
ReduceTransOptCache();
}
void StaticData::ClearTransOptionCache() const {
void StaticData::ClearTransOptionCache() const
{
map<std::pair<size_t, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache) {
TranslationOptionList *transOptList = iterCache->second.first;

@ -617,8 +617,9 @@ public:
return m_threadCount;
}

long GetStartTranslationId() const
{ return m_startTranslationId; }
long GetStartTranslationId() const {
return m_startTranslationId;
}
};

}

@ -19,7 +19,8 @@ namespace Moses
: m_NumScoreComponents(weights.size())
, m_beamWidth(beamWidth)
, m_factorType(factorType)
, m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath)) {
, m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath))
{

// Inform Moses score manager of this feature and its weight(s)
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
@ -27,24 +28,29 @@ namespace Moses
VERBOSE(3,"Constructed SyntacticLanguageModel" << endl);
}

SyntacticLanguageModel::~SyntacticLanguageModel() {
SyntacticLanguageModel::~SyntacticLanguageModel()
{
VERBOSE(3,"Destructing SyntacticLanguageModel" << std::endl);
// delete m_files;
}

size_t SyntacticLanguageModel::GetNumScoreComponents() const {
size_t SyntacticLanguageModel::GetNumScoreComponents() const
{
return m_NumScoreComponents;
}

std::string SyntacticLanguageModel::GetScoreProducerDescription(unsigned) const {
std::string SyntacticLanguageModel::GetScoreProducerDescription(unsigned) const
{
return "Syntactic Language Model";
}

std::string SyntacticLanguageModel::GetScoreProducerWeightShortName(unsigned) const {
std::string SyntacticLanguageModel::GetScoreProducerWeightShortName(unsigned) const
{
return "slm";
}

const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const {
const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const
{

return new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);

@ -82,7 +88,8 @@ namespace Moses
*/
FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const {
ScoreComponentCollection* accumulator) const
{

VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << endl);

@ -14,7 +14,8 @@ namespace Moses

template <class MH, class MO> class SyntacticLanguageModelFiles;

class SyntacticLanguageModel : public StatefulFeatureFunction {
class SyntacticLanguageModel : public StatefulFeatureFunction
{

public:

@ -10,7 +10,8 @@ namespace Moses
{

template <class MH, class MO>
class SyntacticLanguageModelFiles {
class SyntacticLanguageModelFiles
{

public:

@ -28,7 +29,8 @@ class SyntacticLanguageModelFiles {

template <class MH, class MO>
SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths) {
SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths)
{

this->hiddenModel = new MH();
this->observedModel = new MO();
@ -43,7 +45,10 @@ template <class MH, class MO>
return;
}
std::cerr << "Loading model \'" << filePaths[a] << "\'...\n";
int c=' '; int i=0; int line=1; String sBuff(1000); // Lookahead/ctrs/buffers
int c=' ';
int i=0;
int line=1;
String sBuff(1000); // Lookahead/ctrs/buffers
CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Get to first record
while ( c!=-1 && c!='\0' && c!='\5' ) { // For each record
CONSUME_STR ( pf, c, (c!='\n' && c!='\0' && c!='\5'), sBuff, i, line ); // Consume line
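An aside on the loop above: CONSUME_ALL and CONSUME_STR are lookahead-driven reader macros. The following is a rough plain-C++ re-expression of the control flow they implement (a hypothetical sketch, not the actual macro definitions, which also maintain the i and line counters):

#include <cstdio>
#include <string>

// Skip whitespace, then read one newline-terminated record at a time
// until EOF, NUL, or ENQ ('\5') is seen.
void readRecords(std::FILE* pf) {
  int c = std::fgetc(pf);
  while (c == ' ' || c == '\t' || c == '\n')          // get to first record
    c = std::fgetc(pf);
  while (c != EOF && c != '\0' && c != '\5') {        // for each record
    std::string sBuff;
    while (c != EOF && c != '\n' && c != '\0' && c != '\5') {
      sBuff += static_cast<char>(c);                  // consume line
      c = std::fgetc(pf);
    }
    // ... hand sBuff to the model's record parser here ...
    while (c == ' ' || c == '\t' || c == '\n')        // skip to next record
      c = std::fgetc(pf);
  }
}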
@ -66,7 +71,8 @@ template <class MH, class MO>

template <class MH, class MO>
SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles() {
SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles()
{

std::cerr<<"Destructing syntactic language model files" << std::endl;
//delete hiddenModel;
@ -76,14 +82,16 @@ template <class MH, class MO>

template <class MH, class MO>
MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel() {
MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel()
{

return this->hiddenModel;

}

template <class MH, class MO>
MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel() {
MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel()
{

return this->observedModel;

@ -15,7 +15,8 @@ namespace Moses
{

template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
class SyntacticLanguageModelState : public FFState {
class SyntacticLanguageModelState : public FFState
{
public:

// Initialize an empty LM state
@ -55,7 +56,8 @@ template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBac

template <class MY, class MX, class YS, class B>
void SyntacticLanguageModelState<MY,MX,YS,B>::printRV() {
void SyntacticLanguageModelState<MY,MX,YS,B>::printRV()
{

cerr << "*********** BEGIN printRV() ******************" << endl;
int size=randomVariableStore->getSize();
@ -78,7 +80,8 @@ template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBac
// argv is the list of model file names
//
template <class MY, class MX, class YS, class B>
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize ) {
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize )
{

this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
this->modelData = modelData;
@ -114,7 +117,8 @@ template <class MY, class MX, class YS, class B>

template <class MY, class MX, class YS, class B>
int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const {
int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const
{
/*
const SyntacticLanguageModelState<MY,MX,YS,B>& o =
static_cast<const SyntacticLanguageModelState<MY,MX,YS,B>&>(other);
@ -128,7 +132,8 @@ template <class MY, class MX, class YS, class B>

template <class MY, class MX, class YS, class B>
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ) {
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word )
{

// Initialize member variables
this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
@ -248,20 +253,23 @@ template <class MY, class MX, class YS, class B>

template <class MY, class MX, class YS, class B>
double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const {
double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const
{

return prob;
}

template <class MY, class MX, class YS, class B>
double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const {
double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const
{

return score;
}

template <class MY, class MX, class YS, class B>
void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score) {
void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score)
{

@ -304,8 +304,10 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
return clone;
}

namespace {
void MosesShouldUseExceptions(bool value) {
namespace
{
void MosesShouldUseExceptions(bool value)
{
if (!value) {
std::cerr << "Could not parse alignment info" << std::endl;
abort();
@ -57,7 +57,9 @@ public:
RemoveAllInColl(m_collection);
}

const std::vector<TargetPhrase*> &GetCollection() const { return m_collection; }
const std::vector<TargetPhrase*> &GetCollection() const {
return m_collection;
}

//! divide collection into 2 buckets using std::nth_element, the top & bottom according to table limit
void NthElement(size_t tableLimit);
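NthElement is only declared in this hunk; its doc comment describes the standard partial-selection idiom. A minimal sketch of that idiom follows, assuming a hypothetical score field and comparator rather than Moses' real TargetPhrase ordering:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical stand-ins for illustration only.
struct TargetPhrase { float score; };

struct HigherScoreFirst {
  bool operator()(const TargetPhrase* a, const TargetPhrase* b) const {
    return a->score > b->score;  // better phrases sort first
  }
};

void NthElementSketch(std::vector<TargetPhrase*>& coll, std::size_t tableLimit) {
  if (tableLimit == 0 || coll.size() <= tableLimit) return;
  // Puts the tableLimit best phrases in coll[0..tableLimit) and everything
  // weaker after them, without fully sorting either bucket: average O(n),
  // cheaper than sorting the whole collection.
  std::nth_element(coll.begin(), coll.begin() + tableLimit, coll.end(),
                   HigherScoreFirst());
}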
@ -54,7 +54,9 @@ class Task
{
public:
virtual void Run() = 0;
virtual bool DeleteAfterExecution() {return true;}
virtual bool DeleteAfterExecution() {
return true;
}
virtual ~Task() {}
};

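The DeleteAfterExecution hook above lets a task decide whether the pool owns its lifetime. A sketch of how a consumer loop might honour it; getNextTask is a hypothetical queue accessor, not part of the interface shown:

// Interface as shown in the hunk above.
class Task {
public:
  virtual void Run() = 0;
  virtual bool DeleteAfterExecution() {
    return true;
  }
  virtual ~Task() {}
};

Task* getNextTask();  // hypothetical blocking queue accessor

void workerLoop() {
  while (Task* task = getNextTask()) {  // a null task ends the loop
    task->Run();
    // A task returning false stays alive, e.g. so the submitter can
    // collect results from it after execution.
    if (task->DeleteAfterExecution())
      delete task;
  }
}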
@ -41,7 +41,8 @@ TrellisPath::TrellisPath(const Hypothesis *hypo)
}
}

void TrellisPath::InitScore() {
void TrellisPath::InitScore()
{
m_totalScore = m_path[0]->GetWinningHypo()->GetTotalScore();
m_scoreBreakdown= m_path[0]->GetWinningHypo()->GetScoreBreakdown();

@ -236,7 +236,8 @@ void Mismatch::PrintClippedHTML( ostream* out, int width )
*out << "</td></tr>";
}

void Mismatch::LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label ) {
void Mismatch::LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label )
{
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
source_annotation[ source_id ] = label;

@ -40,8 +40,7 @@ public:
,m_source_length(source_length)
,m_target_length(target_length)
,m_source_start(source_start)
,m_source_end(source_end)
{
,m_source_end(source_end) {
// initialize unaligned indexes
for(char i=0; i<m_source_length; i++) {
m_source_unaligned[i] = true;
@ -64,7 +63,9 @@ public:
}
~Mismatch () {}

bool Unaligned() { return m_unaligned; }
bool Unaligned() {
return m_unaligned;
}
void PrintClippedHTML( ostream* out, int width );
void LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label );
};
@ -221,8 +221,7 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width )

if (target_post.size() < target_post_width) {
target_post_width = target_post.size();
}
else {
} else {
while(target_post_width>0 &&
target_post.substr(target_post_width-1,1) != " ") {
target_post_width--;

@ -67,8 +67,7 @@ bool PhrasePairCollection::GetCollection( const vector< string > sourceString )
m_size++;
}
}
}
else {
} else {
cerr << "mismatch " << (i-first_match)
<< " in sentence " << sentence_id
<< ", starting at word " << source_start
@ -119,8 +118,7 @@ void PhrasePairCollection::PrintHTML()
<< (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
<< (m_collection.end() - ppWithSameTarget)
<< "/" << m_size << ")</p>";
}
else {
} else {
cout << "<p class=\"pp_target_header\">";
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
cout << " (" << count << "/" << m_size << ")" << endl;
@ -34,11 +34,13 @@ static const std::string base64_chars =
"0123456789+/";


static inline bool is_base64(unsigned char c) {
static inline bool is_base64(unsigned char c)
{
return (isalnum(c) || (c == '+') || (c == '/'));
}

std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len)
{
std::string ret;
int i = 0;
int j = 0;
@ -59,8 +61,7 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_
}
}

if (i)
{
if (i) {
for(j = i; j < 3; j++)
char_array_3[j] = '\0';

@ -81,7 +82,8 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_

}

std::string base64_decode(std::string const& encoded_string) {
std::string base64_decode(std::string const& encoded_string)
{
int in_len = encoded_string.size();
int i = 0;
int j = 0;
@ -90,7 +92,8 @@ std::string base64_decode(std::string const& encoded_string) {
std::string ret;

while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
char_array_4[i++] = encoded_string[in_];
in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);
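For reference, the two functions touched in these hunks pair up as an encode/decode round trip. A usage sketch against the signatures shown (assuming the declarations are in scope; the header they live in is not visible in this diff):

#include <cassert>
#include <iostream>
#include <string>

// Signatures exactly as they appear in the hunks above.
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len);
std::string base64_decode(std::string const& encoded_string);

int main() {
  const std::string original = "syntactic language model";
  const std::string encoded = base64_encode(
      reinterpret_cast<const unsigned char*>(original.data()),
      static_cast<unsigned int>(original.size()));
  // Decoding must invert encoding for any byte string.
  assert(base64_decode(encoded) == original);
  std::cout << encoded << std::endl;
  return 0;
}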
@ -14,9 +14,11 @@
#include <set>
#include <sstream>

namespace moses {
namespace moses
{

int Compactify::main(int argc, char *argv[]) {
int Compactify::main(int argc, char *argv[])
{
// Process the command-line arguments.
Options options;
processOptions(argc, argv, options);
@ -219,7 +221,8 @@ int Compactify::main(int argc, char *argv[]) {
}

void Compactify::processOptions(int argc, char *argv[],
Options &options) const {
Options &options) const
{
namespace po = boost::program_options;

std::ostringstream usageMsg;
@ -281,7 +284,8 @@ void Compactify::processOptions(int argc, char *argv[],
}

void Compactify::encodePhrase(const std::string &lhs, const StringPhrase &rhs,
SymbolSet &symbolSet, SymbolPhrase &vec) const {
SymbolSet &symbolSet, SymbolPhrase &vec) const
{
vec.clear();
vec.reserve(rhs.size()+1);
SymbolIDType id = symbolSet.insert(lhs);

@ -8,12 +8,14 @@
#include <set>
#include <vector>

namespace moses {
namespace moses
{

class Options;

// Tool for converting a rule table into a more compact format.
class Compactify : public Tool {
class Compactify : public Tool
{
public:
Compactify() : Tool("compactify") {}
virtual int main(int, char *[]);
@ -1,6 +1,7 @@
#include "Compactify.h"

int main(int argc, char *argv[]) {
int main(int argc, char *argv[])
{
moses::Compactify tool;
return tool.main(argc, argv);
}
Some files were not shown because too many files have changed in this diff.