mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
beautify
This commit is contained in:
parent
771e792bb4
commit
efc2c6145e
@ -22,7 +22,8 @@ struct DistortionState_traditional : public FFState {
|
||||
};
|
||||
|
||||
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
|
||||
: StatefulFeatureFunction("Distortion", 1, line) {
|
||||
: StatefulFeatureFunction("Distortion", 1, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
|
@ -102,8 +102,8 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
|
||||
void FeatureFunction::ReadParameters()
|
||||
{
|
||||
while (!m_args.empty()) {
|
||||
const vector<string> &args = m_args[0];
|
||||
SetParameter(args[0], args[1]);
|
||||
const vector<string> &args = m_args[0];
|
||||
SetParameter(args[0], args[1]);
|
||||
|
||||
m_args.erase(m_args.begin());
|
||||
}
|
||||
|
@ -5,14 +5,15 @@
|
||||
namespace Moses
|
||||
{
|
||||
PhrasePenalty::PhrasePenalty(const std::string &line)
|
||||
: StatelessFeatureFunction("PhrasePenalty",1, line) {
|
||||
: StatelessFeatureFunction("PhrasePenalty",1, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void PhrasePenalty::Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
scoreBreakdown.Assign(this, 1.0f);
|
||||
}
|
||||
|
@ -11,13 +11,13 @@ public:
|
||||
PhrasePenalty(const std::string &line);
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
};
|
||||
|
||||
} //namespace
|
||||
|
@ -7,7 +7,8 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(const std::string &line)
|
||||
: StatelessFeatureFunction("UnknownWordPenalty",1, line) {
|
||||
: StatelessFeatureFunction("UnknownWordPenalty",1, line)
|
||||
{
|
||||
m_tuneable = false;
|
||||
ReadParameters();
|
||||
}
|
||||
|
@ -7,7 +7,8 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
|
||||
: StatelessFeatureFunction("WordPenalty",1, line) {
|
||||
: StatelessFeatureFunction("WordPenalty",1, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
|
@ -275,14 +275,14 @@ bool Parameter::LoadParam(int argc, char* argv[])
|
||||
}
|
||||
|
||||
// overwrite parameters with values from switches
|
||||
for(PARAM_STRING::const_iterator iterParam = m_description.begin();
|
||||
for(PARAM_STRING::const_iterator iterParam = m_description.begin();
|
||||
iterParam != m_description.end(); iterParam++) {
|
||||
const string paramName = iterParam->first;
|
||||
OverwriteParam("-" + paramName, paramName, argc, argv);
|
||||
}
|
||||
|
||||
// ... also shortcuts
|
||||
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
|
||||
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
|
||||
iterParam != m_abbreviation.end(); iterParam++) {
|
||||
const string paramName = iterParam->first;
|
||||
const string paramShortName = iterParam->second;
|
||||
@ -296,8 +296,8 @@ bool Parameter::LoadParam(int argc, char* argv[])
|
||||
verbose = Scan<int>(m_setting["verbose"][0]);
|
||||
if (verbose >= 1) { // only if verbose
|
||||
TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
|
||||
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ;
|
||||
iterParam != m_setting.end(); iterParam++) {
|
||||
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ;
|
||||
iterParam != m_setting.end(); iterParam++) {
|
||||
TRACE_ERR( "\t" << iterParam->first << ": ");
|
||||
for ( size_t i = 0; i < iterParam->second.size(); i++ )
|
||||
TRACE_ERR( iterParam->second[i] << " ");
|
||||
@ -307,7 +307,7 @@ bool Parameter::LoadParam(int argc, char* argv[])
|
||||
|
||||
// convert old weights args to new format
|
||||
// WHAT IS GOING ON HERE??? - UG
|
||||
if (!isParamSpecified("feature")) // UG
|
||||
if (!isParamSpecified("feature")) // UG
|
||||
ConvertWeightArgs();
|
||||
CreateWeightsMap();
|
||||
WeightOverwrite();
|
||||
@ -361,10 +361,10 @@ void Parameter::SetWeight(const std::string &name, size_t ind, const vector<floa
|
||||
newWeights.push_back(line);
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
Parameter::
|
||||
AddWeight(const std::string &name, size_t ind,
|
||||
const std::vector<float> &weights)
|
||||
AddWeight(const std::string &name, size_t ind,
|
||||
const std::vector<float> &weights)
|
||||
{
|
||||
PARAM_VEC &newWeights = m_setting["weight"];
|
||||
|
||||
@ -516,7 +516,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
|
||||
++currOldInd;
|
||||
}
|
||||
|
||||
// cerr << weights.size() << " PHRASE TABLE WEIGHTS "
|
||||
// cerr << weights.size() << " PHRASE TABLE WEIGHTS "
|
||||
// << __FILE__ << ":" << __LINE__ << endl;
|
||||
AddWeight(ptType, ptInd, weights);
|
||||
|
||||
|
@ -694,8 +694,8 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
SetWeights(model, weights);
|
||||
} else if (feature == "PhrasePenalty") {
|
||||
PhrasePenalty* model = new PhrasePenalty(line);
|
||||
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
|
||||
SetWeights(model, weights);
|
||||
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
|
||||
SetWeights(model, weights);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SYNLM
|
||||
|
@ -37,8 +37,8 @@ void TargetPhraseCollection::NthElement(size_t tableLimit)
|
||||
{
|
||||
vector<TargetPhrase*>::iterator nth;
|
||||
nth = (tableLimit && tableLimit <= m_collection.size()
|
||||
? m_collection.begin() + tableLimit
|
||||
: m_collection.end());
|
||||
? m_collection.begin() + tableLimit
|
||||
: m_collection.end());
|
||||
std::nth_element(m_collection.begin(), nth, m_collection.end(), CompareTargetPhrase());
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
#ifndef moses_BilingualDynSuffixArray_h
|
||||
#define moses_BilingualDynSuffixArray_h
|
||||
|
||||
#include "DynSuffixArray.h"
|
||||
#include "DynSuffixArray.h"
|
||||
#include "moses/TranslationModel/DynSAInclude/vocab.h"
|
||||
#include "moses/TranslationModel/DynSAInclude/types.h"
|
||||
#include "moses/TranslationModel/DynSAInclude/utils.h"
|
||||
@ -16,165 +16,168 @@
|
||||
using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
class PhraseDictionaryDynSuffixArray;
|
||||
class PhraseDictionaryDynSuffixArray;
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class SAPhrase
|
||||
{
|
||||
public:
|
||||
vector<wordID_t> words;
|
||||
|
||||
SAPhrase(size_t phraseSize)
|
||||
:words(phraseSize)
|
||||
{}
|
||||
|
||||
void SetId(size_t pos, wordID_t id)
|
||||
{
|
||||
CHECK(pos < words.size());
|
||||
words[pos] = id;
|
||||
}
|
||||
bool operator<(const SAPhrase& phr2) const
|
||||
{ return words < phr2.words; }
|
||||
};
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class SAPhrase
|
||||
{
|
||||
public:
|
||||
vector<wordID_t> words;
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class PhrasePair
|
||||
{
|
||||
public:
|
||||
int m_startTarget, m_endTarget, m_startSource, m_endSource, m_sntIndex;
|
||||
PhrasePair(int startTarget, int endTarget, int startSource, int endSource, int sntIndex)
|
||||
: m_startTarget(startTarget)
|
||||
, m_endTarget(endTarget)
|
||||
, m_startSource(startSource)
|
||||
, m_endSource(endSource)
|
||||
, m_sntIndex(sntIndex)
|
||||
{}
|
||||
SAPhrase(size_t phraseSize)
|
||||
:words(phraseSize)
|
||||
{}
|
||||
|
||||
size_t GetTargetSize() const
|
||||
{ return m_endTarget - m_startTarget + 1; }
|
||||
|
||||
size_t GetSourceSize() const
|
||||
{ return m_endSource - m_startSource + 1; }
|
||||
};
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class SentenceAlignment
|
||||
{
|
||||
public:
|
||||
SentenceAlignment(int sntIndex, int sourceSize, int targetSize);
|
||||
int m_sntIndex;
|
||||
vector<wordID_t>* trgSnt;
|
||||
vector<wordID_t>* srcSnt;
|
||||
vector<int> numberAligned;
|
||||
vector< vector<int> > alignedList;
|
||||
bool Extract(int maxPhraseLength, vector<PhrasePair*> &ret,
|
||||
int startSource, int endSource) const;
|
||||
};
|
||||
void SetId(size_t pos, wordID_t id) {
|
||||
CHECK(pos < words.size());
|
||||
words[pos] = id;
|
||||
}
|
||||
bool operator<(const SAPhrase& phr2) const {
|
||||
return words < phr2.words;
|
||||
}
|
||||
};
|
||||
|
||||
class ScoresComp {
|
||||
public:
|
||||
ScoresComp(const vector<float>& weights): m_weights(weights) {}
|
||||
bool operator()(const Scores& s1, const Scores& s2) const {
|
||||
return s1[0] < s2[0]; // just p(e|f) as approximation
|
||||
// float score1(0), score2(0);
|
||||
// int idx1(0), idx2(0);
|
||||
// for (Scores::const_iterator itr = s1.begin();
|
||||
// itr != s1.end(); ++itr) {
|
||||
// score1 += log(*itr * m_weights.at(idx1++));
|
||||
// }
|
||||
// for (Scores::const_iterator itr = s2.begin();
|
||||
// itr != s2.end(); ++itr) {
|
||||
// score2 += log(*itr * m_weights.at(idx2++));
|
||||
// }
|
||||
// return score1 < score2;
|
||||
}
|
||||
private:
|
||||
const vector<float>& m_weights;
|
||||
};
|
||||
|
||||
struct BetterPhrase
|
||||
{
|
||||
ScoresComp const& cmp;
|
||||
BetterPhrase(ScoresComp const& sc);
|
||||
// bool operator()(pair<Scores, TargetPhrase const*> const& a,
|
||||
// pair<Scores, TargetPhrase const*> const& b) const;
|
||||
bool operator()(pair<Scores, SAPhrase const*> const& a,
|
||||
pair<Scores, SAPhrase const*> const& b) const;
|
||||
};
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class PhrasePair
|
||||
{
|
||||
public:
|
||||
int m_startTarget, m_endTarget, m_startSource, m_endSource, m_sntIndex;
|
||||
PhrasePair(int startTarget, int endTarget, int startSource, int endSource, int sntIndex)
|
||||
: m_startTarget(startTarget)
|
||||
, m_endTarget(endTarget)
|
||||
, m_startSource(startSource)
|
||||
, m_endSource(endSource)
|
||||
, m_sntIndex(sntIndex)
|
||||
{}
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class BilingualDynSuffixArray {
|
||||
public:
|
||||
BilingualDynSuffixArray();
|
||||
~BilingualDynSuffixArray();
|
||||
bool Load( const vector<FactorType>& inputFactors,
|
||||
const vector<FactorType>& outputTactors,
|
||||
string source, string target, string alignments,
|
||||
const vector<float> &weight);
|
||||
// bool LoadTM( const vector<FactorType>& inputFactors,
|
||||
// const vector<FactorType>& outputTactors,
|
||||
// string source, string target, string alignments,
|
||||
// const vector<float> &weight);
|
||||
void GetTargetPhrasesByLexicalWeight(const Phrase& src, vector< pair<Scores, TargetPhrase*> >& target) const;
|
||||
size_t GetTargetSize() const {
|
||||
return m_endTarget - m_startTarget + 1;
|
||||
}
|
||||
|
||||
void CleanUp(const InputType& source);
|
||||
void addSntPair(string& source, string& target, string& alignment);
|
||||
pair<float,float>
|
||||
GatherCands(Phrase const& src, map<SAPhrase, vector<float> >& pstats) const;
|
||||
size_t GetSourceSize() const {
|
||||
return m_endSource - m_startSource + 1;
|
||||
}
|
||||
};
|
||||
|
||||
TargetPhrase*
|
||||
GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class SentenceAlignment
|
||||
{
|
||||
public:
|
||||
SentenceAlignment(int sntIndex, int sourceSize, int targetSize);
|
||||
int m_sntIndex;
|
||||
vector<wordID_t>* trgSnt;
|
||||
vector<wordID_t>* srcSnt;
|
||||
vector<int> numberAligned;
|
||||
vector< vector<int> > alignedList;
|
||||
bool Extract(int maxPhraseLength, vector<PhrasePair*> &ret,
|
||||
int startSource, int endSource) const;
|
||||
};
|
||||
|
||||
private:
|
||||
class ScoresComp
|
||||
{
|
||||
public:
|
||||
ScoresComp(const vector<float>& weights): m_weights(weights) {}
|
||||
bool operator()(const Scores& s1, const Scores& s2) const {
|
||||
return s1[0] < s2[0]; // just p(e|f) as approximation
|
||||
// float score1(0), score2(0);
|
||||
// int idx1(0), idx2(0);
|
||||
// for (Scores::const_iterator itr = s1.begin();
|
||||
// itr != s1.end(); ++itr) {
|
||||
// score1 += log(*itr * m_weights.at(idx1++));
|
||||
// }
|
||||
// for (Scores::const_iterator itr = s2.begin();
|
||||
// itr != s2.end(); ++itr) {
|
||||
// score2 += log(*itr * m_weights.at(idx2++));
|
||||
// }
|
||||
// return score1 < score2;
|
||||
}
|
||||
private:
|
||||
const vector<float>& m_weights;
|
||||
};
|
||||
|
||||
struct BetterPhrase {
|
||||
ScoresComp const& cmp;
|
||||
BetterPhrase(ScoresComp const& sc);
|
||||
// bool operator()(pair<Scores, TargetPhrase const*> const& a,
|
||||
// pair<Scores, TargetPhrase const*> const& b) const;
|
||||
bool operator()(pair<Scores, SAPhrase const*> const& a,
|
||||
pair<Scores, SAPhrase const*> const& b) const;
|
||||
};
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class BilingualDynSuffixArray
|
||||
{
|
||||
public:
|
||||
BilingualDynSuffixArray();
|
||||
~BilingualDynSuffixArray();
|
||||
bool Load( const vector<FactorType>& inputFactors,
|
||||
const vector<FactorType>& outputTactors,
|
||||
string source, string target, string alignments,
|
||||
const vector<float> &weight);
|
||||
// bool LoadTM( const vector<FactorType>& inputFactors,
|
||||
// const vector<FactorType>& outputTactors,
|
||||
// string source, string target, string alignments,
|
||||
// const vector<float> &weight);
|
||||
void GetTargetPhrasesByLexicalWeight(const Phrase& src, vector< pair<Scores, TargetPhrase*> >& target) const;
|
||||
|
||||
void CleanUp(const InputType& source);
|
||||
void addSntPair(string& source, string& target, string& alignment);
|
||||
pair<float,float>
|
||||
GatherCands(Phrase const& src, map<SAPhrase, vector<float> >& pstats) const;
|
||||
|
||||
TargetPhrase*
|
||||
GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
|
||||
|
||||
private:
|
||||
|
||||
|
||||
mutable WordCoocTable m_wrd_cooc;
|
||||
DynSuffixArray * m_srcSA;
|
||||
DynSuffixArray * m_trgSA;
|
||||
vector<wordID_t>* m_srcCorpus;
|
||||
vector<wordID_t>* m_trgCorpus;
|
||||
vector<FactorType> m_inputFactors;
|
||||
vector<FactorType> m_outputFactors;
|
||||
|
||||
vector<unsigned> m_srcSntBreaks, m_trgSntBreaks;
|
||||
|
||||
Vocab* m_srcVocab, *m_trgVocab;
|
||||
ScoresComp* m_scoreCmp;
|
||||
|
||||
vector<SentenceAlignment> m_alignments;
|
||||
vector<vector<short> > m_rawAlignments;
|
||||
|
||||
mutable map<pair<wordID_t, wordID_t>, pair<float, float> > m_wordPairCache;
|
||||
mutable set<wordID_t> m_freqWordsCached;
|
||||
const size_t m_maxPhraseLength, m_maxSampleSize;
|
||||
const size_t m_maxPTEntries;
|
||||
int LoadCorpus(FactorDirection direction,
|
||||
InputFileStream&, const vector<FactorType>& factors,
|
||||
vector<wordID_t>&, vector<wordID_t>&,
|
||||
Vocab*);
|
||||
int LoadAlignments(InputFileStream& aligs);
|
||||
int LoadRawAlignments(InputFileStream& aligs);
|
||||
int LoadRawAlignments(string& aligs);
|
||||
mutable WordCoocTable m_wrd_cooc;
|
||||
DynSuffixArray * m_srcSA;
|
||||
DynSuffixArray * m_trgSA;
|
||||
vector<wordID_t>* m_srcCorpus;
|
||||
vector<wordID_t>* m_trgCorpus;
|
||||
vector<FactorType> m_inputFactors;
|
||||
vector<FactorType> m_outputFactors;
|
||||
|
||||
bool ExtractPhrases(const int&, const int&, const int&, vector<PhrasePair*>&, bool=false) const;
|
||||
SentenceAlignment GetSentenceAlignment(const int, bool=false) const;
|
||||
int SampleSelection(vector<unsigned>&, int = 300) const;
|
||||
vector<unsigned> m_srcSntBreaks, m_trgSntBreaks;
|
||||
|
||||
vector<int> GetSntIndexes(vector<unsigned>&, int, const vector<unsigned>&) const;
|
||||
SAPhrase TrgPhraseFromSntIdx(const PhrasePair&) const;
|
||||
bool GetLocalVocabIDs(const Phrase&, SAPhrase &) const;
|
||||
void CacheWordProbs(wordID_t) const;
|
||||
void CacheFreqWords() const;
|
||||
void ClearWordInCache(wordID_t);
|
||||
pair<float, float> GetLexicalWeight(const PhrasePair&) const;
|
||||
Vocab* m_srcVocab, *m_trgVocab;
|
||||
ScoresComp* m_scoreCmp;
|
||||
|
||||
int GetSourceSentenceSize(size_t sentenceId) const;
|
||||
int GetTargetSentenceSize(size_t sentenceId) const;
|
||||
vector<SentenceAlignment> m_alignments;
|
||||
vector<vector<short> > m_rawAlignments;
|
||||
|
||||
};
|
||||
mutable map<pair<wordID_t, wordID_t>, pair<float, float> > m_wordPairCache;
|
||||
mutable set<wordID_t> m_freqWordsCached;
|
||||
const size_t m_maxPhraseLength, m_maxSampleSize;
|
||||
const size_t m_maxPTEntries;
|
||||
int LoadCorpus(FactorDirection direction,
|
||||
InputFileStream&, const vector<FactorType>& factors,
|
||||
vector<wordID_t>&, vector<wordID_t>&,
|
||||
Vocab*);
|
||||
int LoadAlignments(InputFileStream& aligs);
|
||||
int LoadRawAlignments(InputFileStream& aligs);
|
||||
int LoadRawAlignments(string& aligs);
|
||||
|
||||
bool ExtractPhrases(const int&, const int&, const int&, vector<PhrasePair*>&, bool=false) const;
|
||||
SentenceAlignment GetSentenceAlignment(const int, bool=false) const;
|
||||
int SampleSelection(vector<unsigned>&, int = 300) const;
|
||||
|
||||
vector<int> GetSntIndexes(vector<unsigned>&, int, const vector<unsigned>&) const;
|
||||
SAPhrase TrgPhraseFromSntIdx(const PhrasePair&) const;
|
||||
bool GetLocalVocabIDs(const Phrase&, SAPhrase &) const;
|
||||
void CacheWordProbs(wordID_t) const;
|
||||
void CacheFreqWords() const;
|
||||
void ClearWordInCache(wordID_t);
|
||||
pair<float, float> GetLexicalWeight(const PhrasePair&) const;
|
||||
|
||||
int GetSourceSentenceSize(size_t sentenceId) const;
|
||||
int GetTargetSentenceSize(size_t sentenceId) const;
|
||||
|
||||
};
|
||||
} // end namespace
|
||||
#endif
|
||||
|
@ -216,37 +216,37 @@ void DynSuffixArray::Substitute(vuint_t* /* newSents */, unsigned /* newIndex */
|
||||
return;
|
||||
}
|
||||
|
||||
ComparePosition::
|
||||
ComparePosition(vuint_t const& crp, vuint_t const& sfa)
|
||||
: m_crp(crp), m_sfa(sfa) { }
|
||||
ComparePosition::
|
||||
ComparePosition(vuint_t const& crp, vuint_t const& sfa)
|
||||
: m_crp(crp), m_sfa(sfa) { }
|
||||
|
||||
bool
|
||||
ComparePosition::
|
||||
operator()(unsigned const& i, vector<wordID_t> const& phrase) const
|
||||
{
|
||||
unsigned const* x = &m_crp.at(i);
|
||||
unsigned const* e = &m_crp.back();
|
||||
size_t k = 0;
|
||||
for (;k < phrase.size() && x < e; ++k, ++x)
|
||||
if (*x != phrase[k]) return *x < phrase[k];
|
||||
return (x == e && k < phrase.size());
|
||||
}
|
||||
bool
|
||||
ComparePosition::
|
||||
operator()(unsigned const& i, vector<wordID_t> const& phrase) const
|
||||
{
|
||||
unsigned const* x = &m_crp.at(i);
|
||||
unsigned const* e = &m_crp.back();
|
||||
size_t k = 0;
|
||||
for (; k < phrase.size() && x < e; ++k, ++x)
|
||||
if (*x != phrase[k]) return *x < phrase[k];
|
||||
return (x == e && k < phrase.size());
|
||||
}
|
||||
|
||||
bool
|
||||
ComparePosition::
|
||||
operator()(vector<wordID_t> const& phrase, unsigned const& i) const
|
||||
{
|
||||
unsigned const* x = &m_crp.at(i);
|
||||
unsigned const* e = &m_crp.back();
|
||||
size_t k = 0;
|
||||
for (;k < phrase.size() && x < e; ++k, ++x)
|
||||
if (*x != phrase[k]) return phrase[k] < *x;
|
||||
return false; // (k == phrase.size() && x < e);
|
||||
}
|
||||
bool
|
||||
ComparePosition::
|
||||
operator()(vector<wordID_t> const& phrase, unsigned const& i) const
|
||||
{
|
||||
unsigned const* x = &m_crp.at(i);
|
||||
unsigned const* e = &m_crp.back();
|
||||
size_t k = 0;
|
||||
for (; k < phrase.size() && x < e; ++k, ++x)
|
||||
if (*x != phrase[k]) return phrase[k] < *x;
|
||||
return false; // (k == phrase.size() && x < e);
|
||||
}
|
||||
|
||||
bool DynSuffixArray::GetCorpusIndex(const vuint_t* phrase, vuint_t* indices)
|
||||
{
|
||||
// DOES THIS EVEN WORK WHEN A DynSuffixArray has been saved and reloaded????
|
||||
// DOES THIS EVEN WORK WHEN A DynSuffixArray has been saved and reloaded????
|
||||
pair<vuint_t::iterator,vuint_t::iterator> bounds;
|
||||
indices->clear();
|
||||
size_t phrasesize = phrase->size();
|
||||
@ -281,15 +281,15 @@ bool DynSuffixArray::GetCorpusIndex(const vuint_t* phrase, vuint_t* indices)
|
||||
return (indices->size() > 0);
|
||||
}
|
||||
|
||||
size_t
|
||||
DynSuffixArray::
|
||||
GetCount(vuint_t const& phrase) const
|
||||
{
|
||||
ComparePosition cmp(*m_corpus, *m_SA);
|
||||
vuint_t::const_iterator lb = lower_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
|
||||
vuint_t::const_iterator ub = upper_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
|
||||
return ub-lb;
|
||||
}
|
||||
size_t
|
||||
DynSuffixArray::
|
||||
GetCount(vuint_t const& phrase) const
|
||||
{
|
||||
ComparePosition cmp(*m_corpus, *m_SA);
|
||||
vuint_t::const_iterator lb = lower_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
|
||||
vuint_t::const_iterator ub = upper_bound(m_SA->begin(), m_SA->end(), phrase, cmp);
|
||||
return ub-lb;
|
||||
}
|
||||
|
||||
void DynSuffixArray::Save(FILE* fout)
|
||||
{
|
||||
|
@ -11,28 +11,28 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
using namespace std;
|
||||
typedef std::vector<unsigned> vuint_t;
|
||||
using namespace std;
|
||||
typedef std::vector<unsigned> vuint_t;
|
||||
|
||||
|
||||
/// compare position /i/ in the suffix array /m_sfa/ into corpus /m_crp/
|
||||
/// against reference phrase /phrase/
|
||||
// added by Ulrich Germann
|
||||
class ComparePosition
|
||||
{
|
||||
vuint_t const& m_crp;
|
||||
vuint_t const& m_sfa;
|
||||
/// compare position /i/ in the suffix array /m_sfa/ into corpus /m_crp/
|
||||
/// against reference phrase /phrase/
|
||||
// added by Ulrich Germann
|
||||
class ComparePosition
|
||||
{
|
||||
vuint_t const& m_crp;
|
||||
vuint_t const& m_sfa;
|
||||
|
||||
public:
|
||||
ComparePosition(vuint_t const& crp, vuint_t const& sfa);
|
||||
bool operator()(unsigned const& i, vector<wordID_t> const& phrase) const;
|
||||
bool operator()(vector<wordID_t> const& phrase, unsigned const& i) const;
|
||||
};
|
||||
|
||||
public:
|
||||
ComparePosition(vuint_t const& crp, vuint_t const& sfa);
|
||||
bool operator()(unsigned const& i, vector<wordID_t> const& phrase) const;
|
||||
bool operator()(vector<wordID_t> const& phrase, unsigned const& i) const;
|
||||
};
|
||||
|
||||
|
||||
/** @todo ask Abbey Levenberg
|
||||
*/
|
||||
class DynSuffixArray
|
||||
class DynSuffixArray
|
||||
{
|
||||
|
||||
public:
|
||||
@ -64,8 +64,8 @@ private:
|
||||
void PrintAuxArrays() {
|
||||
std::cerr << "SA\tISA\tF\tL\n";
|
||||
for(size_t i=0; i < m_SA->size(); ++i)
|
||||
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t"
|
||||
<< m_F->at(i) << "\t" << m_L->at(i) << std::endl;
|
||||
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t"
|
||||
<< m_F->at(i) << "\t" << m_L->at(i) << std::endl;
|
||||
}
|
||||
};
|
||||
} //end namespace
|
||||
|
@ -10,10 +10,10 @@ namespace Moses
|
||||
{
|
||||
PhraseDictionaryDynSuffixArray::
|
||||
PhraseDictionaryDynSuffixArray(const std::string &line)
|
||||
: PhraseDictionary("PhraseDictionaryDynSuffixArray", line)
|
||||
,m_biSA(new BilingualDynSuffixArray())
|
||||
: PhraseDictionary("PhraseDictionaryDynSuffixArray", line)
|
||||
,m_biSA(new BilingualDynSuffixArray())
|
||||
{
|
||||
ReadParameters();
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
|
||||
@ -48,17 +48,16 @@ const TargetPhraseCollection*
|
||||
PhraseDictionaryDynSuffixArray::
|
||||
GetTargetPhraseCollection(const Phrase& src) const
|
||||
{
|
||||
typedef map<SAPhrase, vector<float> >::value_type pstat_entry;
|
||||
typedef map<SAPhrase, vector<float> >::value_type pstat_entry;
|
||||
map<SAPhrase, vector<float> > pstats; // phrase (pair) statistics
|
||||
m_biSA->GatherCands(src,pstats);
|
||||
|
||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
BOOST_FOREACH(pstat_entry & e, pstats)
|
||||
{
|
||||
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
|
||||
tp->GetScoreBreakdown().Assign(this,e.second);
|
||||
ret->Add(tp);
|
||||
}
|
||||
BOOST_FOREACH(pstat_entry & e, pstats) {
|
||||
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
|
||||
tp->GetScoreBreakdown().Assign(this,e.second);
|
||||
ret->Add(tp);
|
||||
}
|
||||
// return ret;
|
||||
// TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
// std::vector< std::pair< Scores, TargetPhrase*> > trg;
|
||||
@ -80,15 +79,15 @@ GetTargetPhraseCollection(const Phrase& src) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
PhraseDictionaryDynSuffixArray::
|
||||
insertSnt(string& source, string& target, string& alignment)
|
||||
{
|
||||
m_biSA->addSntPair(source, target, alignment); // insert sentence pair into suffix arrays
|
||||
//StaticData::Instance().ClearTransOptionCache(); // clear translation option cache
|
||||
//StaticData::Instance().ClearTransOptionCache(); // clear translation option cache
|
||||
}
|
||||
|
||||
void
|
||||
void
|
||||
PhraseDictionaryDynSuffixArray::
|
||||
deleteSnt(unsigned /* idx */, unsigned /* num2Del */)
|
||||
{
|
||||
|
@ -31,7 +31,8 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
PhraseDictionaryOnDisk::PhraseDictionaryOnDisk(const std::string &line)
|
||||
: MyBase("PhraseDictionaryOnDisk", line) {
|
||||
: MyBase("PhraseDictionaryOnDisk", line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
|
@ -1,72 +1,72 @@
|
||||
#include "moses/TranslationModel/WordCoocTable.h"
|
||||
using namespace std;
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
WordCoocTable::
|
||||
WordCoocTable()
|
||||
{
|
||||
m_cooc.reserve(1000000);
|
||||
m_marg1.reserve(1000000);
|
||||
m_marg2.reserve(1000000);
|
||||
}
|
||||
|
||||
WordCoocTable::
|
||||
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2)
|
||||
: m_cooc(VocabSize1), m_marg1(VocabSize1,0), m_marg2(VocabSize2, 0)
|
||||
{}
|
||||
|
||||
void
|
||||
WordCoocTable::
|
||||
Count(size_t const a, size_t const b)
|
||||
{
|
||||
while (a >= m_marg1.size())
|
||||
{
|
||||
m_cooc.push_back(my_map_t());
|
||||
m_marg1.push_back(0);
|
||||
}
|
||||
while (b >= m_marg2.size())
|
||||
m_marg2.push_back(0);
|
||||
++m_marg1[a];
|
||||
++m_marg2[b];
|
||||
++m_cooc[a][b];
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetJoint(size_t const a, size_t const b) const
|
||||
{
|
||||
if (a >= m_marg1.size() || b >= m_marg2.size()) return 0;
|
||||
my_map_t::const_iterator m = m_cooc.at(a).find(b);
|
||||
if (m == m_cooc[a].end()) return 0;
|
||||
return m->second;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetMarg1(size_t const x) const
|
||||
{
|
||||
return x >= m_marg1.size() ? 0 : m_marg1[x];
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetMarg2(size_t const x) const
|
||||
{
|
||||
return x >= m_marg2.size() ? 0 : m_marg2[x];
|
||||
}
|
||||
|
||||
float
|
||||
WordCoocTable::
|
||||
pfwd(size_t const a, size_t const b) const
|
||||
{
|
||||
return float(GetJoint(a,b))/GetMarg1(a);
|
||||
}
|
||||
|
||||
float
|
||||
WordCoocTable::
|
||||
pbwd(size_t const a, size_t const b) const
|
||||
{
|
||||
// cerr << "at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
return float(GetJoint(a,b))/GetMarg2(b);
|
||||
}
|
||||
WordCoocTable::
|
||||
WordCoocTable()
|
||||
{
|
||||
m_cooc.reserve(1000000);
|
||||
m_marg1.reserve(1000000);
|
||||
m_marg2.reserve(1000000);
|
||||
}
|
||||
|
||||
WordCoocTable::
|
||||
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2)
|
||||
: m_cooc(VocabSize1), m_marg1(VocabSize1,0), m_marg2(VocabSize2, 0)
|
||||
{}
|
||||
|
||||
void
|
||||
WordCoocTable::
|
||||
Count(size_t const a, size_t const b)
|
||||
{
|
||||
while (a >= m_marg1.size()) {
|
||||
m_cooc.push_back(my_map_t());
|
||||
m_marg1.push_back(0);
|
||||
}
|
||||
while (b >= m_marg2.size())
|
||||
m_marg2.push_back(0);
|
||||
++m_marg1[a];
|
||||
++m_marg2[b];
|
||||
++m_cooc[a][b];
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetJoint(size_t const a, size_t const b) const
|
||||
{
|
||||
if (a >= m_marg1.size() || b >= m_marg2.size()) return 0;
|
||||
my_map_t::const_iterator m = m_cooc.at(a).find(b);
|
||||
if (m == m_cooc[a].end()) return 0;
|
||||
return m->second;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetMarg1(size_t const x) const
|
||||
{
|
||||
return x >= m_marg1.size() ? 0 : m_marg1[x];
|
||||
}
|
||||
|
||||
uint32_t
|
||||
WordCoocTable::
|
||||
GetMarg2(size_t const x) const
|
||||
{
|
||||
return x >= m_marg2.size() ? 0 : m_marg2[x];
|
||||
}
|
||||
|
||||
float
|
||||
WordCoocTable::
|
||||
pfwd(size_t const a, size_t const b) const
|
||||
{
|
||||
return float(GetJoint(a,b))/GetMarg1(a);
|
||||
}
|
||||
|
||||
float
|
||||
WordCoocTable::
|
||||
pbwd(size_t const a, size_t const b) const
|
||||
{
|
||||
// cerr << "at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
return float(GetJoint(a,b))/GetMarg2(b);
|
||||
}
|
||||
}
|
||||
|
@ -10,63 +10,63 @@
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <map>
|
||||
|
||||
namespace Moses {
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
using namespace std;
|
||||
using namespace std;
|
||||
|
||||
#ifndef bitvector
|
||||
typedef boost::dynamic_bitset<uint64_t> bitvector;
|
||||
typedef boost::dynamic_bitset<uint64_t> bitvector;
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Stores word cooccurrence counts
|
||||
* @todo ask Uli Germann
|
||||
*/
|
||||
class WordCoocTable
|
||||
{
|
||||
typedef map<wordID_t,uint32_t> my_map_t;
|
||||
vector<my_map_t> m_cooc;
|
||||
vector<uint32_t> m_marg1;
|
||||
vector<uint32_t> m_marg2;
|
||||
public:
|
||||
WordCoocTable();
|
||||
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2);
|
||||
uint32_t GetJoint(size_t const a, size_t const b) const;
|
||||
uint32_t GetMarg1(size_t const x) const;
|
||||
uint32_t GetMarg2(size_t const x) const;
|
||||
float pfwd(size_t const a, size_t const b) const;
|
||||
float pbwd(size_t const a, size_t const b) const;
|
||||
void
|
||||
Count(size_t const a, size_t const b);
|
||||
|
||||
template<typename idvec, typename alnvec>
|
||||
void
|
||||
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
|
||||
wordID_t const NULL1, wordID_t const NULL2);
|
||||
|
||||
};
|
||||
/**
|
||||
* Stores word cooccurrence counts
|
||||
* @todo ask Uli Germann
|
||||
*/
|
||||
class WordCoocTable
|
||||
{
|
||||
typedef map<wordID_t,uint32_t> my_map_t;
|
||||
vector<my_map_t> m_cooc;
|
||||
vector<uint32_t> m_marg1;
|
||||
vector<uint32_t> m_marg2;
|
||||
public:
|
||||
WordCoocTable();
|
||||
WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2);
|
||||
uint32_t GetJoint(size_t const a, size_t const b) const;
|
||||
uint32_t GetMarg1(size_t const x) const;
|
||||
uint32_t GetMarg2(size_t const x) const;
|
||||
float pfwd(size_t const a, size_t const b) const;
|
||||
float pbwd(size_t const a, size_t const b) const;
|
||||
void
|
||||
Count(size_t const a, size_t const b);
|
||||
|
||||
template<typename idvec, typename alnvec>
|
||||
void
|
||||
WordCoocTable::
|
||||
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
|
||||
wordID_t const NULL1, wordID_t const NULL2)
|
||||
{
|
||||
boost::dynamic_bitset<uint64_t> check1(s1.size()), check2(s2.size());
|
||||
check1.set();
|
||||
check2.set();
|
||||
for (size_t i = 0; i < aln.size(); i += 2)
|
||||
{
|
||||
Count(s1[aln[i]], s2[aln[i+1]]);
|
||||
check1.reset(aln[i]);
|
||||
check2.reset(aln[i+1]);
|
||||
}
|
||||
for (size_t i = check1.find_first(); i < check1.size(); i = check1.find_next(i))
|
||||
Count(s1[i], NULL2);
|
||||
for (size_t i = check2.find_first(); i < check2.size(); i = check2.find_next(i))
|
||||
Count(NULL1, s2[i]);
|
||||
void
|
||||
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
|
||||
wordID_t const NULL1, wordID_t const NULL2);
|
||||
|
||||
};
|
||||
|
||||
template<typename idvec, typename alnvec>
|
||||
void
|
||||
WordCoocTable::
|
||||
Count(idvec const& s1, idvec const& s2, alnvec const& aln,
|
||||
wordID_t const NULL1, wordID_t const NULL2)
|
||||
{
|
||||
boost::dynamic_bitset<uint64_t> check1(s1.size()), check2(s2.size());
|
||||
check1.set();
|
||||
check2.set();
|
||||
for (size_t i = 0; i < aln.size(); i += 2) {
|
||||
Count(s1[aln[i]], s2[aln[i+1]]);
|
||||
check1.reset(aln[i]);
|
||||
check2.reset(aln[i+1]);
|
||||
}
|
||||
for (size_t i = check1.find_first(); i < check1.size(); i = check1.find_next(i))
|
||||
Count(s1[i], NULL2);
|
||||
for (size_t i = check2.find_first(); i < check2.size(); i = check2.find_next(i))
|
||||
Count(NULL1, s2[i]);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -1,55 +1,51 @@
|
||||
#ifndef __sampling_h
|
||||
#define __sampling_h
|
||||
|
||||
// Utility functions for proper sub-sampling.
|
||||
// Utility functions for proper sub-sampling.
|
||||
// (c) 2007-2012 Ulrich Germann
|
||||
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Returns a pseudo-random integer drawn via rand() and scaled by N.
// rand()/(RAND_MAX+1.) lies in [0,1), so the result is 0 for N == 0 and
// below N for any N that a double represents exactly.
inline
size_t
randInt(size_t N)
{
  double const unit = rand() / (RAND_MAX + 1.);
  return static_cast<size_t>(N * unit);
}
|
||||
|
||||
// select a random sample of size /s/ without restitution from the range of
|
||||
// integers [0,N);
|
||||
template<typename idx_t>
|
||||
void
|
||||
randomSample(vector<idx_t>& v, size_t s, size_t N)
|
||||
{
|
||||
// see also Knuth: Art of Computer Programming Vol. 2, p. 142
|
||||
|
||||
s = min(s,N);
|
||||
v.resize(s);
|
||||
// select a random sample of size /s/ without restitution from the range of
|
||||
// integers [0,N);
|
||||
template<typename idx_t>
|
||||
void
|
||||
randomSample(vector<idx_t>& v, size_t s, size_t N)
|
||||
{
|
||||
// see also Knuth: Art of Computer Programming Vol. 2, p. 142
|
||||
|
||||
// the first option tries to be a bit more efficient than O(N) in picking
|
||||
// the samples. The threshold is an ad-hoc, off-the-cuff guess. I still
|
||||
// need to figure out the optimal break-even point between a linear sweep
|
||||
// and repeatedly picking random numbers with the risk of hitting the same
|
||||
// number many times.
|
||||
if (s*10<N)
|
||||
{
|
||||
boost::dynamic_bitset<uint64_t> check(N,0);
|
||||
for (size_t i = 0; i < v.size(); i++)
|
||||
{
|
||||
size_t x = randInt(N);
|
||||
while (check[x]) x = randInt(N);
|
||||
check[x]=true;
|
||||
v[i] = x;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t m=0;
|
||||
for (size_t t = 0; m <= s && t < N; t++)
|
||||
if (s==N || randInt(N-t) < s-m) v[m++] = t;
|
||||
}
|
||||
s = min(s,N);
|
||||
v.resize(s);
|
||||
|
||||
// the first option tries to be a bit more efficient than O(N) in picking
|
||||
// the samples. The threshold is an ad-hoc, off-the-cuff guess. I still
|
||||
// need to figure out the optimal break-even point between a linear sweep
|
||||
// and repeatedly picking random numbers with the risk of hitting the same
|
||||
// number many times.
|
||||
if (s*10<N) {
|
||||
boost::dynamic_bitset<uint64_t> check(N,0);
|
||||
for (size_t i = 0; i < v.size(); i++) {
|
||||
size_t x = randInt(N);
|
||||
while (check[x]) x = randInt(N);
|
||||
check[x]=true;
|
||||
v[i] = x;
|
||||
}
|
||||
} else {
|
||||
size_t m=0;
|
||||
for (size_t t = 0; m <= s && t < N; t++)
|
||||
if (s==N || randInt(N-t) < s-m) v[m++] = t;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -11,79 +11,75 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
class
|
||||
using namespace std;
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
class
|
||||
NBestList
|
||||
{
|
||||
vector<uint32_t> m_heap;
|
||||
vector<THINGY> m_list;
|
||||
VectorIndexSorter<THINGY, CMP, uint32_t> m_better;
|
||||
mutable vector<uint32_t> m_order;
|
||||
mutable bool m_changed;
|
||||
public:
|
||||
NBestList(size_t const max_size, CMP const& cmp);
|
||||
NBestList(size_t const max_size);
|
||||
bool add(THINGY const& item);
|
||||
THINGY const& operator[](int i) const;
|
||||
size_t size() const { return m_heap.size(); }
|
||||
};
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
NBestList<THINGY,CMP>::
|
||||
NBestList(size_t const max_size, CMP const& cmp)
|
||||
: m_better(m_list, cmp), m_changed(false)
|
||||
{
|
||||
m_heap.reserve(max_size);
|
||||
{
|
||||
vector<uint32_t> m_heap;
|
||||
vector<THINGY> m_list;
|
||||
VectorIndexSorter<THINGY, CMP, uint32_t> m_better;
|
||||
mutable vector<uint32_t> m_order;
|
||||
mutable bool m_changed;
|
||||
public:
|
||||
NBestList(size_t const max_size, CMP const& cmp);
|
||||
NBestList(size_t const max_size);
|
||||
bool add(THINGY const& item);
|
||||
THINGY const& operator[](int i) const;
|
||||
size_t size() const {
|
||||
return m_heap.size();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
NBestList<THINGY,CMP>::
|
||||
NBestList(size_t const max_size)
|
||||
: m_better(m_heap), m_changed(false)
|
||||
{
|
||||
m_heap.reserve(max_size);
|
||||
}
|
||||
template<typename THINGY, typename CMP>
|
||||
NBestList<THINGY,CMP>::
|
||||
NBestList(size_t const max_size, CMP const& cmp)
|
||||
: m_better(m_list, cmp), m_changed(false)
|
||||
{
|
||||
m_heap.reserve(max_size);
|
||||
}
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
bool
|
||||
NBestList<THINGY,CMP>::
|
||||
add(THINGY const& item)
|
||||
{
|
||||
if (m_heap.size() == m_heap.capacity())
|
||||
{
|
||||
if (m_better.Compare(item, m_list[m_heap.at(0)]))
|
||||
{
|
||||
pop_heap(m_heap.begin(),m_heap.end(),m_better);
|
||||
m_list[m_heap.back()] = item;
|
||||
}
|
||||
else return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_list.push_back(item);
|
||||
m_heap.push_back(m_heap.size());
|
||||
}
|
||||
push_heap(m_heap.begin(),m_heap.end(),m_better);
|
||||
return m_changed = true;
|
||||
}
|
||||
template<typename THINGY, typename CMP>
|
||||
NBestList<THINGY,CMP>::
|
||||
NBestList(size_t const max_size)
|
||||
: m_better(m_heap), m_changed(false)
|
||||
{
|
||||
m_heap.reserve(max_size);
|
||||
}
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
THINGY const&
|
||||
NBestList<THINGY,CMP>::
|
||||
operator[](int i) const
|
||||
{
|
||||
if (m_changed)
|
||||
{
|
||||
m_order.assign(m_heap.begin(),m_heap.end());
|
||||
for (size_t k = m_heap.size(); k != 0; --k)
|
||||
pop_heap(m_order.begin(), m_order.begin()+k);
|
||||
m_changed = false;
|
||||
}
|
||||
if (i < 0) i += m_order.size();
|
||||
return m_list[m_order.at(i)];
|
||||
template<typename THINGY, typename CMP>
|
||||
bool
|
||||
NBestList<THINGY,CMP>::
|
||||
add(THINGY const& item)
|
||||
{
|
||||
if (m_heap.size() == m_heap.capacity()) {
|
||||
if (m_better.Compare(item, m_list[m_heap.at(0)])) {
|
||||
pop_heap(m_heap.begin(),m_heap.end(),m_better);
|
||||
m_list[m_heap.back()] = item;
|
||||
} else return false;
|
||||
} else {
|
||||
m_list.push_back(item);
|
||||
m_heap.push_back(m_heap.size());
|
||||
}
|
||||
|
||||
push_heap(m_heap.begin(),m_heap.end(),m_better);
|
||||
return m_changed = true;
|
||||
}
|
||||
|
||||
template<typename THINGY, typename CMP>
|
||||
THINGY const&
|
||||
NBestList<THINGY,CMP>::
|
||||
operator[](int i) const
|
||||
{
|
||||
if (m_changed) {
|
||||
m_order.assign(m_heap.begin(),m_heap.end());
|
||||
for (size_t k = m_heap.size(); k != 0; --k)
|
||||
pop_heap(m_order.begin(), m_order.begin()+k);
|
||||
m_changed = false;
|
||||
}
|
||||
if (i < 0) i += m_order.size();
|
||||
return m_list[m_order.at(i)];
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -9,62 +9,61 @@
|
||||
// typical use:
|
||||
// vector<thingy> my_vector;
|
||||
// VectorIndexSorter<thingy,less<thingy>,int> sorter(my_vector);
|
||||
// vector<int> order;
|
||||
// vector<int> order;
|
||||
// sorter.GetOrder(order);
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
template<typename VAL, typename COMP = greater<VAL>, typename IDX_T=size_t>
|
||||
class
|
||||
template<typename VAL, typename COMP = greater<VAL>, typename IDX_T=size_t>
|
||||
class
|
||||
VectorIndexSorter : public binary_function<IDX_T const&, IDX_T const&, bool>
|
||||
{
|
||||
vector<VAL> const& m_vecref;
|
||||
boost::shared_ptr<COMP> m_comp;
|
||||
public:
|
||||
|
||||
COMP const& Compare;
|
||||
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
|
||||
: m_vecref(v), Compare(comp)
|
||||
{ }
|
||||
|
||||
VectorIndexSorter(vector<VAL> const& v)
|
||||
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp)
|
||||
{ }
|
||||
|
||||
bool operator()(IDX_T const & a, IDX_T const & b) const
|
||||
{
|
||||
bool fwd = Compare(m_vecref.at(a) ,m_vecref.at(b));
|
||||
bool bwd = Compare(m_vecref[b], m_vecref[a]);
|
||||
return (fwd == bwd ? a < b : fwd);
|
||||
}
|
||||
|
||||
boost::shared_ptr<vector<IDX_T> >
|
||||
GetOrder() const;
|
||||
|
||||
void
|
||||
GetOrder(vector<IDX_T> & order) const;
|
||||
|
||||
};
|
||||
{
|
||||
vector<VAL> const& m_vecref;
|
||||
boost::shared_ptr<COMP> m_comp;
|
||||
public:
|
||||
|
||||
COMP const& Compare;
|
||||
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
|
||||
: m_vecref(v), Compare(comp)
|
||||
{ }
|
||||
|
||||
VectorIndexSorter(vector<VAL> const& v)
|
||||
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp)
|
||||
{ }
|
||||
|
||||
bool operator()(IDX_T const & a, IDX_T const & b) const {
|
||||
bool fwd = Compare(m_vecref.at(a) ,m_vecref.at(b));
|
||||
bool bwd = Compare(m_vecref[b], m_vecref[a]);
|
||||
return (fwd == bwd ? a < b : fwd);
|
||||
}
|
||||
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
boost::shared_ptr<vector<IDX_T> >
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder() const
|
||||
{
|
||||
boost::shared_ptr<vector<IDX_T> > ret(new vector<IDX_T>(m_vecref.size()));
|
||||
get_order(*ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
GetOrder() const;
|
||||
|
||||
void
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder(vector<IDX_T> & order) const
|
||||
{
|
||||
order.resize(m_vecref.size());
|
||||
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
|
||||
sort(order.begin(), order.end(), *this);
|
||||
}
|
||||
GetOrder(vector<IDX_T> & order) const;
|
||||
|
||||
};
|
||||
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
boost::shared_ptr<vector<IDX_T> >
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder() const
|
||||
{
|
||||
boost::shared_ptr<vector<IDX_T> > ret(new vector<IDX_T>(m_vecref.size()));
|
||||
get_order(*ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
void
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder(vector<IDX_T> & order) const
|
||||
{
|
||||
order.resize(m_vecref.size());
|
||||
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
|
||||
sort(order.begin(), order.end(), *this);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user