mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
port PhraseDictionaryMultiModel to new format
This commit is contained in:
parent
1f5fc77c94
commit
ed7ab8146f
@ -1441,6 +1441,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryMultiModel.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryMultiModel.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMultiModel.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/PhraseDictionaryTree.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <string>
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
#include <boost/thread.hpp>
|
||||
#include <boost/thread/mutex.hpp>
|
||||
#endif
|
||||
|
||||
@ -700,6 +701,40 @@ public:
|
||||
void CollectFeatureFunctions();
|
||||
bool CheckWeights() const;
|
||||
|
||||
|
||||
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights) const {
|
||||
#ifdef WITH_THREADS
|
||||
m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
|
||||
#else
|
||||
m_multimodelweights_tmp = weights;
|
||||
#endif
|
||||
}
|
||||
|
||||
// multimodel
|
||||
std::vector<float> m_multimodelweights;
|
||||
#ifdef WITH_THREADS
|
||||
mutable std::map<boost::thread::id, std::vector<float> > m_multimodelweights_tmp;
|
||||
#else
|
||||
mutable std::vector<float> m_multimodelweights_tmp;
|
||||
#endif
|
||||
|
||||
const std::vector<float>* GetMultiModelWeightsVector() const {
|
||||
return &m_multimodelweights;
|
||||
}
|
||||
|
||||
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const {
|
||||
#ifdef WITH_THREADS
|
||||
if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
|
||||
return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
|
||||
}
|
||||
else {
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
return &m_multimodelweights_tmp;
|
||||
#endif
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -25,10 +25,9 @@ using namespace std;
|
||||
namespace Moses
|
||||
|
||||
{
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(size_t numScoreComponent,
|
||||
PhraseDictionaryFeature* feature): PhraseDictionary(numScoreComponent, feature)
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
:PhraseDictionary("PhraseDictionaryMultiModel", line)
|
||||
{
|
||||
m_feature_load = feature;
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
|
||||
@ -45,6 +44,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
|
||||
, const LMList &languageModels
|
||||
, float weightWP)
|
||||
{
|
||||
/*
|
||||
m_languageModels = &languageModels;
|
||||
m_weight = weight;
|
||||
m_weightWP = weightWP;
|
||||
@ -63,7 +63,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
|
||||
|
||||
//how many actual scores there are in the phrase tables
|
||||
//so far, equal to number of log-linear scores, but it is allowed to be smaller (for other combination types)
|
||||
size_t numPtScores = m_numScoreComponent;
|
||||
size_t numPtScores = m_numScoreComponents;
|
||||
|
||||
if (m_mode != "interpolate") {
|
||||
ostringstream msg;
|
||||
@ -88,18 +88,18 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
|
||||
|
||||
if (!FileExists(file) && FileExists(file + ".gz")) file += ".gz";
|
||||
|
||||
PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent, m_feature_load);
|
||||
PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponents, m_feature_load);
|
||||
pdm->SetNumScoreComponentMultiModel(numPtScores); //instead of complaining about inequal number of scores, silently fill up the score vector with zeroes
|
||||
pdm->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
|
||||
m_pd.push_back(pdm);
|
||||
} else if (implementation == Binary) {
|
||||
PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, numInputScores , m_feature_load);
|
||||
PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponents, numInputScores , m_feature_load);
|
||||
pdta->Load(input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
|
||||
m_pd.push_back(pdta);
|
||||
} else if (implementation == Compact) {
|
||||
#ifndef WIN32
|
||||
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
|
||||
pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
|
||||
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponents, implementation, m_feature_load);
|
||||
pdc->SetNumScoreComponentMultiModel(m_numScoreComponents); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
|
||||
pdc->Load( input, output, file, m_weight, m_componentTableLimit, languageModels, m_weightWP);
|
||||
m_pd.push_back(pdc);
|
||||
#else
|
||||
@ -110,6 +110,7 @@ bool PhraseDictionaryMultiModel::Load(const std::vector<FactorType> &input
|
||||
UTIL_THROW(util::Exception,"PhraseDictionaryMultiModel does not support phrase table type " << implementation);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -122,7 +123,7 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
|
||||
|
||||
if (m_mode == "interpolate") {
|
||||
//interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
|
||||
size_t numWeights = m_numScoreComponent-1;
|
||||
size_t numWeights = m_numScoreComponents-1;
|
||||
multimodelweights = getWeights(numWeights, true);
|
||||
}
|
||||
|
||||
@ -161,7 +162,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
|
||||
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
|
||||
TargetPhrase * targetPhrase = *iterTargetPhrase;
|
||||
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(m_feature);
|
||||
std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(this);
|
||||
|
||||
std::string targetString = targetPhrase->GetStringRep(m_output);
|
||||
if (allStats->find(targetString) == allStats->end()) {
|
||||
@ -169,21 +170,21 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
multiModelStatistics * statistics = new multiModelStatistics;
|
||||
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
|
||||
|
||||
Scores scoreVector(m_numScoreComponent);
|
||||
statistics->p.resize(m_numScoreComponent);
|
||||
for(size_t j = 0; j < m_numScoreComponent; ++j){
|
||||
Scores scoreVector(m_numScoreComponents);
|
||||
statistics->p.resize(m_numScoreComponents);
|
||||
for(size_t j = 0; j < m_numScoreComponents; ++j){
|
||||
statistics->p[j].resize(m_numModels);
|
||||
scoreVector[j] = -raw_scores[j];
|
||||
}
|
||||
|
||||
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels); // set scores to 0
|
||||
statistics->targetPhrase->SetScore(this, scoreVector); // set scores to 0
|
||||
|
||||
(*allStats)[targetString] = statistics;
|
||||
|
||||
}
|
||||
multiModelStatistics * statistics = (*allStats)[targetString];
|
||||
|
||||
for(size_t j = 0; j < m_numScoreComponent; ++j){
|
||||
for(size_t j = 0; j < m_numScoreComponents; ++j){
|
||||
statistics->p[j][i] = UntransformScore(raw_scores[j]);
|
||||
}
|
||||
|
||||
@ -201,16 +202,16 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
|
||||
|
||||
multiModelStatistics * statistics = iter->second;
|
||||
|
||||
Scores scoreVector(m_numScoreComponent);
|
||||
Scores scoreVector(m_numScoreComponents);
|
||||
|
||||
for(size_t i = 0; i < m_numScoreComponent-1; ++i){
|
||||
for(size_t i = 0; i < m_numScoreComponents-1; ++i){
|
||||
scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
|
||||
}
|
||||
|
||||
//assuming that last value is phrase penalty
|
||||
scoreVector[m_numScoreComponent-1] = 1.0;
|
||||
scoreVector[m_numScoreComponents-1] = 1.0;
|
||||
|
||||
statistics->targetPhrase->SetScore(m_feature, scoreVector, ScoreComponentCollection(), m_weight, m_weightWP, *m_languageModels);
|
||||
statistics->targetPhrase->SetScore(this, scoreVector);
|
||||
ret->Add(new TargetPhrase(*statistics->targetPhrase));
|
||||
}
|
||||
return ret;
|
||||
@ -303,7 +304,7 @@ void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc) {
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModel::CleanUp(const InputType &source) {
|
||||
void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source) {
|
||||
#ifdef WITH_THREADS
|
||||
boost::mutex::scoped_lock lock(m_sentenceMutex);
|
||||
PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
|
||||
@ -327,7 +328,7 @@ void PhraseDictionaryMultiModel::CleanUp(const InputType &source) {
|
||||
|
||||
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source) {
|
||||
for(size_t i = 0; i < m_numModels; ++i){
|
||||
m_pd[i]->CleanUp(source);
|
||||
m_pd[i]->CleanUpAfterSentenceProcessing(source);
|
||||
}
|
||||
}
|
||||
|
||||
@ -380,10 +381,10 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
|
||||
Sentence sentence;
|
||||
CleanUp(sentence); // free memory used by compact phrase tables
|
||||
|
||||
size_t numWeights = m_numScoreComponent;
|
||||
size_t numWeights = m_numScoreComponents;
|
||||
if (m_mode == "interpolate") {
|
||||
//interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
|
||||
numWeights = m_numScoreComponent-1;
|
||||
numWeights = m_numScoreComponents-1;
|
||||
}
|
||||
|
||||
vector<float> ret (m_numModels*numWeights);
|
@ -21,11 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#define moses_PhraseDictionaryMultiModel_h
|
||||
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||
#ifndef WIN32
|
||||
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
@ -62,7 +57,7 @@ friend class CrossEntropy;
|
||||
#endif
|
||||
|
||||
public:
|
||||
PhraseDictionaryMultiModel(size_t m_numScoreComponent, PhraseDictionaryFeature* feature);
|
||||
PhraseDictionaryMultiModel(const std::string &line);
|
||||
~PhraseDictionaryMultiModel();
|
||||
bool Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
@ -77,7 +72,7 @@ public:
|
||||
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
|
||||
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
|
||||
void CacheForCleanup(TargetPhraseCollection* tpc);
|
||||
void CleanUp(const InputType &source);
|
||||
void CleanUpAfterSentenceProcessing(const InputType &source);
|
||||
virtual void CleanUpComponentModels(const InputType &source);
|
||||
#ifdef WITH_DLIB
|
||||
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
||||
@ -100,7 +95,6 @@ protected:
|
||||
std::vector<FactorType> m_output;
|
||||
size_t m_numModels;
|
||||
size_t m_componentTableLimit;
|
||||
PhraseDictionaryFeature* m_feature_load;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
||||
#ifdef WITH_THREADS
|
Loading…
Reference in New Issue
Block a user