mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
support for binary models with multimodelcounts
This commit is contained in:
parent
0450fd6776
commit
adb7de6e61
@ -292,6 +292,14 @@ protected:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (scoreVector.size() != m_obj->GetNumScoreComponentMultiModel()) {
|
||||||
|
//PhraseDictionaryMultiModel may use input phrase dictionaries with a different number of features than it is assigned in the log-linear model;
|
||||||
|
//filling extra slots with zeroes to prevent error messages on the way
|
||||||
|
if (m_obj->GetNumScoreComponentMultiModel() > 0 && scoreVector.size() < m_obj->GetNumScoreComponentMultiModel()) {
|
||||||
|
const_cast<Scores &>(scoreVector).resize(m_obj->GetNumScoreComponentMultiModel());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, sparseFeatures, weights, weightWP, *m_languageModels);
|
targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, sparseFeatures, weights, weightWP, *m_languageModels);
|
||||||
targetPhrase.SetSourcePhrase(*srcPtr);
|
targetPhrase.SetSourcePhrase(*srcPtr);
|
||||||
}
|
}
|
||||||
|
@ -264,6 +264,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
|
|||||||
, m_config
|
, m_config
|
||||||
, weightT
|
, weightT
|
||||||
, m_tableLimit
|
, m_tableLimit
|
||||||
|
, m_numInputScores
|
||||||
, system->GetLanguageModels()
|
, system->GetLanguageModels()
|
||||||
, system->GetWeightWordPenalty());
|
, system->GetWeightWordPenalty());
|
||||||
CHECK(ret);
|
CHECK(ret);
|
||||||
|
@ -16,6 +16,7 @@ You should have received a copy of the GNU Lesser General Public
|
|||||||
License along with this library; if not, write to the Free Software
|
License along with this library; if not, write to the Free Software
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
#include "util/exception.hh"
|
||||||
|
|
||||||
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
|
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
|
||||||
|
|
||||||
@ -73,6 +74,7 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
|||||||
, const vector<string> &config
|
, const vector<string> &config
|
||||||
, const vector<float> &weight
|
, const vector<float> &weight
|
||||||
, size_t tableLimit
|
, size_t tableLimit
|
||||||
|
, size_t numInputScores
|
||||||
, const LMList &languageModels
|
, const LMList &languageModels
|
||||||
, float weightWP)
|
, float weightWP)
|
||||||
{
|
{
|
||||||
@ -104,10 +106,7 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
|||||||
|
|
||||||
string delim = ":";
|
string delim = ":";
|
||||||
size_t delim_pos = files[i].find(delim);
|
size_t delim_pos = files[i].find(delim);
|
||||||
if (delim_pos >= files[i].size()) {
|
UTIL_THROW_IF(delim_pos >= files[i].size(), util::Exception, "Phrase table must be specified in this format: Implementation:Path");
|
||||||
UserMessage::Add("Phrase table must be specified in this format: Implementation:Path");
|
|
||||||
CHECK(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
impl = files[i].substr(0,delim_pos);
|
impl = files[i].substr(0,delim_pos);
|
||||||
file = files[i].substr(delim_pos+1,files[i].size());
|
file = files[i].substr(delim_pos+1,files[i].size());
|
||||||
@ -119,11 +118,11 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
|||||||
|
|
||||||
PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(impl);
|
PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(impl);
|
||||||
|
|
||||||
if (implementation == Memory) {
|
//how many actual scores there are in the phrase tables
|
||||||
|
size_t numScoresCounts = 3;
|
||||||
|
size_t numScoresTargetCounts = 1;
|
||||||
|
|
||||||
//how many actual scores there are in the phrase tables
|
if (implementation == Memory) {
|
||||||
size_t numScoresCounts = 3;
|
|
||||||
size_t numScoresTargetCounts = 1;
|
|
||||||
|
|
||||||
if (!FileExists(main_table) && FileExists(main_table + ".gz")) main_table += ".gz";
|
if (!FileExists(main_table) && FileExists(main_table + ".gz")) main_table += ".gz";
|
||||||
if (!FileExists(target_table) && FileExists(target_table + ".gz")) target_table += ".gz";
|
if (!FileExists(target_table) && FileExists(target_table + ".gz")) target_table += ".gz";
|
||||||
@ -137,8 +136,17 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
|||||||
pdm_inverse->SetNumScoreComponentMultiModel(numScoresTargetCounts);
|
pdm_inverse->SetNumScoreComponentMultiModel(numScoresTargetCounts);
|
||||||
pdm_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
|
pdm_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
|
||||||
m_inverse_pd.push_back(pdm_inverse);
|
m_inverse_pd.push_back(pdm_inverse);
|
||||||
}
|
} else if (implementation == Binary) {
|
||||||
else if (implementation == Compact) {
|
PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, numInputScores , m_feature_load);
|
||||||
|
pdta->SetNumScoreComponentMultiModel(m_numScoreComponent); //for binary models, we need to pass number of log-linear components to correctly resize the score vector
|
||||||
|
pdta->Load(input, output, main_table, m_weight, m_componentTableLimit, languageModels, m_weightWP);
|
||||||
|
m_pd.push_back(pdta);
|
||||||
|
|
||||||
|
PhraseDictionaryTreeAdaptor* pdta_inverse = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, numInputScores , m_feature_load);
|
||||||
|
pdta_inverse->SetNumScoreComponentMultiModel(m_numScoreComponent);
|
||||||
|
pdta_inverse->Load(input, output, target_table, m_weight, m_componentTableLimit, languageModels, m_weightWP);
|
||||||
|
m_inverse_pd.push_back(pdta_inverse);
|
||||||
|
} else if (implementation == Compact) {
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
|
PhraseDictionaryCompact* pdc = new PhraseDictionaryCompact(m_numScoreComponent, implementation, m_feature_load);
|
||||||
pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
|
pdc->SetNumScoreComponentMultiModel(m_numScoreComponent); //for compact models, we need to pass number of log-linear components to correctly resize the score vector
|
||||||
@ -150,12 +158,11 @@ bool PhraseDictionaryMultiModelCounts::Load(const vector<FactorType> &input
|
|||||||
pdc_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
|
pdc_inverse->Load( input, output, target_table, m_weight, componentTableLimit, languageModels, m_weightWP);
|
||||||
m_inverse_pd.push_back(pdc_inverse);
|
m_inverse_pd.push_back(pdc_inverse);
|
||||||
#else
|
#else
|
||||||
CHECK(false);
|
UTIL_THROW(util::Exception, "Compact phrase table not supported in windows");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
UserMessage::Add("phrase table type unknown to multi-model mode");
|
UTIL_THROW(util::Exception,"PhraseDictionaryMultiModel does not support phrase table type " << implementation);
|
||||||
CHECK(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lexicalTable* e2f = new lexicalTable;
|
lexicalTable* e2f = new lexicalTable;
|
||||||
@ -257,8 +264,7 @@ TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseColl
|
|||||||
multiModelCountsStatistics * statistics = iter->second;
|
multiModelCountsStatistics * statistics = iter->second;
|
||||||
|
|
||||||
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
|
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
|
||||||
UserMessage::Add(" alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
|
UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
|
||||||
CHECK(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -619,8 +625,7 @@ double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) con
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
score = 0;
|
score = 0;
|
||||||
UserMessage::Add("Trying to optimize feature that I don't know. Aborting");
|
UTIL_THROW(util::Exception, "Trying to optimize feature that I don't know. Aborting");
|
||||||
CHECK(false);
|
|
||||||
}
|
}
|
||||||
total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
|
total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
|
||||||
n += f;
|
n += f;
|
||||||
|
@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
|
|
||||||
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
|
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
|
||||||
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
|
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
|
||||||
|
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
||||||
#endif
|
#endif
|
||||||
@ -90,6 +91,7 @@ public:
|
|||||||
, const std::vector<std::string> &files
|
, const std::vector<std::string> &files
|
||||||
, const std::vector<float> &weight
|
, const std::vector<float> &weight
|
||||||
, size_t tableLimit
|
, size_t tableLimit
|
||||||
|
, size_t numInputScores
|
||||||
, const LMList &languageModels
|
, const LMList &languageModels
|
||||||
, float weightWP);
|
, float weightWP);
|
||||||
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
||||||
|
Loading…
Reference in New Issue
Block a user