2015-11-03 18:04:26 +03:00
|
|
|
/*
 * ProbingPT.cpp
 *
 *  Created on: 3 Nov 2015
 *      Author: hieu
 */
|
2015-12-07 21:30:17 +03:00
|
|
|
#include <boost/foreach.hpp>
|
2015-11-03 18:04:26 +03:00
|
|
|
#include "ProbingPT.h"
|
2015-11-04 16:09:53 +03:00
|
|
|
#include "../System.h"
|
|
|
|
#include "../Scores.h"
|
|
|
|
#include "../FF/FeatureFunctions.h"
|
|
|
|
#include "../Search/Manager.h"
|
2015-11-13 03:05:54 +03:00
|
|
|
#include "../legacy/FactorCollection.h"
|
|
|
|
#include "../legacy/ProbingPT/quering.hh"
|
2015-11-13 13:40:55 +03:00
|
|
|
#include "../legacy/Util2.h"
|
2015-11-03 18:04:26 +03:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
|
2015-11-03 18:04:26 +03:00
|
|
|
/**
 * Construct the phrase table from its configuration line.
 *
 * @param startInd forwarded unchanged to the PhraseTable base class
 * @param line     forwarded unchanged to the PhraseTable base class
 *
 * After the base class is constructed, ReadParameters() is called.
 *
 * m_engine is explicitly initialised to NULL. Within this file it is only
 * assigned in Load(), while the destructor deletes it unconditionally;
 * without the initialisation, destroying an instance on which Load() was
 * never called would delete an indeterminate pointer.
 */
ProbingPT::ProbingPT(size_t startInd, const std::string &line)
  :PhraseTable(startInd, line)
  ,m_engine(NULL)
{
  ReadParameters();
}
|
|
|
|
|
2015-11-04 03:37:35 +03:00
|
|
|
/**
 * Destroy the phrase table and release the query engine it holds.
 */
ProbingPT::~ProbingPT()
{
  // The engine is allocated with new in Load().
  delete m_engine;
}
|
|
|
|
|
|
|
|
/**
 * Open the on-disk table and build the id translation tables.
 *
 * Creates the QueryEngine for m_path, sets the m_unkId sentinel, and then
 * registers every source and target word of the engine with the system
 * vocabulary:
 *  - m_sourceVocab is indexed by factor id and stores the probing id
 *    (slots that are never assigned hold m_unkId);
 *  - m_targetVocab is indexed by probing id and stores the factor
 *    (slots that are never assigned hold NULL).
 *
 * @param system provides the FactorCollection the words are added to
 */
void ProbingPT::Load(System &system)
{
  typedef std::map<uint64_t, std::string> SourceVocabMap;
  typedef std::map<unsigned int, std::string> TargetVocabMap;

  m_engine = new QueryEngine(m_path.c_str());

  // Sentinel used to fill source slots that have no probing id.
  m_unkId = 456456546456;

  FactorCollection &vocab = system.GetVocab();

  // source vocab: factor id -> probing id
  const SourceVocabMap &sourceVocab = m_engine->getSourceVocab();
  for (SourceVocabMap::const_iterator srcIt = sourceVocab.begin();
       srcIt != sourceVocab.end(); ++srcIt) {
    const std::string &word = srcIt->second;
    const Factor *srcFactor = vocab.AddFactor(word, system);

    uint64_t srcProbingId = srcIt->first;
    size_t slot = srcFactor->GetId();

    // Grow on demand, padding any gap with the unknown-word sentinel.
    if (slot >= m_sourceVocab.size()) {
      m_sourceVocab.resize(slot + 1, m_unkId);
    }
    m_sourceVocab[slot] = srcProbingId;
  }

  // target vocab: probing id -> factor
  const TargetVocabMap &targetVocab = m_engine->getVocab();
  for (TargetVocabMap::const_iterator tgtIt = targetVocab.begin();
       tgtIt != targetVocab.end(); ++tgtIt) {
    const std::string &word = tgtIt->second;
    const Factor *tgtFactor = vocab.AddFactor(word, system);

    unsigned int tgtProbingId = tgtIt->first;

    // Grow on demand, padding any gap with NULL.
    if (tgtProbingId >= m_targetVocab.size()) {
      m_targetVocab.resize(tgtProbingId + 1, NULL);
    }
    m_targetVocab[tgtProbingId] = tgtFactor;
  }
}
|
|
|
|
|
2015-12-07 21:30:17 +03:00
|
|
|
void ProbingPT::Lookup(const Manager &mgr, InputPaths &inputPaths) const
|
|
|
|
{
|
2016-01-01 20:14:40 +03:00
|
|
|
RecycleData &recycler = GetThreadSpecificObj(m_recycleData);
|
2016-01-01 19:20:37 +03:00
|
|
|
|
2016-01-13 20:18:40 +03:00
|
|
|
BOOST_FOREACH(InputPath *path, inputPaths) {
|
2015-12-10 17:06:42 +03:00
|
|
|
TargetPhrases *tpsPtr;
|
2016-01-13 20:18:40 +03:00
|
|
|
tpsPtr = Lookup(mgr, mgr.GetPool(), *path, recycler);
|
|
|
|
path->AddTargetPhrases(*this, tpsPtr);
|
2015-12-07 21:30:17 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-01 19:20:37 +03:00
|
|
|
/**
 * Look up the translations of a single input path.
 *
 * If the path has a prefix path and that prefix has no target phrases from
 * this table, the lookup is skipped and NULL is returned. Otherwise the
 * path's sub-phrase is queried through CreateTargetPhrase().
 *
 * @param mgr       manager; its system is passed on to CreateTargetPhrase()
 * @param pool      memory pool passed on to CreateTargetPhrase()
 * @param inputPath path whose subPhrase is to be translated
 * @param recycler  recycle data passed on to CreateTargetPhrase()
 * @return the target phrases, or NULL when the lookup is skipped or yields
 *         nothing
 */
TargetPhrases* ProbingPT::Lookup(const Manager &mgr,
    MemPool &pool,
    InputPath &inputPath,
    RecycleData &recycler) const
{
  // assume all paths have prefixes, except rules with 1 word source
  if (inputPath.prefixPath && inputPath.prefixPath->GetTargetPhrases(*this) == NULL) {
    return NULL;
  }

  const Phrase &source = inputPath.subPhrase;
  return CreateTargetPhrase(pool, mgr.system, source, recycler);
}
|
|
|
|
|
2016-01-01 19:20:37 +03:00
|
|
|
/**
 * Query the probing table for a source phrase and build its target phrases.
 *
 * The source phrase is converted to probing ids, hashed into a key with
 * QueryEngine::getKey() and looked up with QueryEngine::query(). Every hit is
 * turned into a TargetPhrase, added to a pool-allocated TargetPhrases
 * collection and pushed onto recycler.tt. The collection is then pruned to
 * m_tableLimit and passed to EvaluateAfterTablePruning().
 *
 * @param pool         memory pool the results are allocated from
 * @param system       provides the feature functions
 * @param sourcePhrase phrase to translate; expected to be non-empty
 * @param recycler     passed to the query; receives the consumed target_text
 *                     objects
 * @return the target phrases, or NULL if the source phrase is empty, contains
 *         a word unknown to the table, or the query finds nothing
 */
TargetPhrases* ProbingPT::CreateTargetPhrase(MemPool &pool,
    const System &system,
    const Phrase &sourcePhrase,
    RecycleData &recycler) const
{
  const size_t sourceSize = sourcePhrase.GetSize();
  assert(sourceSize);
  if (sourceSize == 0) {
    // Nothing to look up; also keeps &probingSource[0] below valid when
    // asserts are compiled out.
    return NULL;
  }

  // Heap-backed buffer instead of a variable-length array, which is a
  // compiler extension and not standard C++.
  std::vector<uint64_t> probingSource(sourceSize);
  bool ok = false;
  ConvertToProbingSourcePhrase(sourcePhrase, ok, &probingSource[0]);
  if (!ok) {
    // source phrase contains a word unknown in the pt.
    // We know immediately there's no translation for it
    return NULL;
  }

  // Actual lookup
  std::pair<bool, std::vector<target_text*> > query_result;
  const uint64_t key = m_engine->getKey(&probingSource[0], sourceSize);
  query_result = m_engine->query(key, recycler);

  if (!query_result.first) {
    assert(query_result.second.size() == 0);
    return NULL;
  }

  //m_engine->printTargetInfo(query_result.second);
  const std::vector<target_text*> &probingTargetPhrases = query_result.second;
  TargetPhrases *tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, probingTargetPhrases.size());

  for (size_t i = 0; i < probingTargetPhrases.size(); ++i) {
    target_text *probingTargetPhrase = probingTargetPhrases[i];
    TargetPhrase *tp = CreateTargetPhrase(pool, system, sourcePhrase, *probingTargetPhrase);

    tps->AddTargetPhrase(*tp);

    // Hand the consumed entry over to the recycler.
    recycler.tt.push_back(probingTargetPhrase);
  }

  tps->SortAndPrune(m_tableLimit);
  system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);

  return tps;
}
|
|
|
|
|
2015-11-03 19:09:49 +03:00
|
|
|
/**
 * Build one TargetPhrase from one entry returned by the query engine.
 *
 * The target words are mapped from probing ids to factors with
 * GetTargetFactor(). The entry's probabilities are copied, passed through
 * TransformScore and FloorScore unless the engine reports log-probs, and
 * added to the phrase's scores. Any probabilities beyond m_numScores are
 * stored in tp->scoreProperties. Finally all feature functions are evaluated
 * in isolation on the new phrase.
 *
 * @param pool                memory pool the phrase is allocated from
 * @param system              provides the feature functions
 * @param sourcePhrase        source side, passed to EvaluateInIsolation()
 * @param probingTargetPhrase entry supplying target_phrase and prob
 * @return the newly created, pool-allocated target phrase
 */
TargetPhrase *ProbingPT::CreateTargetPhrase(MemPool &pool, const System &system, const Phrase &sourcePhrase, const target_text &probingTargetPhrase) const
{
  const std::vector<unsigned int> &probingPhrase = probingTargetPhrase.target_phrase;
  const size_t size = probingPhrase.size();

  TargetPhrase *tp = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size);

  // words
  for (size_t i = 0; i < size; ++i) {
    const uint64_t probingId = probingPhrase[i];
    const Factor *factor = GetTargetFactor(probingId);
    assert(factor);

    Word &word = (*tp)[i];
    word[0] = factor;
  }

  // score for this phrase table
  // Heap-backed buffer instead of a variable-length array, which is a
  // compiler extension and not standard C++.
  const size_t numProbs = probingTargetPhrase.prob.size();
  std::vector<SCORE> scores(probingTargetPhrase.prob.begin(), probingTargetPhrase.prob.end());
  SCORE *scoreArr = scores.empty() ? NULL : &scores[0];

  if (!m_engine->IsLogProb()) {
    std::transform(scores.begin(), scores.end(), scores.begin(), TransformScore);
    std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
  }
  tp->GetScores().PlusEquals(system, *this, scoreArr);

  // extra scores
  if (numProbs > m_numScores) {
    // we have extra scores, possibly for lex ro. Keep them in the target phrase.
    const size_t numExtraScores = numProbs - m_numScores;
    tp->scoreProperties = pool.Allocate<SCORE>(numExtraScores);
    memcpy(tp->scoreProperties, scoreArr + m_numScores, sizeof(SCORE) * numExtraScores);
  }

  // Alignment is currently not transferred to the target phrase:
  //  const std::vector<unsigned char> &alignments = probingTargetPhrase.word_all1;
  //
  //  AlignmentInfo &aligns = tp->GetAlignTerm();
  //  for (size_t i = 0; i < alignS.size(); i += 2 ) {
  //    aligns.Add((size_t) alignments[i], (size_t) alignments[i+1]);
  //  }

  // score of all other ff when this rule is being loaded
  const FeatureFunctions &ffs = system.featureFunctions;
  ffs.EvaluateInIsolation(pool, system, sourcePhrase, *tp);
  return tp;
}
|
|
|
|
|
2015-12-11 01:01:43 +03:00
|
|
|
void ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sourcePhrase, bool &ok, uint64_t probingSource[]) const
|
2015-11-03 18:04:26 +03:00
|
|
|
{
|
|
|
|
|
|
|
|
size_t size = sourcePhrase.GetSize();
|
|
|
|
for (size_t i = 0; i < size; ++i) {
|
2015-11-13 01:51:13 +03:00
|
|
|
const Factor *factor = sourcePhrase[i][0];
|
2015-11-03 18:04:26 +03:00
|
|
|
uint64_t probingId = GetSourceProbingId(factor);
|
|
|
|
if (probingId == m_unkId) {
|
|
|
|
ok = false;
|
2015-12-11 01:01:43 +03:00
|
|
|
return;
|
2015-11-03 18:04:26 +03:00
|
|
|
} else {
|
2015-12-11 01:01:43 +03:00
|
|
|
probingSource[i] = probingId;
|
2015-11-03 18:04:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ok = true;
|
|
|
|
}
|
|
|
|
|
2015-12-28 23:07:47 +03:00
|
|
|
/**
 * Placeholder with an empty body: none of the arguments are read and
 * scoreArr is left untouched.
 */
void ProbingPT::GetScoresProperty(const std::string &key, size_t ind, SCORE *scoreArr)
{
  // Intentionally empty.
}
|
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
}
|
|
|
|
|