A little more progress towards using prefix subphrases to optimize translation rule lookup

This commit is contained in:
Hieu Hoang 2013-07-04 09:24:13 +01:00
parent 1212944653
commit 934e24718e
6 changed files with 24 additions and 13 deletions

View File

@ -6,7 +6,9 @@ const TargetPhraseCollection *InputLatticeNode::GetTargetPhrases(const PhraseDic
{
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary);
CHECK(iter != m_targetPhrases.end());
if (iter == m_targetPhrases.end()) {
return NULL;
}
return iter->second.first;
}
@ -14,13 +16,23 @@ const void *InputLatticeNode::GetPtNode(const PhraseDictionary &phraseDictionary
{
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary);
CHECK(iter != m_targetPhrases.end());
if (iter == m_targetPhrases.end()) {
return NULL;
}
return iter->second.second;
}
std::ostream& operator<<(std::ostream& out, const InputLatticeNode& obj)
{
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevNode() << " " << obj.GetPhrase();
out << "pt: ";
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
const PhraseDictionary *pt = iter->first;
out << pt << " ";
}
return out;
}

View File

@ -19,7 +19,7 @@ This is for both sentence input, and confusion network/lattices
*/
class InputLatticeNode
{
friend std::ostream& operator<<(std::ostream& out, const InputLatticeNode &pbj);
friend std::ostream& operator<<(std::ostream& out, const InputLatticeNode &obj);
protected:
const InputLatticeNode *m_prevNode;
@ -32,6 +32,7 @@ public:
: m_prevNode(NULL)
, m_range(NOT_FOUND, NOT_FOUND)
{}
InputLatticeNode(const Phrase &phrase, const WordsRange &range, const InputLatticeNode *prevNode)
:m_prevNode(prevNode)
,m_phrase(phrase)

View File

@ -133,7 +133,7 @@ void PhraseDictionaryMemory::SortAndPrune()
m_collection.Sort(GetTableLimit());
}
}
/*
void PhraseDictionaryMemory::SetTargetPhraseFromPtMatrix(const std::vector<InputLatticeNode*> &phraseDictionaryQueue) const
{
// UTIL_THROW(util::Exception, "SetTargetPhraseFromPtMatrix() not implemented");
@ -142,7 +142,6 @@ void PhraseDictionaryMemory::SetTargetPhraseFromPtMatrix(const std::vector<Input
InputLatticeNode &node = *phraseDictionaryQueue[i];
const Phrase &phrase = node.GetPhrase();
const InputLatticeNode *prevNode = node.GetPrevNode();
cerr << node << endl;
const PhraseDictionaryNodeMemory *prevPtNode;
@ -172,7 +171,7 @@ cerr << node << endl;
}
}
}
*/
TO_STRING_BODY(PhraseDictionaryMemory);
// friend

View File

@ -53,7 +53,7 @@ public:
const InputType &,
const ChartCellCollectionBase &);
//void SetTargetPhraseFromPtMatrix(const std::vector<InputLatticeNode*> &phraseDictionaryQueue) const;
void SetTargetPhraseFromPtMatrix(const std::vector<InputLatticeNode*> &phraseDictionaryQueue) const;
TO_STRING();

View File

@ -40,21 +40,20 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
size_t endPos = startPos + phaseSize -1;
std::vector<InputLatticeNode> &vec = m_targetPhrasesfromPt[startPos];
std::vector<InputLatticeNode*> &vec = m_targetPhrasesfromPt[startPos];
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
WordsRange range(startPos, endPos);
if (range.GetNumWordsCovered() == 1) {
InputLatticeNode node(subphrase, range, NULL);
InputLatticeNode *node = new InputLatticeNode(subphrase, range, NULL);
vec.push_back(node);
}
else {
const InputLatticeNode &prevNode = GetInputLatticeNode(startPos, endPos - 1);
InputLatticeNode node(subphrase, range, &prevNode);
InputLatticeNode *node = new InputLatticeNode(subphrase, range, &prevNode);
vec.push_back(node);
}
cerr << vec.back() << endl;
}
}
@ -108,7 +107,7 @@ InputLatticeNode &TranslationOptionCollectionText::GetInputLatticeNode(size_t st
{
size_t offset = endPos - startPos;
CHECK(offset < m_targetPhrasesfromPt[startPos].size());
return m_targetPhrasesfromPt[startPos][offset];
return *m_targetPhrasesfromPt[startPos][offset];
}
void TranslationOptionCollectionText::CreateTranslationOptions()

View File

@ -38,7 +38,7 @@ class Sentence;
class TranslationOptionCollectionText : public TranslationOptionCollection
{
public:
typedef std::vector< std::vector<InputLatticeNode> > TargetPhraseMatrix;
typedef std::vector< std::vector<InputLatticeNode*> > TargetPhraseMatrix;
protected:
TargetPhraseMatrix m_targetPhrasesfromPt; /*< contains translation options */