create prefix array for sentence input

This commit is contained in:
Hieu Hoang 2013-06-17 19:45:47 +01:00
parent acb4baacaf
commit 4ccb1c9dec
4 changed files with 43 additions and 5 deletions

View File

@ -39,6 +39,17 @@ using namespace std;
namespace Moses
{
/** Construct a node from the phrase it represents and the source range
 *  that phrase covers. Both arguments are copied into the node; the list
 *  of linked nodes starts out empty.
 */
InputLatticeNode::InputLatticeNode(const Phrase &phrase, const WordsRange &range)
  : m_phrase(phrase), m_range(range)
{}
void InputLatticeNode::AddNext(const InputLatticeNode &next)
{
m_next.push_back(&next);
}
/** helper for pruning */
bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
{

View File

@ -43,6 +43,25 @@ class FactorMask;
class Word;
class DecodeGraph;
/** A node of the input, used for both sentence input and confusion
 *  networks/lattices. Each node contains:
 *   1. the substring used to search the phrase table
 *   2. the source range it covers
 *   3. a list of InputLatticeNode that it is a prefix of
 */
class InputLatticeNode
{
public:
  /** Build a node from a phrase and the source range it covers. */
  InputLatticeNode(const Phrase &phrase, const WordsRange &range);

  /** Add `next` to the list of linked nodes; the node is stored by pointer. */
  void AddNext(const InputLatticeNode &next);

protected:
  Phrase m_phrase;      // substring of the input
  WordsRange m_range;   // source range covered by m_phrase
  std::vector<const InputLatticeNode*> m_next; // linked nodes, held by pointer
};
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
* conditions (trellises, factored input, normal text, xml markup)

View File

@ -38,10 +38,18 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
size_t size = input.GetSize();
m_collection.resize(size);
for (size_t startPos = 0; startPos < size; ++startPos) {
std::vector<Phrase> &vec = m_collection[startPos];
std::vector<InputLatticeNode> &vec = m_collection[startPos];
for (size_t endPos = startPos; endPos < size; ++endPos) {
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
vec.push_back(subphrase);
WordsRange range(startPos, endPos);
InputLatticeNode node(subphrase, range);
if (range.GetNumWordsCovered() > 1) {
InputLatticeNode prevNode = GetPhrase(startPos, endPos - 1);
node.AddNext(prevNode);
}
vec.push_back(node);
}
}
/*
@ -88,7 +96,7 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
}
}
const Phrase &TranslationOptionCollectionText::GetPhrase(size_t startPos, size_t endPos) const
const InputLatticeNode &TranslationOptionCollectionText::GetPhrase(size_t startPos, size_t endPos) const
{
size_t offset = endPos - startPos;
CHECK(offset < m_collection[startPos].size());

View File

@ -42,10 +42,10 @@ public:
void ProcessUnknownWord(size_t sourcePos);
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
const Phrase &GetPhrase(size_t startPos, size_t endPos) const;
const InputLatticeNode &GetPhrase(size_t startPos, size_t endPos) const;
protected:
std::vector<std::vector<Phrase> > m_collection;
std::vector<std::vector<InputLatticeNode> > m_collection;
};