create matrix of subphrases in class TranslationOptionCollectionText

This commit is contained in:
Hieu Hoang 2013-06-28 13:21:04 +01:00
parent c963338476
commit fd1a70739f
4 changed files with 93 additions and 3 deletions

View File

@ -38,6 +38,14 @@ using namespace std;
namespace Moses
{
/** Look up the target phrase collection stored for one phrase dictionary.
 *  \param phraseDictionary key to search for in m_targetPhrases
 *  \return the collection pointer stored under that key
 *  The CHECK fails when no entry exists for phraseDictionary.
 */
const TargetPhraseCollection *InputLatticeNode::GetTargetPhrases(const PhraseDictionary *phraseDictionary) const
{
  typedef std::map<const PhraseDictionary*, const TargetPhraseCollection *> TargetPhraseMap;

  const TargetPhraseMap::const_iterator found = m_targetPhrases.find(phraseDictionary);
  CHECK(found != m_targetPhrases.end());
  return found->second;
}
/** helper for pruning */
bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
{

View File

@ -42,6 +42,44 @@ class InputType;
class FactorMask;
class Word;
class DecodeGraph;
class PhraseDictionary;
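/** Each node contains
1. the substring used to search the phrase table
2. the source range it covers
3. a list of InputLatticeNode that it is a prefix of
   (NOTE: the class as declared here holds no such list; it holds the
   target phrases found for the substring, keyed by phrase dictionary)
This is for both sentence input and confusion networks/lattices
*/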
class InputLatticeNode
{
protected:
  /// source-side phrase held by this node
  Phrase m_phrase;
  /// source range held by this node
  WordsRange m_range;
  /// target phrase collection pointers, keyed by phrase dictionary pointer
  std::map<const PhraseDictionary*, const TargetPhraseCollection *> m_targetPhrases;

public:
  /// Default node: default-constructed phrase, range set to (NOT_FOUND, NOT_FOUND).
  InputLatticeNode()
    : m_range(NOT_FOUND, NOT_FOUND)
  {
  }

  /// Node holding a copy of the given phrase and range.
  InputLatticeNode(const Phrase &phrase, const WordsRange &range)
    : m_phrase(phrase)
    , m_range(range)
  {
  }

  /// \return the phrase held by this node
  const Phrase &GetPhrase() const
  {
    return m_phrase;
  }

  /// \return the source range held by this node
  const WordsRange &GetWordsRange() const
  {
    return m_range;
  }

  /// Store targetPhrases under phraseDictionary, replacing any entry already stored for that key.
  void SetTargetPhrases(const PhraseDictionary *phraseDictionary, const TargetPhraseCollection *targetPhrases)
  {
    m_targetPhrases[phraseDictionary] = targetPhrases;
  }

  /// \return the collection stored for phraseDictionary (defined out of line)
  const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary *phraseDictionary) const;
};
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
@ -67,6 +105,7 @@ protected:
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
std::vector<Phrase*> m_unksrcs;
std::vector<InputLatticeNode*> m_phraseDictionaryQueue;
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold);

View File

@ -30,8 +30,33 @@ using namespace std;
namespace Moses
{
/** constructor; just initialize the base class */
// NOTE(review): this one-line definition has the same signature as the
// definition that immediately follows it (only the parameter name differs).
// It looks like the pre-change side of the diff — confirm that only one of
// the two definitions is kept in the actual source file.
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
/** Constructor: initialises the base class, then builds the matrix of all
 *  contiguous subphrases of the input.
 *
 *  m_targetPhrasesfromPt[startPos][endPos - startPos] holds the node for the
 *  span [startPos, endPos] (both inclusive). Once the matrix is complete, a
 *  pointer to every node is appended to m_phraseDictionaryQueue, ordered by
 *  increasing phrase length and, within one length, by increasing start
 *  position.
 *
 *  \param input                      sentence whose subphrases are enumerated
 *  \param maxNoTransOptPerCoverage   forwarded to the base class
 *  \param translationOptionThreshold forwarded to the base class
 */
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  const size_t size = input.GetSize();

  m_targetPhrasesfromPt.resize(size);
  for (size_t startPos = 0; startPos < size; ++startPos) {
    std::vector<InputLatticeNode> &vec = m_targetPhrasesfromPt[startPos];
    // Exactly one node per end position in [startPos, size): allocate once
    // instead of reallocating and copying nodes while the row grows.
    vec.reserve(size - startPos);

    for (size_t endPos = startPos; endPos < size; ++endPos) {
      // Build the range once and use it for both the substring and the node.
      const WordsRange range(startPos, endPos);
      const Phrase subphrase(input.GetSubString(range));
      vec.push_back(InputLatticeNode(subphrase, range));
    }
  }

  // Node addresses are taken only now that every row has its final size, so
  // no later push_back into the matrix can invalidate the stored pointers.
  // The matrix holds size * (size + 1) / 2 nodes in total.
  m_phraseDictionaryQueue.reserve(m_phraseDictionaryQueue.size() + size * (size + 1) / 2);

  for (size_t phraseSize = 1; phraseSize <= size; ++phraseSize) {
    // phraseSize <= size here, so the unsigned subtraction cannot wrap.
    for (size_t startPos = 0; startPos < size - phraseSize + 1; ++startPos) {
      const size_t endPos = startPos + phraseSize - 1;
      //cerr << startPos << "-" << endPos << "=" << GetPhrase(startPos, endPos) << endl;
      InputLatticeNode &node = GetInputLatticeNode(startPos, endPos);
      m_phraseDictionaryQueue.push_back(&node);
    }
  }
}
/* forcibly create translation option for a particular source word.
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
@ -69,6 +94,14 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
};
/** Return the node stored for the source span [startPos, endPos] (both inclusive).
 *
 *  Nodes are stored at m_targetPhrasesfromPt[startPos][endPos - startPos].
 *  All three CHECKs guard the indexing: vector operator[] performs no bounds
 *  checking of its own.
 *
 *  \param startPos index of the first word of the span
 *  \param endPos   index of the last word of the span; must not be less than startPos
 *  \return reference to the stored node
 */
InputLatticeNode &TranslationOptionCollectionText::GetInputLatticeNode(size_t startPos, size_t endPos)
{
  // Validate the row index before touching the outer vector.
  CHECK(startPos < m_targetPhrasesfromPt.size());
  // Reject inverted spans explicitly rather than relying on unsigned wrap-around.
  CHECK(startPos <= endPos);

  std::vector<InputLatticeNode> &row = m_targetPhrasesfromPt[startPos];
  const size_t offset = endPos - startPos;
  CHECK(offset < row.size());
  return row[offset];
}
}

View File

@ -23,6 +23,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define moses_TranslationOptionCollectionText_h
#include "TranslationOptionCollection.h"
#include <map>
#include <vector>
namespace Moses
{
@ -34,10 +36,18 @@ class Sentence;
*/
class TranslationOptionCollectionText : public TranslationOptionCollection
{
public:
typedef std::vector< std::vector<InputLatticeNode> > TargetPhraseMatrix;
protected:
TargetPhraseMatrix m_targetPhrasesfromPt; /*< contains translation options */
InputLatticeNode &GetInputLatticeNode(size_t startPos, size_t endPos);
public:
void ProcessUnknownWord(size_t sourcePos);
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;