mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
create matrix of subphrases in class TranslationOptionCollectionText
This commit is contained in:
parent
c963338476
commit
fd1a70739f
@ -38,6 +38,14 @@ using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Look up the target phrase collection stored for a phrase dictionary.
 *  \param phraseDictionary key searched for in m_targetPhrases
 *  \return the collection pointer stored under that key
 *
 *  The lookup is guarded by CHECK: the key must already be present in the map.
 */
const TargetPhraseCollection *InputLatticeNode::GetTargetPhrases(const PhraseDictionary *phraseDictionary) const
{
  typedef std::map<const PhraseDictionary*, const TargetPhraseCollection *> TargetPhraseMap;

  const TargetPhraseMap::const_iterator found = m_targetPhrases.find(phraseDictionary);
  CHECK(found != m_targetPhrases.end());

  const TargetPhraseCollection *targetPhrases = found->second;
  return targetPhrases;
}
|
||||
|
||||
/** helper for pruning */
|
||||
bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
|
||||
{
|
||||
|
@ -42,6 +42,44 @@ class InputType;
|
||||
class FactorMask;
|
||||
class Word;
|
||||
class DecodeGraph;
|
||||
class PhraseDictionary;
|
||||
|
||||
/** Each node contains
|
||||
1. the substring used to search the phrase table
|
||||
2. the source range it covers
|
||||
3. a list of InputLatticeNode that it is a prefix of
|
||||
This is for both sentence input, and confusion network/lattices
|
||||
*/
|
||||
class InputLatticeNode
|
||||
{
|
||||
protected:
|
||||
Phrase m_phrase;
|
||||
WordsRange m_range;
|
||||
std::map<const PhraseDictionary*, const TargetPhraseCollection *> m_targetPhrases;
|
||||
|
||||
public:
|
||||
InputLatticeNode()
|
||||
:m_range(NOT_FOUND, NOT_FOUND)
|
||||
{}
|
||||
InputLatticeNode(const Phrase &phrase, const WordsRange &range)
|
||||
:m_phrase(phrase)
|
||||
,m_range(range) {
|
||||
}
|
||||
|
||||
const Phrase &GetPhrase() const {
|
||||
return m_phrase;
|
||||
}
|
||||
const WordsRange &GetWordsRange() const {
|
||||
return m_range;
|
||||
}
|
||||
|
||||
void SetTargetPhrases(const PhraseDictionary *phraseDictionary, const TargetPhraseCollection *targetPhrases) {
|
||||
m_targetPhrases[phraseDictionary] = targetPhrases;
|
||||
}
|
||||
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary *phraseDictionary) const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
|
||||
* A key insight into efficient decoding is that various input
|
||||
@ -67,6 +105,7 @@ protected:
|
||||
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
|
||||
const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
|
||||
std::vector<Phrase*> m_unksrcs;
|
||||
std::vector<InputLatticeNode*> m_phraseDictionaryQueue;
|
||||
|
||||
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage,
|
||||
float translationOptionThreshold);
|
||||
|
@ -30,8 +30,33 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
/** constructor; just initialize the base class */
|
||||
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||
: TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
|
||||
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
|
||||
{
|
||||
size_t size = input.GetSize();
|
||||
m_targetPhrasesfromPt.resize(size);
|
||||
for (size_t startPos = 0; startPos < size; ++startPos) {
|
||||
std::vector<InputLatticeNode> &vec = m_targetPhrasesfromPt[startPos];
|
||||
for (size_t endPos = startPos; endPos < size; ++endPos) {
|
||||
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
|
||||
WordsRange range(startPos, endPos);
|
||||
InputLatticeNode node(subphrase, range);
|
||||
|
||||
vec.push_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
||||
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
||||
size_t endPos = startPos + phaseSize -1;
|
||||
//cerr << startPos << "-" << endPos << "=" << GetPhrase(startPos, endPos) << endl;
|
||||
InputLatticeNode &node = GetInputLatticeNode(startPos, endPos);
|
||||
m_phraseDictionaryQueue.push_back(&node);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* forcibly create translation option for a particular source word.
|
||||
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
|
||||
@ -69,6 +94,14 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
|
||||
|
||||
};
|
||||
|
||||
/** Return the node stored for the source span (startPos, endPos).
 *
 *  The node lives at m_targetPhrasesfromPt[startPos][endPos - startPos].
 *  All three preconditions are enforced with CHECK before any indexing, so
 *  an out-of-range start position or an inverted span fails the check
 *  instead of indexing past the end of a vector.
 *
 *  \param startPos first position of the span; must be a valid row index
 *  \param endPos   last position of the span; must not be less than startPos
 *  \return reference to the stored node
 */
InputLatticeNode &TranslationOptionCollectionText::GetInputLatticeNode(size_t startPos, size_t endPos)
{
  CHECK(startPos < m_targetPhrasesfromPt.size());
  CHECK(startPos <= endPos); // otherwise the unsigned subtraction below wraps around

  std::vector<InputLatticeNode> &row = m_targetPhrasesfromPt[startPos];
  const size_t offset = endPos - startPos;
  CHECK(offset < row.size());
  return row[offset];
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -23,6 +23,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#define moses_TranslationOptionCollectionText_h
|
||||
|
||||
#include "TranslationOptionCollection.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -34,10 +36,18 @@ class Sentence;
|
||||
*/
|
||||
class TranslationOptionCollectionText : public TranslationOptionCollection
|
||||
{
|
||||
public:
|
||||
typedef std::vector< std::vector<InputLatticeNode> > TargetPhraseMatrix;
|
||||
|
||||
protected:
|
||||
TargetPhraseMatrix m_targetPhrasesfromPt; /*< contains translation options */
|
||||
|
||||
InputLatticeNode &GetInputLatticeNode(size_t startPos, size_t endPos);
|
||||
|
||||
public:
|
||||
void ProcessUnknownWord(size_t sourcePos);
|
||||
|
||||
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||
|
||||
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user