mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
segment confusion network in class TranslationOptionCollectionConfusionNet, rather than in PDAImp. Get ready to make it work with any phrase table
This commit is contained in:
parent
3516ef42b8
commit
8c6344c4ce
@ -60,11 +60,16 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
|
||||
|
||||
// create subphrases by appending words to previous subphrases
|
||||
for (size_t startPos = 0; startPos < size; ++startPos) {
|
||||
for (size_t endPos = startPos + 1; endPos < size; ++endPos) {
|
||||
std::vector<SourcePath> &newSubphrases = GetPhrases(startPos, endPos);
|
||||
const std::vector<SourcePath> &prevSubphrases = GetPhrases(startPos, endPos - 1);
|
||||
const ConfusionNet::Column &col = input.GetColumn(endPos);
|
||||
CreateSubPhrases(newSubphrases, prevSubphrases, col, *inputFeature);
|
||||
size_t maxSize = size - startPos;
|
||||
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||
maxSize = std::min(maxSize, maxSizePhrase);
|
||||
size_t end = startPos + maxSize - 1;
|
||||
|
||||
for (size_t endPos = startPos + 1; endPos < end; ++endPos) {
|
||||
std::vector<SourcePath> &newSubphrases = GetPhrases(startPos, endPos);
|
||||
const std::vector<SourcePath> &prevSubphrases = GetPhrases(startPos, endPos - 1);
|
||||
const ConfusionNet::Column &col = input.GetColumn(endPos);
|
||||
CreateSubPhrases(newSubphrases, prevSubphrases, col, *inputFeature);
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,29 +90,29 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
|
||||
}
|
||||
|
||||
void TranslationOptionCollectionConfusionNet::CreateSubPhrases(std::vector<SourcePath> &newSubphrases
|
||||
, const std::vector<SourcePath> &prevSubphrases
|
||||
, const ConfusionNet::Column &col
|
||||
, const InputFeature &inputFeature)
|
||||
, const std::vector<SourcePath> &prevSubphrases
|
||||
, const ConfusionNet::Column &col
|
||||
, const InputFeature &inputFeature)
|
||||
{
|
||||
std::vector<SourcePath>::const_iterator iterSourcePath;
|
||||
for (iterSourcePath = prevSubphrases.begin(); iterSourcePath != prevSubphrases.end(); ++iterSourcePath) {
|
||||
const SourcePath &sourcePath = *iterSourcePath;
|
||||
const Phrase &prevSubPhrase = sourcePath.first;
|
||||
const ScoreComponentCollection &prevScore = sourcePath.second;
|
||||
std::vector<SourcePath>::const_iterator iterSourcePath;
|
||||
for (iterSourcePath = prevSubphrases.begin(); iterSourcePath != prevSubphrases.end(); ++iterSourcePath) {
|
||||
const SourcePath &sourcePath = *iterSourcePath;
|
||||
const Phrase &prevSubPhrase = sourcePath.first;
|
||||
const ScoreComponentCollection &prevScore = sourcePath.second;
|
||||
|
||||
ConfusionNet::Column::const_iterator iterCol;
|
||||
for (iterCol = col.begin(); iterCol != col.end(); ++iterCol) {
|
||||
const std::pair<Word,std::vector<float> > &node = *iterCol;
|
||||
Phrase subphrase(prevSubPhrase);
|
||||
subphrase.AddWord(node.first);
|
||||
ConfusionNet::Column::const_iterator iterCol;
|
||||
for (iterCol = col.begin(); iterCol != col.end(); ++iterCol) {
|
||||
const std::pair<Word,std::vector<float> > &node = *iterCol;
|
||||
Phrase subphrase(prevSubPhrase);
|
||||
subphrase.AddWord(node.first);
|
||||
|
||||
ScoreComponentCollection score(prevScore);
|
||||
score.PlusEquals(&inputFeature, node.second);
|
||||
ScoreComponentCollection score(prevScore);
|
||||
score.PlusEquals(&inputFeature, node.second);
|
||||
|
||||
SourcePath newSourcePath(subphrase, score);
|
||||
newSubphrases.push_back(newSourcePath);
|
||||
}
|
||||
}
|
||||
SourcePath newSourcePath(subphrase, score);
|
||||
newSubphrases.push_back(newSourcePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* forcibly create translation option for a particular source word.
|
||||
@ -128,24 +133,16 @@ void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePo
|
||||
|
||||
const std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos) const
|
||||
{
|
||||
size_t maxSize = endPos - startPos;
|
||||
//size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||
//maxSize = std::min(maxSize, maxSizePhrase);
|
||||
|
||||
CHECK(maxSize < m_collection[startPos].size());
|
||||
return m_collection[startPos][maxSize];
|
||||
|
||||
size_t offset = endPos - startPos;
|
||||
CHECK(offset < m_collection[startPos].size());
|
||||
return m_collection[startPos][offset];
|
||||
}
|
||||
|
||||
std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos)
|
||||
{
|
||||
size_t maxSize = endPos - startPos;
|
||||
//size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
|
||||
//maxSize = std::min(maxSize, maxSizePhrase);
|
||||
|
||||
CHECK(maxSize < m_collection[startPos].size());
|
||||
return m_collection[startPos][maxSize];
|
||||
|
||||
size_t offset = endPos - startPos;
|
||||
CHECK(offset < m_collection[startPos].size());
|
||||
return m_collection[startPos][offset];
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -27,9 +27,9 @@ protected:
|
||||
std::vector<std::vector<std::vector<SourcePath> > > m_collection;
|
||||
|
||||
void CreateSubPhrases(std::vector<SourcePath> &newSubphrases
|
||||
, const std::vector<SourcePath> &prevSubphrases
|
||||
, const ConfusionNet::Column &col
|
||||
, const InputFeature &inputFeature);
|
||||
, const std::vector<SourcePath> &prevSubphrases
|
||||
, const ConfusionNet::Column &col
|
||||
, const InputFeature &inputFeature);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -30,8 +30,28 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
/** constructor; just initialize the base class */
|
||||
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
||||
: TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
|
||||
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input
|
||||
, size_t maxNoTransOptPerCoverage
|
||||
, float translationOptionThreshold)
|
||||
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
|
||||
{
|
||||
size_t size = input.GetSize();
|
||||
m_collection.resize(size);
|
||||
for (size_t startPos = 0; startPos < size; ++startPos) {
|
||||
std::vector<Phrase> &vec = m_collection[startPos];
|
||||
for (size_t endPos = startPos; endPos < size; ++endPos) {
|
||||
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
|
||||
vec.push_back(subphrase);
|
||||
}
|
||||
}
|
||||
/*
|
||||
for (size_t startPos = 0; startPos < size; ++startPos) {
|
||||
for (size_t endPos = startPos; endPos < size; ++endPos) {
|
||||
cerr << startPos << "-" << endPos << "=" << GetPhrase(startPos, endPos) << endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/* forcibly create translation option for a particular source word.
|
||||
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
|
||||
@ -66,10 +86,16 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
|
||||
for(size_t i=0; i<xmlOptions.size(); i++) {
|
||||
Add(xmlOptions[i]);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/**
 * Return the cached phrase spanning source positions [startPos, endPos].
 * Phrases are stored per start position and indexed by the offset
 * endPos - startPos.
 */
const Phrase &TranslationOptionCollectionText::GetPhrase(size_t startPos, size_t endPos) const
{
  // validate the outer index before m_collection[startPos] is touched
  CHECK(startPos < m_collection.size());

  size_t offset = endPos - startPos;
  // also rejects endPos < startPos: the unsigned subtraction wraps to a huge value
  CHECK(offset < m_collection[startPos].size());
  return m_collection[startPos][offset];
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
@ -22,7 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#ifndef moses_TranslationOptionCollectionText_h
|
||||
#define moses_TranslationOptionCollectionText_h
|
||||
|
||||
#include <vector>
|
||||
#include "TranslationOptionCollection.h"
|
||||
#include "Phrase.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -35,14 +37,15 @@ class Sentence;
|
||||
class TranslationOptionCollectionText : public TranslationOptionCollection
|
||||
{
|
||||
public:
|
||||
TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||
|
||||
void ProcessUnknownWord(size_t sourcePos);
|
||||
|
||||
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
|
||||
|
||||
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
|
||||
|
||||
void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);
|
||||
const Phrase &GetPhrase(size_t startPos, size_t endPos) const;
|
||||
|
||||
protected:
|
||||
std::vector<std::vector<Phrase> > m_collection;
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user