Segment the confusion network in class TranslationOptionCollectionConfusionNet rather than in PDAImp, in preparation for making it work with any phrase table.

This commit is contained in:
Hieu Hoang 2013-06-14 23:42:30 +01:00
parent 3516ef42b8
commit 8c6344c4ce
4 changed files with 76 additions and 50 deletions

View File

@ -60,11 +60,16 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
// create subphrases by appending words to previous subphrases
for (size_t startPos = 0; startPos < size; ++startPos) {
for (size_t endPos = startPos + 1; endPos < size; ++endPos) {
std::vector<SourcePath> &newSubphrases = GetPhrases(startPos, endPos);
const std::vector<SourcePath> &prevSubphrases = GetPhrases(startPos, endPos - 1);
const ConfusionNet::Column &col = input.GetColumn(endPos);
CreateSubPhrases(newSubphrases, prevSubphrases, col, *inputFeature);
size_t maxSize = size - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
maxSize = std::min(maxSize, maxSizePhrase);
size_t end = startPos + maxSize - 1;
for (size_t endPos = startPos + 1; endPos < end; ++endPos) {
std::vector<SourcePath> &newSubphrases = GetPhrases(startPos, endPos);
const std::vector<SourcePath> &prevSubphrases = GetPhrases(startPos, endPos - 1);
const ConfusionNet::Column &col = input.GetColumn(endPos);
CreateSubPhrases(newSubphrases, prevSubphrases, col, *inputFeature);
}
}
@ -85,29 +90,29 @@ TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet
}
/** Extend each previously built source path by one more confusion-net column.
 *
 * For every path in prevSubphrases and every node in col, a new path is
 * appended to newSubphrases whose phrase is the previous phrase plus the
 * node's word, and whose score is the previous score plus the node's
 * scores credited to inputFeature.
 *
 * \param newSubphrases  output; new paths are appended, existing entries are kept
 * \param prevSubphrases paths to extend
 * \param col            column whose nodes (word, scores) supply the extra word
 * \param inputFeature   feature function the node scores are added under
 */
void TranslationOptionCollectionConfusionNet::CreateSubPhrases(std::vector<SourcePath> &newSubphrases
    , const std::vector<SourcePath> &prevSubphrases
    , const ConfusionNet::Column &col
    , const InputFeature &inputFeature)
{
  std::vector<SourcePath>::const_iterator iterSourcePath;
  for (iterSourcePath = prevSubphrases.begin(); iterSourcePath != prevSubphrases.end(); ++iterSourcePath) {
    const SourcePath &sourcePath = *iterSourcePath;
    const Phrase &prevSubPhrase = sourcePath.first;
    const ScoreComponentCollection &prevScore = sourcePath.second;

    // one new path per alternative word in the column
    ConfusionNet::Column::const_iterator iterCol;
    for (iterCol = col.begin(); iterCol != col.end(); ++iterCol) {
      const std::pair<Word,std::vector<float> > &node = *iterCol;

      // copy, then extend: the previous path must stay untouched because
      // it is reused for every node of the column
      Phrase subphrase(prevSubPhrase);
      subphrase.AddWord(node.first);

      ScoreComponentCollection score(prevScore);
      score.PlusEquals(&inputFeature, node.second);

      newSubphrases.push_back(SourcePath(subphrase, score));
    }
  }
}
/* forcibly create translation option for a particular source word.
@ -128,24 +133,16 @@ void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePo
/** Read-only access to the source paths stored for the span starting at
 *  startPos and ending at endPos.
 *
 *  Paths are kept per start position and indexed by (endPos - startPos).
 *  Both indices are verified with CHECK before the lookup.
 */
const std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos) const
{
  CHECK(startPos < m_collection.size());

  // If endPos < startPos the unsigned subtraction wraps around to a huge
  // value, which the range check below rejects.
  size_t offset = endPos - startPos;
  CHECK(offset < m_collection[startPos].size());
  return m_collection[startPos][offset];
}
/** Mutable access to the source paths stored for the span starting at
 *  startPos and ending at endPos.
 *
 *  Paths are kept per start position and indexed by (endPos - startPos).
 *  Both indices are verified with CHECK before the lookup.
 */
std::vector<TranslationOptionCollectionConfusionNet::SourcePath> &TranslationOptionCollectionConfusionNet::GetPhrases(size_t startPos, size_t endPos)
{
  CHECK(startPos < m_collection.size());

  // If endPos < startPos the unsigned subtraction wraps around to a huge
  // value, which the range check below rejects.
  size_t offset = endPos - startPos;
  CHECK(offset < m_collection[startPos].size());
  return m_collection[startPos][offset];
}
} // namespace

View File

@ -27,9 +27,9 @@ protected:
std::vector<std::vector<std::vector<SourcePath> > > m_collection;
/** Appends to newSubphrases every path of prevSubphrases extended by each
 *  node of col, adding the node's scores under inputFeature.
 */
void CreateSubPhrases(std::vector<SourcePath> &newSubphrases
    , const std::vector<SourcePath> &prevSubphrases
    , const ConfusionNet::Column &col
    , const InputFeature &inputFeature);
};
}

View File

@ -30,8 +30,28 @@ using namespace std;
namespace Moses
{
/** constructor; initializes the base class, then caches every contiguous
 *  sub-phrase of the input sentence.
 *
 *  After construction m_collection[startPos][endPos - startPos] holds the
 *  phrase covering words startPos..endPos (inclusive), which is what
 *  GetPhrase() hands out.
 *
 *  NOTE(review): spans are not capped by a maximum phrase length here, so
 *  the number of cached phrases grows quadratically with sentence length.
 */
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &input
    , size_t maxNoTransOptPerCoverage
    , float translationOptionThreshold)
  : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
  const size_t size = input.GetSize();
  m_collection.resize(size);

  for (size_t startPos = 0; startPos < size; ++startPos) {
    std::vector<Phrase> &vec = m_collection[startPos];
    // exactly one phrase per possible end position
    vec.reserve(size - startPos);

    for (size_t endPos = startPos; endPos < size; ++endPos) {
      vec.push_back(Phrase(input.GetSubString(WordsRange(startPos, endPos))));
    }
  }

  /* debug aid: dump every cached span
  for (size_t startPos = 0; startPos < size; ++startPos) {
    for (size_t endPos = startPos; endPos < size; ++endPos) {
      cerr << startPos << "-" << endPos << "=" << GetPhrase(startPos, endPos) << endl;
    }
  }
  */
}
/* forcibly create translation option for a particular source word.
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
@ -66,10 +86,16 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
for(size_t i=0; i<xmlOptions.size(); i++) {
Add(xmlOptions[i]);
}
};
}
/** Return the cached phrase for the span starting at startPos and ending
 *  at endPos.
 *
 *  Phrases are stored per start position and indexed by (endPos - startPos).
 *  Both indices are verified with CHECK before the lookup.
 */
const Phrase &TranslationOptionCollectionText::GetPhrase(size_t startPos, size_t endPos) const
{
  CHECK(startPos < m_collection.size());

  // If endPos < startPos the unsigned subtraction wraps around to a huge
  // value, which the range check below rejects.
  size_t offset = endPos - startPos;
  CHECK(offset < m_collection[startPos].size());
  return m_collection[startPos][offset];
}
} // namespace

View File

@ -22,7 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef moses_TranslationOptionCollectionText_h
#define moses_TranslationOptionCollectionText_h
#include <vector>
#include "TranslationOptionCollection.h"
#include "Phrase.h"
namespace Moses
{
@ -35,14 +37,15 @@ class Sentence;
/** Translation option collection for plain sentence input.
 *  Also keeps a table of the input's sub-phrases, reachable via GetPhrase().
 */
class TranslationOptionCollectionText : public TranslationOptionCollection
{
public:
  TranslationOptionCollectionText(Sentence const& input, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);

  void ProcessUnknownWord(size_t sourcePos);

  bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
  void CreateXmlOptionsForRange(size_t startPosition, size_t endPosition);

  /** Phrase stored for the span startPos..endPos; both bounds are checked
   *  against the table by the implementation.
   */
  const Phrase &GetPhrase(size_t startPos, size_t endPos) const;

protected:
  //! sub-phrase table: first index is the start position,
  //! second index is (endPos - startPos)
  std::vector<std::vector<Phrase> > m_collection;
};