mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
terminal and non-terminal word alignments stored in 2 separate objects
This commit is contained in:
parent
fb040861f7
commit
397e704b49
@ -247,17 +247,26 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
|
||||
ret->SetScoreChart(phraseDict.GetFeature(), m_scores, weightT, lmList, wpProducer);
|
||||
|
||||
// alignments
|
||||
int indicator[m_align.size()];
|
||||
int index = 0;
|
||||
Moses::AlignmentInfo::CollType alignTerm, alignNonTerm;
|
||||
std::set<std::pair<size_t, size_t> > alignmentInfo;
|
||||
const PhrasePtr sp = GetSourcePhrase();
|
||||
for (size_t ind = 0; ind < m_align.size(); ++ind) {
|
||||
const std::pair<size_t, size_t> &entry = m_align[ind];
|
||||
alignmentInfo.insert(entry);
|
||||
size_t sourcePos = entry.first;
|
||||
indicator[index++] = sp->GetWord(sourcePos).IsNonTerminal() ? 1: 0;
|
||||
size_t targetPos = entry.second;
|
||||
|
||||
if (GetWord(targetPos).IsNonTerminal()) {
|
||||
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
else {
|
||||
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
|
||||
}
|
||||
ret->SetAlignmentInfo(alignmentInfo, indicator);
|
||||
ret->SetAlignTerm(alignTerm);
|
||||
ret->SetAlignNonTerm(alignNonTerm);
|
||||
|
||||
GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, ret->MutableTargetLHS());
|
||||
|
||||
|
@ -254,7 +254,7 @@ void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
|
||||
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
|
||||
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
|
||||
|
||||
OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
|
||||
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
|
||||
|
||||
targetOffset += tp.GetSize();
|
||||
}
|
||||
@ -457,7 +457,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
|
||||
WordsRange targetRange = path.GetTargetWordsRange(edge);
|
||||
const int sourceOffset = sourceRange.GetStartPos();
|
||||
const int targetOffset = targetRange.GetStartPos();
|
||||
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
|
||||
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
|
||||
|
||||
OutputAlignment(out, ai, sourceOffset, targetOffset);
|
||||
|
||||
|
@ -42,7 +42,7 @@ void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os,
|
||||
std::string source = (*tpi)->GetSourcePhraseStringRep();
|
||||
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
|
||||
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
|
||||
const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignTerm();
|
||||
// language model backoff stats,
|
||||
if (doLMStats) {
|
||||
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
|
||||
|
@ -51,6 +51,9 @@ class AlignmentInfo
|
||||
const_iterator begin() const { return m_collection.begin(); }
|
||||
const_iterator end() const { return m_collection.end(); }
|
||||
|
||||
/** Inserts the alignment point (sourcePos, targetPos) into m_collection. */
void Add(size_t sourcePos, size_t targetPos) {
  const std::pair<size_t, size_t> alignPoint(sourcePos, targetPos);
  m_collection.insert(alignPoint);
}
|
||||
/** Provides a map from target-side to source-side non-terminal indices.
|
||||
* The target-side index should be the rule symbol index (counting terminals).
|
||||
* The index returned is the rule non-terminal index (ignoring terminals).
|
||||
|
@ -53,12 +53,5 @@ const AlignmentInfo *AlignmentInfoCollection::Add(
|
||||
return &(*ret.first);
|
||||
}
|
||||
|
||||
/** Builds an AlignmentInfo from the given alignment pairs and indicator array,
 *  inserts it into m_collection, and returns the address of the element that
 *  the insert call reports.
 *  NOTE(review): presumably m_collection is a std::set, so an equal element
 *  that already exists is returned instead of a new one -- confirm.
 */
const AlignmentInfo *AlignmentInfoCollection::Add(
  const std::set<std::pair<size_t,size_t> > &pairs, int* indicator)
{
  typedef std::pair<AlignmentInfoSet::iterator, bool> InsertResult;
  const InsertResult inserted =
    m_collection.insert(AlignmentInfo(pairs, indicator));
  return &*inserted.first;
}
|
||||
|
||||
}
|
||||
|
@ -46,7 +46,6 @@ class AlignmentInfoCollection
|
||||
* one is inserted.
|
||||
*/
|
||||
const AlignmentInfo *Add(const std::set<std::pair<size_t,size_t> > &);
|
||||
const AlignmentInfo *Add(const std::set<std::pair<size_t,size_t> > &, int* indicator);
|
||||
|
||||
//! Returns a pointer to an empty AlignmentInfo object.
|
||||
const AlignmentInfo &GetEmptyAlignmentInfo() const;
|
||||
|
@ -45,11 +45,9 @@ struct AlignmentInfoFixture {
|
||||
aligns2.insert(IndexPair(2,1));
|
||||
aligns3.insert(IndexPair(1,2));
|
||||
aligns3.insert(IndexPair(2,1));
|
||||
int ind1[] = {0,1};
|
||||
int ind2[] = {0};
|
||||
ai1 = collection.Add(aligns1, ind1);
|
||||
ai2 = collection.Add(aligns2, ind1);
|
||||
ai3 = collection.Add(aligns3, ind2);
|
||||
ai1 = collection.Add(aligns1);
|
||||
ai2 = collection.Add(aligns2);
|
||||
ai3 = collection.Add(aligns3);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -97,7 +97,7 @@ void ChartHypothesis::CreateOutputPhrase(Phrase &outPhrase) const
|
||||
const Word &word = GetCurrTargetPhrase().GetWord(pos);
|
||||
if (word.IsNonTerminal()) {
|
||||
// non-term. fill out with prev hypo
|
||||
size_t nonTermInd = GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap()[pos];
|
||||
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
||||
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
|
||||
prevHypo->CreateOutputPhrase(outPhrase);
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ Phrase ChartTrellisNode::GetOutputPhrase() const
|
||||
|
||||
const Phrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
m_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
|
||||
m_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
|
||||
for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {
|
||||
const Word &word = currTargetPhrase.GetWord(pos);
|
||||
if (word.IsNonTerminal()) {
|
||||
|
@ -401,8 +401,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
{
|
||||
// reconstruct the alignment data based on the alignment of the subphrase
|
||||
for(AlignmentInfo::const_iterator it = subTp.GetAlignmentInfo().begin();
|
||||
it != subTp.GetAlignmentInfo().end(); it++)
|
||||
for(AlignmentInfo::const_iterator it = subTp.GetAlignNonTerm().begin();
|
||||
it != subTp.GetAlignNonTerm().end(); it++)
|
||||
{
|
||||
alignment.insert(AlignPointSizeT(srcStart + it->first,
|
||||
targetPhrase->GetSize() + it->second));
|
||||
@ -456,7 +456,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
if(state == Add)
|
||||
{
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
targetPhrase->SetAlignmentInfo(alignment);
|
||||
targetPhrase->SetAlignTerm(alignment);
|
||||
|
||||
if(m_coding == PREnc)
|
||||
{
|
||||
|
@ -503,7 +503,7 @@ ostream& operator<<(ostream& out, const Hypothesis& hypo)
|
||||
out << " " << hypo.GetScoreBreakdown();
|
||||
|
||||
// alignment
|
||||
out << " " << hypo.GetCurrTargetPhrase().GetAlignmentInfo();
|
||||
out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();
|
||||
|
||||
/*
|
||||
const Hypothesis *prevHypo = hypo.GetPrevHypo();
|
||||
|
@ -36,7 +36,7 @@ template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targe
|
||||
for (TargetPhraseCollection::const_iterator p(targets.begin()); p != targets.end(); ++p) {
|
||||
words.clear();
|
||||
const TargetPhrase &phrase = **p;
|
||||
const AlignmentInfo::NonTermIndexMap &align = phrase.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
const AlignmentInfo::NonTermIndexMap &align = phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
search::PartialEdge edge(edges_.AllocateEdge(nts.size()));
|
||||
|
||||
size_t i = 0;
|
||||
|
@ -231,7 +231,7 @@ private:
|
||||
{
|
||||
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
target.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
target.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
// loop over the rule that is being applied
|
||||
for (size_t pos = 0; pos < target.GetSize(); ++pos) {
|
||||
@ -285,7 +285,7 @@ private:
|
||||
else {
|
||||
const TargetPhrase& target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
target.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
target.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
for (int pos = (int) target.GetSize() - 1; pos >= 0 ; --pos) {
|
||||
const Word &word = target.GetWord(pos);
|
||||
|
||||
@ -391,7 +391,7 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
|
||||
// get index map for underlying hypotheses
|
||||
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
|
||||
hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
// loop over rule
|
||||
for (size_t phrasePos = 0, wordPos = 0;
|
||||
|
@ -300,7 +300,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
|
||||
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
|
||||
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
target.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
target.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
const size_t size = hypo.GetCurrTargetPhrase().GetSize();
|
||||
size_t phrasePos = 0;
|
||||
|
@ -143,14 +143,16 @@ void RuleTableLoaderCompact::LoadAlignmentSection(
|
||||
reader.ReadLine();
|
||||
const size_t alignmentSetCount = std::atoi(reader.m_line.c_str());
|
||||
|
||||
alignmentSets.resize(alignmentSetCount);
|
||||
std::set<std::pair<size_t,size_t> > alignmentInfo;
|
||||
alignmentSets.resize(alignmentSetCount * 2);
|
||||
AlignmentInfo::CollType alignTerm, alignNonTerm;
|
||||
std::vector<std::string> tokens;
|
||||
std::vector<size_t> points;
|
||||
for (size_t i = 0; i < alignmentSetCount; ++i) {
|
||||
// Read alignment set, lookup in collection, and store pointer.
|
||||
alignmentInfo.clear();
|
||||
alignTerm.clear();
|
||||
alignNonTerm.clear();
|
||||
tokens.clear();
|
||||
|
||||
reader.ReadLine();
|
||||
Tokenize(tokens, reader.m_line);
|
||||
std::vector<std::string>::const_iterator p;
|
||||
@ -160,10 +162,17 @@ void RuleTableLoaderCompact::LoadAlignmentSection(
|
||||
points.clear();
|
||||
Tokenize<size_t>(points, *p, "-");
|
||||
std::pair<size_t, size_t> alignmentPair(points[0], points[1]);
|
||||
alignmentInfo.insert(alignmentPair);
|
||||
indicator[index++] = sourcePhrases[i].GetWord(points[0]).IsNonTerminal() ? 1: 0;
|
||||
|
||||
if (sourcePhrases[i].GetWord(alignmentPair.first).IsNonTerminal()) {
|
||||
alignNonTerm.insert(alignmentPair);
|
||||
}
|
||||
else {
|
||||
alignTerm.insert(alignmentPair);
|
||||
}
|
||||
|
||||
}
|
||||
alignmentSets[i] = AlignmentInfoCollection::Instance().Add(alignmentInfo, indicator);
|
||||
alignmentSets[i*2] = AlignmentInfoCollection::Instance().Add(alignNonTerm);
|
||||
alignmentSets[i*2 + 1] = AlignmentInfoCollection::Instance().Add(alignTerm);
|
||||
}
|
||||
}
|
||||
|
||||
@ -206,7 +215,7 @@ bool RuleTableLoaderCompact::LoadRuleSection(
|
||||
const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];
|
||||
const Word &targetLhs = vocab[targetLhsIds[targetPhraseId]];
|
||||
Word sourceLHS("X"); // TODO not implemented for compact
|
||||
const AlignmentInfo *alignmentInfo = alignmentSets[alignmentSetId];
|
||||
const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId];
|
||||
|
||||
// Then there should be one score for each score component.
|
||||
for (size_t j = 0; j < numScoreComponents; ++j) {
|
||||
@ -226,7 +235,7 @@ bool RuleTableLoaderCompact::LoadRuleSection(
|
||||
|
||||
// Create and score target phrase.
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
|
||||
targetPhrase->SetAlignmentInfo(alignmentInfo);
|
||||
targetPhrase->SetAlignNonTerm(alignNonTerm);
|
||||
targetPhrase->SetTargetLHS(targetLhs);
|
||||
targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weights,
|
||||
languageModels, wpProducer);
|
||||
|
@ -235,7 +235,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
targetPhrase->SetSourcePhrase(sourcePhrase);
|
||||
|
||||
// rest of target phrase
|
||||
targetPhrase->SetAlignmentInfo(alignString, sourcePhrase);
|
||||
targetPhrase->SetAlignmentInfo(alignString);
|
||||
targetPhrase->SetTargetLHS(targetLHS);
|
||||
|
||||
targetPhrase->SetRuleCount(ruleCountString, scoreVector[0]);
|
||||
|
@ -286,7 +286,7 @@ namespace Moses
|
||||
cerr << source << endl << target << endl;
|
||||
const size_t size = source.GetSize();
|
||||
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
|
||||
|
||||
PhraseDictionaryNodeSCFG *currNode = &rootNode;
|
||||
@ -297,7 +297,7 @@ namespace Moses
|
||||
// indexed by source label 1st
|
||||
const Word &sourceNonTerm = word;
|
||||
|
||||
CHECK(iterAlign != target.GetAlignmentInfo().end());
|
||||
CHECK(iterAlign != alignmentInfo.end());
|
||||
CHECK(iterAlign->first == pos);
|
||||
size_t targetNonTermInd = iterAlign->second;
|
||||
++iterAlign;
|
||||
|
@ -55,7 +55,7 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
|
||||
{
|
||||
const size_t size = source.GetSize();
|
||||
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
|
||||
|
||||
PhraseDictionaryNodeSCFG *currNode = &m_collection;
|
||||
@ -66,7 +66,7 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
|
||||
// indexed by source label 1st
|
||||
const Word &sourceNonTerm = word;
|
||||
|
||||
CHECK(iterAlign != target.GetAlignmentInfo().end());
|
||||
CHECK(iterAlign != alignmentInfo.end());
|
||||
CHECK(iterAlign->first == pos);
|
||||
size_t targetNonTermInd = iterAlign->second;
|
||||
++iterAlign;
|
||||
|
@ -52,7 +52,7 @@ UTrieNode &RuleTableUTrie::GetOrCreateNode(const Phrase &source,
|
||||
{
|
||||
const size_t size = source.GetSize();
|
||||
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
|
||||
|
||||
UTrieNode *currNode = &m_root;
|
||||
@ -60,7 +60,7 @@ UTrieNode &RuleTableUTrie::GetOrCreateNode(const Phrase &source,
|
||||
const Word &word = source.GetWord(pos);
|
||||
|
||||
if (word.IsNonTerminal()) {
|
||||
assert(iterAlign != target.GetAlignmentInfo().end());
|
||||
assert(iterAlign != alignmentInfo.end());
|
||||
assert(iterAlign->first == pos);
|
||||
size_t targetNonTermInd = iterAlign->second;
|
||||
++iterAlign;
|
||||
|
@ -93,7 +93,7 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
|
||||
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &target)
|
||||
{
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
const size_t rank = alignmentInfo.GetSize();
|
||||
|
||||
std::vector<int> vec;
|
||||
|
@ -36,7 +36,7 @@ void SourceWordDeletionFeature::Evaluate(
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
||||
const AlignmentInfo::CollType &alignment = alignmentInfo.GetAlignments();
|
||||
ComputeFeatures(targetPhrase, accumulator, alignment);
|
||||
}
|
||||
@ -45,7 +45,7 @@ void SourceWordDeletionFeature::EvaluateChart(
|
||||
const ChartBasedFeatureContext& context,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignTerm();
|
||||
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
|
||||
ComputeFeatures(context.GetTargetPhrase(), accumulator, alignment);
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int
|
||||
|
||||
// get index map for underlying hypotheses
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
cur_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
|
||||
cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
// loop over rule
|
||||
bool makePrefix = false;
|
||||
|
@ -43,7 +43,7 @@ private:
|
||||
{
|
||||
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
target.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
target.GetAlignNonTerm().GetNonTermIndexMap();
|
||||
|
||||
// loop over the rule that is being applied
|
||||
for (size_t pos = 0; pos < target.GetSize(); ++pos) {
|
||||
@ -100,7 +100,7 @@ private:
|
||||
else {
|
||||
const TargetPhrase targetPhrase = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
targetPhrase.GetAlignTerm().GetNonTermIndexMap();
|
||||
for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0 ; --pos) {
|
||||
const Word &word = targetPhrase.GetWord(pos);
|
||||
|
||||
|
@ -42,7 +42,8 @@ namespace Moses
|
||||
{
|
||||
TargetPhrase::TargetPhrase( std::string out_string)
|
||||
:Phrase(0), m_fullScore(0.0), m_sourcePhrase(0)
|
||||
, m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
{
|
||||
|
||||
//ACAT
|
||||
@ -55,7 +56,8 @@ TargetPhrase::TargetPhrase()
|
||||
:Phrase(ARRAY_SIZE_INCR)
|
||||
, m_fullScore(0.0)
|
||||
,m_sourcePhrase(0)
|
||||
, m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
{
|
||||
}
|
||||
|
||||
@ -63,7 +65,8 @@ TargetPhrase::TargetPhrase(const Phrase &phrase)
|
||||
: Phrase(phrase)
|
||||
, m_fullScore(0.0)
|
||||
, m_sourcePhrase(0)
|
||||
, m_alignmentInfo(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
{
|
||||
}
|
||||
|
||||
@ -290,7 +293,7 @@ void MosesShouldUseExceptions(bool value) {
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
|
||||
{
|
||||
set<pair<size_t,size_t> > alignmentInfo;
|
||||
AlignmentInfo::CollType alignTerm, alignNonTerm;
|
||||
for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
|
||||
util::TokenIter<util::AnyCharacter, false> dash(*token, util::AnyCharacter("-"));
|
||||
MosesShouldUseExceptions(dash);
|
||||
@ -299,50 +302,35 @@ void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
|
||||
size_t targetPos = boost::lexical_cast<size_t>(*dash++);
|
||||
MosesShouldUseExceptions(!dash);
|
||||
|
||||
alignmentInfo.insert(pair<size_t,size_t>(sourcePos, targetPos));
|
||||
if (GetWord(targetPos).IsNonTerminal()) {
|
||||
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
else {
|
||||
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
|
||||
}
|
||||
}
|
||||
SetAlignTerm(alignTerm);
|
||||
SetAlignNonTerm(alignNonTerm);
|
||||
|
||||
SetAlignmentInfo(alignmentInfo);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString, Phrase &sourcePhrase)
|
||||
void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll)
|
||||
{
|
||||
std::vector<int> indicator;
|
||||
|
||||
set<pair<size_t,size_t> > alignmentInfo;
|
||||
for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
|
||||
util::TokenIter<util::AnyCharacter, false> dash(*token, util::AnyCharacter("-"));
|
||||
MosesShouldUseExceptions(dash);
|
||||
size_t sourcePos = boost::lexical_cast<size_t>(*dash++);
|
||||
MosesShouldUseExceptions(dash);
|
||||
size_t targetPos = boost::lexical_cast<size_t>(*dash++);
|
||||
MosesShouldUseExceptions(!dash);
|
||||
|
||||
alignmentInfo.insert(pair<size_t,size_t>(sourcePos, targetPos));
|
||||
indicator.push_back(sourcePhrase.GetWord(sourcePos).IsNonTerminal() ? 1: 0);
|
||||
}
|
||||
|
||||
SetAlignmentInfo(alignmentInfo, &indicator[0]);
|
||||
const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll);
|
||||
m_alignTerm = alignmentInfo;
|
||||
}
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo)
|
||||
void TargetPhrase::SetAlignNonTerm(const AlignmentInfo::CollType &coll)
|
||||
{
|
||||
m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo);
|
||||
}
|
||||
|
||||
|
||||
void TargetPhrase::SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo, int* indicator)
|
||||
{
|
||||
m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo, indicator);
|
||||
const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll);
|
||||
m_alignNonTerm = alignmentInfo;
|
||||
}
|
||||
|
||||
TO_STRING_BODY(TargetPhrase);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
|
||||
{
|
||||
os << static_cast<const Phrase&>(tp) << ":" << tp.GetAlignmentInfo();
|
||||
os << static_cast<const Phrase&>(tp) << ":" << tp.GetAlignNonTerm();
|
||||
os << ": c=" << tp.m_fullScore;
|
||||
|
||||
return os;
|
||||
|
@ -53,7 +53,7 @@ protected:
|
||||
|
||||
// in case of confusion net, ptr to source phrase
|
||||
Phrase m_sourcePhrase;
|
||||
const AlignmentInfo* m_alignmentInfo;
|
||||
const AlignmentInfo* m_alignTerm, *m_alignNonTerm;
|
||||
Word m_lhsTarget;
|
||||
size_t m_ruleCount;
|
||||
|
||||
@ -149,15 +149,20 @@ public:
|
||||
}
|
||||
|
||||
void SetAlignmentInfo(const StringPiece &alignString);
|
||||
void SetAlignmentInfo(const StringPiece &alignString, Phrase &sourcePhrase);
|
||||
void SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo);
|
||||
void SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo, int* indicator);
|
||||
void SetAlignmentInfo(const AlignmentInfo *alignmentInfo) {
|
||||
m_alignmentInfo = alignmentInfo;
|
||||
void SetAlignTerm(const AlignmentInfo *alignTerm) {
|
||||
m_alignTerm = alignTerm;
|
||||
}
|
||||
|
||||
//! Returns the AlignmentInfo that m_alignmentInfo points to (dereferenced without a null check).
const AlignmentInfo &GetAlignmentInfo() const {
  return *m_alignmentInfo;
}
|
||||
void SetAlignNonTerm(const AlignmentInfo *alignNonTerm) {
|
||||
m_alignNonTerm = alignNonTerm;
|
||||
}
|
||||
|
||||
void SetAlignTerm(const AlignmentInfo::CollType &coll);
|
||||
void SetAlignNonTerm(const AlignmentInfo::CollType &coll);
|
||||
|
||||
//! Returns the AlignmentInfo that m_alignTerm points to (dereferenced without a null check).
const AlignmentInfo &GetAlignTerm() const {
  return *m_alignTerm;
}
|
||||
//! Returns the AlignmentInfo that m_alignNonTerm points to (dereferenced without a null check).
const AlignmentInfo &GetAlignNonTerm() const {
  return *m_alignNonTerm;
}
|
||||
|
||||
void SetRuleCount(const StringPiece &ruleCountString, float p_f_given_e);
|
||||
size_t GetRuleCount() const { return m_ruleCount; }
|
||||
@ -179,7 +184,9 @@ struct TargetPhraseHasher
|
||||
size_t seed = 0;
|
||||
boost::hash_combine(seed, targetPhrase);
|
||||
boost::hash_combine(seed, targetPhrase.GetSourcePhrase());
|
||||
boost::hash_combine(seed, targetPhrase.GetAlignmentInfo());
|
||||
boost::hash_combine(seed, targetPhrase.GetAlignTerm());
|
||||
boost::hash_combine(seed, targetPhrase.GetAlignNonTerm());
|
||||
|
||||
return seed;
|
||||
}
|
||||
};
|
||||
@ -190,7 +197,8 @@ struct TargetPhraseComparator
|
||||
{
|
||||
return lhs.Compare(rhs) == 0 &&
|
||||
lhs.GetSourcePhrase().Compare(rhs.GetSourcePhrase()) == 0 &&
|
||||
lhs.GetAlignmentInfo() == rhs.GetAlignmentInfo();
|
||||
lhs.GetAlignTerm() == rhs.GetAlignTerm() &&
|
||||
lhs.GetAlignNonTerm() == rhs.GetAlignNonTerm();
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -36,7 +36,7 @@ void TargetWordInsertionFeature::Evaluate(
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
||||
const AlignmentInfo::CollType &alignment = alignmentInfo.GetAlignments();
|
||||
ComputeFeatures(targetPhrase, accumulator, alignment);
|
||||
}
|
||||
@ -46,7 +46,7 @@ void TargetWordInsertionFeature::EvaluateChart(
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignTerm();
|
||||
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
|
||||
ComputeFeatures(targetPhrase, accumulator, alignment);
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ void WordTranslationFeature::Evaluate
|
||||
{
|
||||
const Sentence& input = static_cast<const Sentence&>(context.GetSource());
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const AlignmentInfo &alignment = targetPhrase.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();
|
||||
|
||||
// process aligned words
|
||||
for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) {
|
||||
@ -293,7 +293,7 @@ void WordTranslationFeature::EvaluateChart(
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetPhrase& targetPhrase = context.GetTargetPhrase();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
||||
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
|
||||
|
||||
// process aligned words
|
||||
|
Loading…
Reference in New Issue
Block a user