// mosesdecoder/moses/TranslationModel/PhraseDictionaryMemory.cpp

// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include "PhraseDictionaryMemory.h"
// (listing artifact: revision timestamp 2012-11-12 23:56:18 +04:00)
#include "moses/FactorCollection.h"
#include "moses/Word.h"
#include "moses/Util.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
#include "moses/UserMessage.h"
#include "moses/TranslationModel/RuleTable/LoaderFactory.h"
#include "moses/TranslationModel/RuleTable/Loader.h"
// (listing artifact: revision timestamp 2012-11-27 21:23:31 +04:00)
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h"
#include "moses/InputLatticeNode.h"
using namespace std;
namespace Moses
{
/**
 * Construct the in-memory phrase dictionary from its configuration line.
 *
 * The fixed name "PhraseDictionaryMemory" and the configuration line are
 * forwarded to the RuleTableTrie base, after which ReadParameters() is
 * invoked.
 *
 * \param line configuration line, passed on unchanged to RuleTableTrie
 */
PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
  : RuleTableTrie("PhraseDictionaryMemory", line)
{
  this->ReadParameters();
}
/**
 * Return the target phrase collection attached to the trie node of a rule.
 *
 * The node is obtained through GetOrCreateNode(source, target, sourceLHS);
 * the collection is then obtained from that node's
 * GetOrCreateTargetPhraseCollection().
 *
 * \param source    source side of the rule; selects the path through the trie
 * \param target    target side of the rule; forwarded to GetOrCreateNode()
 * \param sourceLHS source left-hand side; forwarded to GetOrCreateNode()
 * \return reference to the node's target phrase collection
 */
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
  const Phrase &source
  , const TargetPhrase &target
  , const Word *sourceLHS)
{
  PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
  return currNode.GetOrCreateTargetPhraseCollection();
}
/**
 * Look up the target phrases stored for a source phrase, without creating
 * any trie nodes.
 *
 * The lookup is done on a copy of the phrase restricted to m_inputFactors.
 * Starting at m_collection, one child is followed per source word.
 *
 * \param sourceOrig source phrase to look up (not modified)
 * \return NULL if some word has no child node on the path; otherwise
 *         whatever the final node's GetTargetPhraseCollection() returns
 */
const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase& sourceOrig) const
{
  // Work on a private copy so the caller's phrase keeps all its factors.
  Phrase source(sourceOrig);
  source.OnlyTheseFactors(m_inputFactors);

  const size_t numWords = source.GetSize();
  const PhraseDictionaryNodeMemory *node = &m_collection;

  // Descend one level per word; stop as soon as the path breaks.
  for (size_t i = 0; node != NULL && i < numWords; ++i) {
    const Word &word = source.GetWord(i);
    node = node->GetChild(word);
  }

  if (node == NULL) {
    return NULL;
  }
  return node->GetTargetPhraseCollection();
}
/**
 * Walk the trie along the source side of a rule and return the node reached,
 * using GetOrCreateChild() at every step.
 *
 * Terminal source words are keyed by the word alone. Non-terminal source
 * words are keyed by the (source non-terminal, target non-terminal) pair,
 * where the target word is found through the non-terminal alignment of
 * \p target. The alignment entries are consumed in order and each one is
 * CHECKed to refer to the current source position.
 *
 * \param source    source side of the rule
 * \param target    target side of the rule; supplies GetAlignNonTerm() and
 *                  the aligned target non-terminals
 * \param sourceLHS currently unused: the code that keyed a final child on
 *                  the source LHS is commented out below
 * \return reference to the node for the full source phrase (the root node
 *         m_collection if \p source is empty)
 */
PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
    , const TargetPhrase &target
    , const Word *sourceLHS)
{
  const size_t size = source.GetSize();

  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();

  PhraseDictionaryNodeMemory *currNode = &m_collection;
  for (size_t pos = 0 ; pos < size ; ++pos) {
    const Word& word = source.GetWord(pos);

    if (word.IsNonTerminal()) {
      // indexed by source label 1st
      const Word &sourceNonTerm = word;

      // The next alignment entry must exist and belong to this position.
      CHECK(iterAlign != alignmentInfo.end());
      CHECK(iterAlign->first == pos);
      size_t targetNonTermInd = iterAlign->second;
      ++iterAlign;
      const Word &targetNonTerm = target.GetWord(targetNonTermInd);

      currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
    } else {
      currNode = currNode->GetOrCreateChild(word);
    }

    CHECK(currNode != NULL);
  }

  // finally, the source LHS
  //currNode = currNode->GetOrCreateChild(sourceLHS);
  //CHECK(currNode != NULL);

  return *currNode;
}
/**
 * Create a rule lookup manager bound to this dictionary.
 *
 * \param sentence       input being looked up; forwarded to the manager
 * \param cellCollection chart cells; forwarded to the manager
 * \return a ChartRuleLookupManagerMemory allocated with new. Nothing in this
 *         function releases it, so presumably the caller takes ownership
 *         (TODO confirm against callers).
 */
ChartRuleLookupManager *PhraseDictionaryMemory::CreateRuleLookupManager(
  const InputType &sentence,
  const ChartCellCollectionBase &cellCollection)
{
  ChartRuleLookupManager *manager =
    new ChartRuleLookupManagerMemory(sentence, cellCollection, *this);
  return manager;
}
void PhraseDictionaryMemory::SortAndPrune()
{
2013-05-29 21:16:15 +04:00
if (GetTableLimit()) {
m_collection.Sort(GetTableLimit());
}
}
/**
 * Fill in this dictionary's target phrases for every lattice node in the
 * queue, extending the trie position of each node's predecessor by one word.
 *
 * For each node:
 *  - the starting trie node is the predecessor's stored pt node for this
 *    dictionary, or the root node when there is no predecessor;
 *  - the child for the last word of the node's phrase (restricted to
 *    m_inputFactors) is looked up;
 *  - the node receives that child and its target phrase collection, or
 *    (NULL, NULL) when no such child exists.
 *
 * If the predecessor has no pt node for this dictionary, the node is left
 * untouched.
 *
 * NOTE(review): this relies on each predecessor having been processed before
 * its extensions; presumably the queue is ordered accordingly. Confirm
 * against the caller.
 *
 * \param phraseDictionaryQueue lattice nodes to process, in order
 */
void PhraseDictionaryMemory::SetTargetPhraseFromPtMatrix(const std::vector<InputLatticeNode*> &phraseDictionaryQueue) const
{
  for (size_t i = 0; i < phraseDictionaryQueue.size(); ++i) {
    InputLatticeNode &node = *phraseDictionaryQueue[i];
    const Phrase &phrase = node.GetPhrase();
    const InputLatticeNode *prevNode = node.GetPrevNode();

    const PhraseDictionaryNodeMemory *prevPtNode = NULL;
    if (prevNode) {
      prevPtNode = static_cast<const PhraseDictionaryNodeMemory*>(prevNode->GetPtNode(*this));
    } else {
      // Starting subphrase.
      assert(phrase.GetSize() == 1);
      prevPtNode = &GetRootNode();
    }

    // No trie position to extend from: leave this node as it is.
    if (!prevPtNode) {
      continue;
    }

    // Only the newest (last) word needs to be matched; the prefix is already
    // represented by prevPtNode.
    Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
    lastWord.OnlyTheseFactors(m_inputFactors);

    const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
    if (ptNode) {
      const TargetPhraseCollection *targetPhrases = ptNode->GetTargetPhraseCollection();
      node.SetTargetPhrases(*this, targetPhrases, ptNode);
    } else {
      node.SetTargetPhrases(*this, NULL, NULL);
    }
  }
}
// Expands the TO_STRING_BODY macro for this class. The macro is defined
// elsewhere; presumably it generates the class's ToString() in terms of the
// operator<< below (TODO confirm against the macro definition).
TO_STRING_BODY(PhraseDictionaryMemory);
// friend
/**
 * Stream the keys found directly under the dictionary's root node.
 *
 * Writes the source non-terminal of every entry in the root's non-terminal
 * map, followed by the key word of every entry in the root's terminal map.
 * No separators are written and deeper trie levels are not visited.
 *
 * \param out        stream to write to
 * \param phraseDict dictionary whose root node is printed
 * \return \p out
 */
ostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict)
{
  typedef PhraseDictionaryNodeMemory::TerminalMap TermMap;
  typedef PhraseDictionaryNodeMemory::NonTerminalMap NonTermMap;

  const PhraseDictionaryNodeMemory &root = phraseDict.m_collection;

  // Non-terminal children first: the key is a pair, print its source half.
  NonTermMap::const_iterator nonTermIt = root.m_nonTermMap.begin();
  while (nonTermIt != root.m_nonTermMap.end()) {
    const Word &sourceNonTerm = nonTermIt->first.first;
    out << sourceNonTerm;
    ++nonTermIt;
  }

  // Then terminal children, keyed directly by the source word.
  TermMap::const_iterator termIt = root.m_sourceTermMap.begin();
  while (termIt != root.m_sourceTermMap.end()) {
    const Word &sourceTerm = termIt->first;
    out << sourceTerm;
    ++termIt;
  }

  return out;
}
}