mosesdecoder/moses/TranslationModel/PhraseDictionaryMemory.cpp

208 lines
6.3 KiB
C++
Raw Normal View History

// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include "PhraseDictionaryMemory.h"
2012-11-12 23:56:18 +04:00
#include "moses/FactorCollection.h"
#include "moses/Word.h"
#include "moses/Util.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/WordsRange.h"
#include "moses/UserMessage.h"
#include "moses/TranslationModel/RuleTable/LoaderFactory.h"
#include "moses/TranslationModel/RuleTable/Loader.h"
2012-11-27 21:23:31 +04:00
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h"
#include "moses/InputPath.h"
using namespace std;
namespace Moses
{
// Constructs the in-memory phrase dictionary from its configuration line.
// The line is forwarded to the RuleTableTrie base, after which the
// parameters are read and the cache size is overridden.
PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
  : RuleTableTrie(line)
{
  ReadParameters();

  // A cache in front of a memory phrase table is pointless, so force it off.
  // This must come after ReadParameters() so the override is the final value.
  m_maxCacheSize = 0;
}
// Returns the target phrase collection held by the trie node for the given
// rule. The node lookup (and creation of any missing nodes) is delegated to
// GetOrCreateNode(), which receives all three arguments unchanged.
TargetPhraseCollection &
PhraseDictionaryMemory::
GetOrCreateTargetPhraseCollection(const Phrase &source,
                                  const TargetPhrase &target,
                                  const Word *sourceLHS)
{
  return GetOrCreateNode(source, target, sourceLHS).GetTargetPhraseCollection();
}
2013-09-25 02:57:49 +04:00
// Read-only lookup of the target phrases for a source phrase.
//
// Descends the trie from the root, one source word per level, after
// restricting a copy of the phrase with OnlyTheseFactors(m_inputFactors).
// This mirrors the GetOrCreate* path but never creates a node.
//
// Returns a pointer to the collection of the node reached, or NULL as soon
// as some word has no child node.
const TargetPhraseCollection*
PhraseDictionaryMemory::
GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
{
  // Operate on a copy; the caller's phrase is left as it was.
  Phrase reduced(sourceOrig);
  reduced.OnlyTheseFactors(m_inputFactors);

  const size_t numWords = reduced.GetSize();
  const PhraseDictionaryNodeMemory *node = &m_collection;

  size_t idx = 0;
  while (idx < numWords) {
    node = node->GetChild(reduced.GetWord(idx));
    if (!node) {
      return NULL;
    }
    ++idx;
  }

  return &node->GetTargetPhraseCollection();
}
// Walks the source side of a rule through the trie, one word per level,
// asking each node for the matching child via its GetOrCreate* functions,
// and returns the node reached after the last source word.
//
// Terminal words select a child by the source word alone. Each non-terminal
// consumes the next entry of the target's non-terminal alignment: the entry's
// source index must equal the current position, and its target index picks
// the target non-terminal used (together with the source label, unless
// UNLABELLED_SOURCE is defined) to select the child.
//
// source     source side of the rule
// target     target side; supplies the alignment and target non-terminals
// sourceLHS  not used at present (the LHS step at the end is disabled)
//
// UTIL_THROW_IF2 fires when the alignment runs out, when it does not match
// the current position, or when a child lookup yields NULL.
PhraseDictionaryNodeMemory &
PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source,
                                        const TargetPhrase &target,
                                        const Word *sourceLHS)
{
  const size_t numWords = source.GetSize();

  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();

  PhraseDictionaryNodeMemory *currNode = &m_collection;
  for (size_t pos = 0; pos != numWords; ++pos) {
    const Word &srcWord = source.GetWord(pos);

    if (!srcWord.IsNonTerminal()) {
      currNode = currNode->GetOrCreateChild(srcWord);
    } else {
      // Non-terminals are indexed by source label first.
      UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
                     "No alignment for non-term at position " << pos);
      UTIL_THROW_IF2(iterAlign->first != pos,
                     "Alignment info incorrect at position " << pos);

      const Word &targetNonTerm = target.GetWord(iterAlign->second);
      ++iterAlign;

#if defined(UNLABELLED_SOURCE)
      currNode = currNode->GetOrCreateNonTerminalChild(targetNonTerm);
#else
      currNode = currNode->GetOrCreateChild(srcWord, targetNonTerm);
#endif
    }

    UTIL_THROW_IF2(currNode == NULL,
                   "Node not found at position " << pos);
  }

  // finally, the source LHS
  //currNode = currNode->GetOrCreateChild(sourceLHS);

  return *currNode;
}
// Builds the rule lookup manager that serves this table during chart parsing.
//
// The manager is allocated with new and handed back as a raw pointer
// (NOTE(review): the caller presumably owns and deletes it -- confirm).
// The maximum chart span argument is accepted but ignored.
ChartRuleLookupManager *
PhraseDictionaryMemory::CreateRuleLookupManager(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellCollection,
  std::size_t /*maxChartSpan */)
{
  ChartRuleLookupManagerMemory *manager =
    new ChartRuleLookupManagerMemory(parser, cellCollection, *this);
  return manager;
}
void PhraseDictionaryMemory::SortAndPrune()
{
2013-05-29 21:16:15 +04:00
if (GetTableLimit()) {
m_collection.Sort(GetTableLimit());
}
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 02:57:49 +04:00
PhraseDictionaryMemory::
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
2014-05-12 18:40:18 +04:00
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
const InputPath *prevPath = inputPath.GetPrevPath();
2013-07-05 02:38:18 +04:00
const PhraseDictionaryNodeMemory *prevPtNode = NULL;
2013-10-02 19:51:16 +04:00
if (prevPath) {
prevPtNode = static_cast<const PhraseDictionaryNodeMemory*>(prevPath->GetPtNode(*this));
2013-07-05 02:38:18 +04:00
} else {
// Starting subphrase.
assert(phrase.GetSize() == 1);
prevPtNode = &GetRootNode();
}
// backoff
2014-05-12 18:40:18 +04:00
if (!SatisfyBackoff(inputPath)) {
2014-06-08 11:44:59 +04:00
continue;
}
2013-07-05 02:38:18 +04:00
if (prevPtNode) {
Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
lastWord.OnlyTheseFactors(m_inputFactors);
const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
if (ptNode) {
2013-07-16 20:11:12 +04:00
const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection();
2014-05-12 18:40:18 +04:00
inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode);
2013-07-05 02:38:18 +04:00
} else {
2014-06-08 11:44:59 +04:00
inputPath.SetTargetPhrases(*this, NULL, NULL);
2013-07-05 02:38:18 +04:00
}
}
}
}
// Expands the project's TO_STRING_BODY macro for this class. The macro is
// defined outside this file; NOTE(review): it presumably implements
// ToString() on top of the operator<< defined below -- confirm against the
// macro's definition.
TO_STRING_BODY(PhraseDictionaryMemory);
// friend
// Streams the keys of the root node's immediate children: first the
// non-terminal entries (the target label when UNLABELLED_SOURCE is defined,
// otherwise the source label of the key pair), then the terminal entries.
// Nothing is written between the words, and deeper trie levels are not
// visited.
ostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict)
{
  typedef PhraseDictionaryNodeMemory::NonTerminalMap NonTermMap;
  typedef PhraseDictionaryNodeMemory::TerminalMap TermMap;

  const PhraseDictionaryNodeMemory &root = phraseDict.m_collection;

  NonTermMap::const_iterator ntIt = root.m_nonTermMap.begin();
  const NonTermMap::const_iterator ntEnd = root.m_nonTermMap.end();
  for (; ntIt != ntEnd; ++ntIt) {
#if defined(UNLABELLED_SOURCE)
    out << ntIt->first;
#else
    out << ntIt->first.first;
#endif
  }

  TermMap::const_iterator termIt = root.m_sourceTermMap.begin();
  const TermMap::const_iterator termEnd = root.m_sourceTermMap.end();
  for (; termIt != termEnd; ++termIt) {
    out << termIt->first;
  }

  return out;
}
}