2010-04-08 21:16:10 +04:00
|
|
|
// vim:tabstop=2
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <vector>
|
|
|
|
#include <iterator>
|
2010-08-17 17:41:46 +04:00
|
|
|
#include <utility>
|
|
|
|
#include <ostream>
|
2012-11-12 23:56:18 +04:00
|
|
|
#include "moses/Word.h"
|
|
|
|
#include "moses/TargetPhraseCollection.h"
|
|
|
|
#include "moses/Terminal.h"
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2010-10-01 01:28:30 +04:00
|
|
|
#include <boost/functional/hash.hpp>
|
|
|
|
#include <boost/unordered_map.hpp>
|
|
|
|
#include <boost/version.hpp>
|
|
|
|
|
2010-04-08 21:16:10 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2010-08-17 17:41:46 +04:00
|
|
|
// Forward declaration: PhraseDictionaryNodeSCFG names this class (and its
// stream-output operator) as a friend further down in this header.
class PhraseDictionarySCFG;
|
2012-08-14 02:53:14 +04:00
|
|
|
// Forward declaration: PhraseDictionaryNodeSCFG names this class (and its
// stream-output operator) as a friend further down in this header.
class PhraseDictionaryFuzzyMatch;
|
2012-07-18 17:52:48 +04:00
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
//! @todo why?
|
2010-10-01 01:28:30 +04:00
|
|
|
class NonTerminalMapKeyHasher
|
|
|
|
{
|
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
size_t operator()(const std::pair<Word, Word> & k) const {
|
|
|
|
// Assumes that only the first factor of each Word is relevant.
|
|
|
|
const Word & w1 = k.first;
|
|
|
|
const Word & w2 = k.second;
|
|
|
|
const Factor * f1 = w1[0];
|
|
|
|
const Factor * f2 = w2[0];
|
|
|
|
size_t seed = 0;
|
|
|
|
boost::hash_combine(seed, *f1);
|
|
|
|
boost::hash_combine(seed, *f2);
|
|
|
|
return seed;
|
|
|
|
}
|
2010-10-01 01:28:30 +04:00
|
|
|
};
|
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
//! @todo why?
|
2010-10-01 01:28:30 +04:00
|
|
|
class NonTerminalMapKeyEqualityPred
|
|
|
|
{
|
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
bool operator()(const std::pair<Word, Word> & k1,
|
|
|
|
const std::pair<Word, Word> & k2) const {
|
|
|
|
// Compare first non-terminal of each key. Assumes that for Words
|
|
|
|
// representing non-terminals only the first factor is relevant.
|
2010-10-01 01:28:30 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
const Word & w1 = k1.first;
|
|
|
|
const Word & w2 = k2.first;
|
|
|
|
const Factor * f1 = w1[0];
|
|
|
|
const Factor * f2 = w2[0];
|
|
|
|
if (f1->Compare(*f2)) {
|
|
|
|
return false;
|
|
|
|
}
|
2010-10-01 01:28:30 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
// Compare second non-terminal of each key.
|
|
|
|
{
|
|
|
|
const Word & w1 = k1.second;
|
|
|
|
const Word & w2 = k2.second;
|
|
|
|
const Factor * f1 = w1[0];
|
|
|
|
const Factor * f2 = w2[0];
|
|
|
|
if (f1->Compare(*f2)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2010-10-01 01:28:30 +04:00
|
|
|
};
|
|
|
|
|
2010-08-17 15:01:03 +04:00
|
|
|
/** One node of the PhraseDictionarySCFG structure
|
2010-04-08 21:16:10 +04:00
|
|
|
*/
|
2010-08-17 15:01:03 +04:00
|
|
|
class PhraseDictionaryNodeSCFG
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2011-01-13 03:25:10 +03:00
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
typedef std::pair<Word, Word> NonTerminalMapKey;
|
2010-10-01 01:28:30 +04:00
|
|
|
|
|
|
|
#if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
|
2011-02-24 16:14:42 +03:00
|
|
|
typedef boost::unordered_map<Word,
|
2013-05-23 15:50:57 +04:00
|
|
|
PhraseDictionaryNodeSCFG*,
|
2011-02-24 16:14:42 +03:00
|
|
|
TerminalHasher,
|
|
|
|
TerminalEqualityPred> TerminalMap;
|
|
|
|
|
|
|
|
typedef boost::unordered_map<NonTerminalMapKey,
|
2013-05-23 15:50:57 +04:00
|
|
|
PhraseDictionaryNodeSCFG*,
|
2011-02-24 16:14:42 +03:00
|
|
|
NonTerminalMapKeyHasher,
|
|
|
|
NonTerminalMapKeyEqualityPred> NonTerminalMap;
|
2010-10-01 01:28:30 +04:00
|
|
|
#else
|
2013-05-23 15:50:57 +04:00
|
|
|
typedef std::map<Word, PhraseDictionaryNodeSCFG*> TerminalMap;
|
|
|
|
typedef std::map<NonTerminalMapKey, PhraseDictionaryNodeSCFG*> NonTerminalMap;
|
2010-10-01 01:28:30 +04:00
|
|
|
#endif
|
2010-08-17 17:41:46 +04:00
|
|
|
|
2011-01-13 03:25:10 +03:00
|
|
|
private:
|
2011-02-24 16:14:42 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const PhraseDictionarySCFG&);
|
2012-08-14 02:53:14 +04:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryFuzzyMatch&);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
// only these classes are allowed to instantiate this class
|
|
|
|
friend class PhraseDictionarySCFG;
|
2012-08-14 02:53:14 +04:00
|
|
|
friend class PhraseDictionaryFuzzyMatch;
|
2011-02-24 16:14:42 +03:00
|
|
|
friend class std::map<Word, PhraseDictionaryNodeSCFG>;
|
2012-07-19 22:36:46 +04:00
|
|
|
friend class std::map<long, PhraseDictionaryNodeSCFG>;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
protected:
|
2011-02-24 16:14:42 +03:00
|
|
|
TerminalMap m_sourceTermMap;
|
|
|
|
NonTerminalMap m_nonTermMap;
|
|
|
|
TargetPhraseCollection *m_targetPhraseCollection;
|
|
|
|
|
|
|
|
PhraseDictionaryNodeSCFG()
|
|
|
|
:m_targetPhraseCollection(NULL)
|
|
|
|
{}
|
2010-04-08 21:16:10 +04:00
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
virtual ~PhraseDictionaryNodeSCFG();
|
|
|
|
|
|
|
|
bool IsLeaf() const {
|
|
|
|
return m_sourceTermMap.empty() && m_nonTermMap.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Prune(size_t tableLimit);
|
2011-06-27 19:13:15 +04:00
|
|
|
void Sort(size_t tableLimit);
|
2011-02-24 16:14:42 +03:00
|
|
|
PhraseDictionaryNodeSCFG *GetOrCreateChild(const Word &sourceTerm);
|
|
|
|
PhraseDictionaryNodeSCFG *GetOrCreateChild(const Word &sourceNonTerm, const Word &targetNonTerm);
|
|
|
|
const PhraseDictionaryNodeSCFG *GetChild(const Word &sourceTerm) const;
|
|
|
|
const PhraseDictionaryNodeSCFG *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
|
|
|
|
|
|
|
|
const TargetPhraseCollection *GetTargetPhraseCollection() const {
|
|
|
|
return m_targetPhraseCollection;
|
|
|
|
}
|
2013-05-23 15:50:57 +04:00
|
|
|
TargetPhraseCollection &GetOrCreateTargetPhraseCollection();
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
const NonTerminalMap & GetNonTerminalMap() const {
|
|
|
|
return m_nonTermMap;
|
|
|
|
}
|
|
|
|
|
2011-11-06 13:08:37 +04:00
|
|
|
void Clear();
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
TO_STRING();
|
2010-04-08 21:16:10 +04:00
|
|
|
};
|
|
|
|
|
2010-08-17 15:01:03 +04:00
|
|
|
// Stream-output operator for a single node. Declaration only; no definition
// is visible in this header.
std::ostream& operator<<(std::ostream&, const PhraseDictionaryNodeSCFG&);
|
|
|
|
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|