2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_FactorCollection_h
|
|
|
|
#define moses_FactorCollection_h
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2014-03-21 14:53:15 +04:00
|
|
|
// reserve space for non-terminal symbols (ensuring consecutive numbering, and allowing quick lookup by ID)
|
|
|
|
#ifndef moses_MaxNumNonterminals
|
|
|
|
#define moses_MaxNumNonterminals 10000
|
|
|
|
#endif
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
#ifdef WITH_THREADS
|
|
|
|
#include <boost/thread/shared_mutex.hpp>
|
|
|
|
#endif
|
|
|
|
|
2011-09-21 14:26:04 +04:00
|
|
|
#include "util/murmur_hash.hh"
|
|
|
|
#include <boost/unordered_set.hpp>
|
2011-09-20 23:08:42 +04:00
|
|
|
|
2011-09-21 14:26:04 +04:00
|
|
|
#include <functional>
|
2011-09-20 23:08:42 +04:00
|
|
|
#include <string>
|
|
|
|
|
2011-10-13 17:32:14 +04:00
|
|
|
#include "util/string_piece.hh"
|
2013-04-25 22:42:30 +04:00
|
|
|
#include "util/pool.hh"
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "Factor.h"
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2012-06-27 03:45:02 +04:00
|
|
|
/** We don't want Factor to be copyable by anybody. But we also want to store
|
2011-09-22 00:35:12 +04:00
|
|
|
* it in an STL container. The solution is that Factor's copy constructor is
|
|
|
|
* private and friended to FactorFriend. The STL containers can delegate
|
|
|
|
* copying, so friending the container isn't sufficient. STL containers see
|
|
|
|
* FactorFriend's public copy constructor and everybody else sees Factor's
|
2013-05-29 21:16:15 +04:00
|
|
|
* private copy constructor.
|
2011-09-22 00:35:12 +04:00
|
|
|
*/
|
|
|
|
struct FactorFriend {
|
|
|
|
Factor in;
|
|
|
|
};
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
/** collection of factors
|
|
|
|
*
|
|
|
|
* All Factors in moses are accessed and created by a FactorCollection.
|
|
|
|
* By enforcing this strict creation processes (ie, forbidding factors
|
|
|
|
* from being created on the stack, etc), their memory addresses can
|
|
|
|
* be used as keys to uniquely identify them.
|
|
|
|
* Only 1 FactorCollection object should be created.
|
|
|
|
*/
|
|
|
|
class FactorCollection
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-09-22 00:35:12 +04:00
|
|
|
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
|
|
|
|
std::size_t operator()(const FactorFriend &factor) const {
|
2013-04-25 22:42:30 +04:00
|
|
|
return util::MurmurHashNative(factor.in.m_string.data(), factor.in.m_string.size());
|
2011-09-21 14:26:04 +04:00
|
|
|
}
|
|
|
|
};
|
2011-09-22 12:55:54 +04:00
|
|
|
struct EqualsFactor : public std::binary_function<const FactorFriend &, const FactorFriend &, bool> {
|
|
|
|
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
|
2011-09-22 00:35:12 +04:00
|
|
|
return left.in.GetString() == right.in.GetString();
|
2011-09-21 14:26:04 +04:00
|
|
|
}
|
|
|
|
};
|
2011-09-22 00:35:12 +04:00
|
|
|
typedef boost::unordered_set<FactorFriend, HashFactor, EqualsFactor> Set;
|
2011-09-21 14:26:04 +04:00
|
|
|
Set m_set;
|
2014-03-21 14:53:15 +04:00
|
|
|
Set m_setNonTerminal;
|
2011-09-20 23:08:42 +04:00
|
|
|
|
2013-04-25 22:42:30 +04:00
|
|
|
util::Pool m_string_backing;
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
static FactorCollection s_instance;
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
//reader-writer lock
|
2011-09-20 23:08:42 +04:00
|
|
|
mutable boost::shared_mutex m_accessLock;
|
2009-08-07 20:47:54 +04:00
|
|
|
#endif
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2014-03-21 14:53:15 +04:00
|
|
|
size_t m_factorIdNonTerminal; /**< unique, contiguous ids, starting from 0, for each non-terminal factor */
|
|
|
|
size_t m_factorId; /**< unique, contiguous ids, starting from moses_MaxNumNonterminals, for each terminal factor */
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
//! constructor. only the 1 static variable can be created
|
|
|
|
FactorCollection()
|
2014-03-21 14:53:15 +04:00
|
|
|
: m_factorIdNonTerminal(0)
|
|
|
|
, m_factorId(moses_MaxNumNonterminals) {
|
2013-06-10 21:11:55 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
public:
|
|
|
|
static FactorCollection& Instance() {
|
|
|
|
return s_instance;
|
|
|
|
}
|
|
|
|
|
|
|
|
~FactorCollection();
|
|
|
|
|
|
|
|
/** returns a factor with the same direction, factorType and factorString.
|
|
|
|
* If a factor already exist in the collection, return the existing factor, if not create a new 1
|
|
|
|
*/
|
2014-03-21 14:53:15 +04:00
|
|
|
const Factor *AddFactor(const StringPiece &factorString, bool isNonTerminal = false);
|
|
|
|
|
|
|
|
const size_t GetNumNonTerminals() {
|
|
|
|
return m_factorIdNonTerminal;
|
|
|
|
}
|
2011-09-24 19:58:23 +04:00
|
|
|
|
|
|
|
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
|
2014-03-21 14:53:15 +04:00
|
|
|
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString, bool isNonTerminal = false) {
|
|
|
|
return AddFactor(factorString, isNonTerminal);
|
2011-09-24 19:58:23 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
TO_STRING();
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
};
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
2010-02-24 14:15:44 +03:00
|
|
|
#endif
|