Store the string in the factor itself. Use MurmurHash because it beats Boost's hash. Also, apparently nobody calls Exists.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4244 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
heafield 2011-09-21 10:26:04 +00:00
parent 4c8552b16f
commit fdc6c37ca4
3 changed files with 62 additions and 28 deletions

View File

@ -46,7 +46,7 @@ class Factor
protected:
// FactorCollection writes here.
const std::string *m_ptrString;
std::string m_string;
size_t m_id;
//! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
@ -55,7 +55,7 @@ protected:
public:
//! original string representation of the factor
inline const std::string &GetString() const {
return *m_ptrString;
return m_string;
}
//! contiguous ID
inline size_t GetId() const {

View File

@ -19,6 +19,9 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifdef HAVE_BOOST
#include <boost/version.hpp>
#endif
#include <ostream>
#include <string>
#include "FactorCollection.h"
@ -30,34 +33,41 @@ namespace Moses
{
FactorCollection FactorCollection::s_instance;
//! Report whether factorString is already stored in the collection.
//! direction and factorType are accepted but not consulted by this lookup.
bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString) const
{
#ifdef WITH_THREADS
  // Shared lock held for the duration of the lookup.
  boost::shared_lock<boost::shared_mutex> read_guard(m_accessLock);
#endif
  return m_map.count(factorString) != 0;
}
//! Return the collection's Factor for factorString, inserting a new one if none is stored yet.
//! A newly inserted factor takes the current m_factorId, which is then incremented.
//! With WITH_THREADS, the lookup is first attempted under a shared lock; only on a miss is the
//! unique lock taken for the insert. direction and factorType are not consulted in this body.
const Factor *FactorCollection::AddFactor(FactorDirection direction
, FactorType factorType
, const string &factorString)
{
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
#ifdef WITH_THREADS
#if BOOST_VERSION < 104200
// Older Boost: find() needs a real Factor as key, so build it before the read lock.
Factor to_ins;
to_ins.m_string = factorString;
#endif // BOOST_VERSION
{
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
Map::const_iterator i = m_map.find(factorString);
if (i != m_map.end()) return &i->second;
#if BOOST_VERSION >= 104200
// If this line doesn't compile, upgrade your Boost.
Set::const_iterator i = m_set.find(factorString, HashFactor(), EqualsFactor());
#else // BOOST_VERSION
Set::const_iterator i = m_set.find(to_ins);
#endif // BOOST_VERSION
if (i != m_set.end()) return &*i;
}
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
std::pair<std::string, Factor> to_ins(factorString, Factor());
std::pair<Map::iterator, bool> ret(m_map.insert(to_ins));
// Must be the exact complement of the "< 104200" guard above, so that exactly one of the
// two branches declares to_ins (a mismatched threshold declares it twice on older Boost).
#if BOOST_VERSION >= 104200
Factor to_ins;
to_ins.m_string = factorString;
#endif // BOOST_VERSION
#else // WITH_THREADS
Factor to_ins;
to_ins.m_string = factorString;
#endif // WITH_THREADS
to_ins.m_id = m_factorId;
// insert() leaves the set unchanged if an equal factor is already present.
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));
if (ret.second) {
Factor &factor = ret.first->second;
factor.m_id = m_factorId++;
factor.m_ptrString = &ret.first->first;
// Only consume an id when the factor was actually inserted.
m_factorId++;
}
return &ret.first->second;
return &*ret.first;
}
FactorCollection::~FactorCollection() {}
@ -70,8 +80,8 @@ ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(factorCollection.m_accessLock);
#endif
for (FactorCollection::Map::const_iterator i = factorCollection.m_map.begin(); i != factorCollection.m_map.end(); ++i) {
out << i->second;
for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin(); i != factorCollection.m_set.end(); ++i) {
out << *i;
}
return out;
}

View File

@ -29,11 +29,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
#ifdef HAVE_BOOST
#include <boost/unordered_map.hpp>
#include "util/murmur_hash.hh"
#include <boost/unordered_set.hpp>
#else
#include <map>
#include <set>
#endif
#include <functional>
#include <string>
#include "Factor.h"
@ -54,11 +56,35 @@ class FactorCollection
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
#ifdef HAVE_BOOST
typedef boost::unordered_map<std::string, Factor> Map;
//! Hash functor accepting either a raw string or a Factor.
//! A Factor hashes exactly as the string it holds, so both overloads agree.
struct HashFactor : public std::unary_function<const Factor &, std::size_t> {
  //! Hash the string's bytes with util::MurmurHashNative.
  std::size_t operator()(const std::string &text) const {
    return util::MurmurHashNative(text.data(), text.size());
  }
  //! Hash a Factor through its string representation.
  std::size_t operator()(const Factor &factor) const {
    const std::string &text = factor.GetString();
    return util::MurmurHashNative(text.data(), text.size());
  }
};
struct EqualsFactor : public std::binary_function<const Factor &, const Factor &, bool> {
bool operator()(const Factor &left, const Factor &right) const {
return left.GetString() == right.GetString();
}
bool operator()(const Factor &left, const std::string &right) const {
return left.GetString() == right;
}
bool operator()(const std::string &left, const Factor &right) const {
return left == right.GetString();
}
};
typedef boost::unordered_set<Factor, HashFactor, EqualsFactor> Set;
#else
typedef std::map<std::string, Factor> Map;
struct LessFactor : public std::binary_function<const Factor &, const Factor &, bool> {
bool operator()(const Factor &left, const Factor &right) const {
return left.GetString() < right.GetString();
}
};
typedef std::set<Factor, LessFactor> Set;
#endif
Map m_map;
Set m_set;
static FactorCollection s_instance;
#ifdef WITH_THREADS
@ -80,8 +106,6 @@ public:
~FactorCollection();
//! Test to see whether a factor exists
bool Exists(FactorDirection direction, FactorType factorType, const std::string &factorString) const;
/** returns a factor with the same direction, factorType and factorString.
* If a factor already exist in the collection, return the existing factor, if not create a new 1
*/