StringPiece. Does a body good. And avoids string copying.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4263 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
heafield 2011-09-24 15:58:23 +00:00
parent 55e24da4d5
commit 8edf53dcf3
3 changed files with 21 additions and 19 deletions

View File

@ -33,15 +33,13 @@ namespace Moses
{ {
FactorCollection FactorCollection::s_instance; FactorCollection FactorCollection::s_instance;
const Factor *FactorCollection::AddFactor(FactorDirection direction const Factor *FactorCollection::AddFactor(const StringPiece &factorString)
, FactorType factorType
, const string &factorString)
{ {
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost. // Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
#ifdef WITH_THREADS #ifdef WITH_THREADS
#if BOOST_VERSION < 104200 #if BOOST_VERSION < 104200
FactorFriend to_ins; FactorFriend to_ins;
to_ins.in.m_string = factorString; to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // BOOST_VERSION #endif // BOOST_VERSION
{ {
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock); boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
@ -56,11 +54,11 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
boost::unique_lock<boost::shared_mutex> lock(m_accessLock); boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#if BOOST_VERSION >= 102400 #if BOOST_VERSION >= 102400
FactorFriend to_ins; FactorFriend to_ins;
to_ins.in.m_string = factorString; to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // BOOST_VERSION #endif // BOOST_VERSION
#else // WITH_THREADS #else // WITH_THREADS
FactorFriend to_ins; FactorFriend to_ins;
to_ins.in.m_string = factorString; to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // WITH_THREADS #endif // WITH_THREADS
to_ins.in.m_id = m_factorId; to_ins.in.m_id = m_factorId;
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins)); std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));

View File

@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set> #include <set>
#endif #endif
#include "util/string_piece.hh"
#include <functional> #include <functional>
#include <string> #include <string>
@ -68,7 +70,7 @@ class FactorCollection
#ifdef HAVE_BOOST #ifdef HAVE_BOOST
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> { struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
std::size_t operator()(const std::string &str) const { std::size_t operator()(const StringPiece &str) const {
return util::MurmurHashNative(str.data(), str.size()); return util::MurmurHashNative(str.data(), str.size());
} }
std::size_t operator()(const FactorFriend &factor) const { std::size_t operator()(const FactorFriend &factor) const {
@ -79,10 +81,10 @@ class FactorCollection
bool operator()(const FactorFriend &left, const FactorFriend &right) const { bool operator()(const FactorFriend &left, const FactorFriend &right) const {
return left.in.GetString() == right.in.GetString(); return left.in.GetString() == right.in.GetString();
} }
bool operator()(const FactorFriend &left, const std::string &right) const { bool operator()(const FactorFriend &left, const StringPiece &right) const {
return left.in.GetString() == right; return left.in.GetString() == right;
} }
bool operator()(const std::string &left, const FactorFriend &right) const { bool operator()(const StringPiece &left, const FactorFriend &right) const {
return left == right.in.GetString(); return left == right.in.GetString();
} }
}; };
@ -120,7 +122,12 @@ public:
/** returns a factor with the same direction, factorType and factorString. /** returns a factor with the same direction, factorType and factorString.
* If a factor already exists in the collection, return the existing factor; if not, create a new one * If a factor already exists in the collection, return the existing factor; if not, create a new one
*/ */
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString); const Factor *AddFactor(const StringPiece &factorString);
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
// Compatibility overload: the direction and factorType arguments are accepted but
// unused (their parameter names are commented out); the call simply forwards to
// AddFactor(factorString) and returns its result.
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString) {
return AddFactor(factorString);
}
TO_STRING(); TO_STRING();

View File

@ -75,12 +75,11 @@ namespace
class MappingBuilder : public lm::ngram::EnumerateVocab class MappingBuilder : public lm::ngram::EnumerateVocab
{ {
public: public:
MappingBuilder(FactorType factorType, FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping) MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
: m_factorType(factorType), m_factorCollection(factorCollection), m_mapping(mapping) {} : m_factorCollection(factorCollection), m_mapping(mapping) {}
void Add(lm::WordIndex index, const StringPiece &str) { void Add(lm::WordIndex index, const StringPiece &str) {
m_word.assign(str.data(), str.size()); std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
std::size_t factorId = m_factorCollection.AddFactor(Output, m_factorType, m_word)->GetId();
if (m_mapping.size() <= factorId) { if (m_mapping.size() <= factorId) {
// 0 is <unk> :-) // 0 is <unk> :-)
m_mapping.resize(factorId + 1); m_mapping.resize(factorId + 1);
@ -89,8 +88,6 @@ public:
} }
private: private:
std::string m_word;
FactorType m_factorType;
FactorCollection &m_factorCollection; FactorCollection &m_factorCollection;
std::vector<lm::WordIndex> &m_mapping; std::vector<lm::WordIndex> &m_mapping;
}; };
@ -229,12 +226,12 @@ template <class Model> bool LanguageModelKen<Model>::Load(const std::string &fil
m_filePath = filePath; m_filePath = filePath;
FactorCollection &factorCollection = FactorCollection::Instance(); FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_); m_sentenceStart = factorCollection.AddFactor(BOS_);
m_sentenceStartArray[m_factorType] = m_sentenceStart; m_sentenceStartArray[m_factorType] = m_sentenceStart;
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_); m_sentenceEnd = factorCollection.AddFactor(EOS_);
m_sentenceEndArray[m_factorType] = m_sentenceEnd; m_sentenceEndArray[m_factorType] = m_sentenceEnd;
MappingBuilder builder(m_factorType, factorCollection, m_lmIdLookup); MappingBuilder builder(factorCollection, m_lmIdLookup);
lm::ngram::Config config; lm::ngram::Config config;
IFVERBOSE(1) { IFVERBOSE(1) {