StringPiece. Does a body good. And avoids string copying.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4263 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
heafield 2011-09-24 15:58:23 +00:00
parent 55e24da4d5
commit 8edf53dcf3
3 changed files with 21 additions and 19 deletions

View File

@ -33,15 +33,13 @@ namespace Moses
{
FactorCollection FactorCollection::s_instance;
const Factor *FactorCollection::AddFactor(FactorDirection direction
, FactorType factorType
, const string &factorString)
const Factor *FactorCollection::AddFactor(const StringPiece &factorString)
{
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
#ifdef WITH_THREADS
#if BOOST_VERSION < 104200
FactorFriend to_ins;
to_ins.in.m_string = factorString;
to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // BOOST_VERSION
{
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
@ -56,11 +54,11 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
// This guard must be the exact complement of the "BOOST_VERSION < 104200" guard
// earlier in this function: to_ins is declared in one of the two sections, and
// enabling both for the same Boost version would declare it twice.
#if BOOST_VERSION >= 104200
FactorFriend to_ins;
to_ins.in.m_string = factorString;
to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // BOOST_VERSION
#else // WITH_THREADS
FactorFriend to_ins;
to_ins.in.m_string = factorString;
to_ins.in.m_string.assign(factorString.data(), factorString.size());
#endif // WITH_THREADS
to_ins.in.m_id = m_factorId;
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));

View File

@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set>
#endif
#include "util/string_piece.hh"
#include <functional>
#include <string>
@ -68,7 +70,7 @@ class FactorCollection
#ifdef HAVE_BOOST
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
std::size_t operator()(const std::string &str) const {
// Hashes a StringPiece by handing its data pointer and size to util::MurmurHashNative.
std::size_t operator()(const StringPiece &str) const {
return util::MurmurHashNative(str.data(), str.size());
}
std::size_t operator()(const FactorFriend &factor) const {
@ -79,10 +81,10 @@ class FactorCollection
// Two FactorFriend entries compare equal when their factors' strings are equal.
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
return left.in.GetString() == right.in.GetString();
}
bool operator()(const FactorFriend &left, const std::string &right) const {
// Compares a FactorFriend's factor string against a StringPiece (entry on the left).
bool operator()(const FactorFriend &left, const StringPiece &right) const {
return left.in.GetString() == right;
}
bool operator()(const std::string &left, const FactorFriend &right) const {
// Compares a StringPiece against a FactorFriend's factor string (entry on the right).
bool operator()(const StringPiece &left, const FactorFriend &right) const {
return left == right.in.GetString();
}
};
@ -120,7 +122,12 @@ public:
/** Returns the factor with the given factorString (the direction and factorType arguments of the legacy overload are ignored).
* If such a factor already exists in the collection, the existing factor is returned; otherwise a new one is created.
*/
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
const Factor *AddFactor(const StringPiece &factorString);
// Legacy three-argument overload: the direction and factorType arguments are
// accepted but unused, and the call forwards to AddFactor(factorString).
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString) {
  const Factor *const factor = this->AddFactor(factorString);
  return factor;
}
TO_STRING();

View File

@ -75,12 +75,11 @@ namespace
class MappingBuilder : public lm::ngram::EnumerateVocab
{
public:
MappingBuilder(FactorType factorType, FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
: m_factorType(factorType), m_factorCollection(factorCollection), m_mapping(mapping) {}
// Binds the builder to the given factor collection and mapping vector.
// Both are taken by reference and used to initialize the corresponding members.
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
: m_factorCollection(factorCollection), m_mapping(mapping) {}
void Add(lm::WordIndex index, const StringPiece &str) {
m_word.assign(str.data(), str.size());
std::size_t factorId = m_factorCollection.AddFactor(Output, m_factorType, m_word)->GetId();
std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);
@ -89,8 +88,6 @@ public:
}
private:
std::string m_word;
FactorType m_factorType;
FactorCollection &m_factorCollection;
std::vector<lm::WordIndex> &m_mapping;
};
@ -229,12 +226,12 @@ template <class Model> bool LanguageModelKen<Model>::Load(const std::string &fil
m_filePath = filePath;
FactorCollection &factorCollection = FactorCollection::Instance();
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
m_sentenceStart = factorCollection.AddFactor(BOS_);
m_sentenceStartArray[m_factorType] = m_sentenceStart;
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
m_sentenceEnd = factorCollection.AddFactor(EOS_);
m_sentenceEndArray[m_factorType] = m_sentenceEnd;
MappingBuilder builder(m_factorType, factorCollection, m_lmIdLookup);
MappingBuilder builder(factorCollection, m_lmIdLookup);
lm::ngram::Config config;
IFVERBOSE(1) {