mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
StringPiece. Does a body good. And avoids string copying.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4263 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
55e24da4d5
commit
8edf53dcf3
@ -33,15 +33,13 @@ namespace Moses
|
||||
{
|
||||
FactorCollection FactorCollection::s_instance;
|
||||
|
||||
const Factor *FactorCollection::AddFactor(FactorDirection direction
|
||||
, FactorType factorType
|
||||
, const string &factorString)
|
||||
const Factor *FactorCollection::AddFactor(const StringPiece &factorString)
|
||||
{
|
||||
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
|
||||
#ifdef WITH_THREADS
|
||||
#if BOOST_VERSION < 104200
|
||||
FactorFriend to_ins;
|
||||
to_ins.in.m_string = factorString;
|
||||
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||
#endif // BOOST_VERSION
|
||||
{
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
|
||||
@ -56,11 +54,11 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
#if BOOST_VERSION >= 102400
|
||||
FactorFriend to_ins;
|
||||
to_ins.in.m_string = factorString;
|
||||
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||
#endif // BOOST_VERSION
|
||||
#else // WITH_THREADS
|
||||
FactorFriend to_ins;
|
||||
to_ins.in.m_string = factorString;
|
||||
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||
#endif // WITH_THREADS
|
||||
to_ins.in.m_id = m_factorId;
|
||||
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));
|
||||
|
@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <set>
|
||||
#endif
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
@ -68,7 +70,7 @@ class FactorCollection
|
||||
|
||||
#ifdef HAVE_BOOST
|
||||
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
|
||||
std::size_t operator()(const std::string &str) const {
|
||||
std::size_t operator()(const StringPiece &str) const {
|
||||
return util::MurmurHashNative(str.data(), str.size());
|
||||
}
|
||||
std::size_t operator()(const FactorFriend &factor) const {
|
||||
@ -79,10 +81,10 @@ class FactorCollection
|
||||
// Equality for two stored factors: they match when their strings compare equal.
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
|
||||
return left.in.GetString() == right.in.GetString();
|
||||
}
|
||||
bool operator()(const FactorFriend &left, const std::string &right) const {
|
||||
bool operator()(const FactorFriend &left, const StringPiece &right) const {
|
||||
return left.in.GetString() == right;
|
||||
}
|
||||
bool operator()(const std::string &left, const FactorFriend &right) const {
|
||||
bool operator()(const StringPiece &left, const FactorFriend &right) const {
|
||||
return left == right.in.GetString();
|
||||
}
|
||||
};
|
||||
@ -120,7 +122,12 @@ public:
|
||||
/** returns a factor with the same direction, factorType and factorString.
|
||||
* If a factor already exists in the collection, return the existing factor; if not, create a new one.
|
||||
*/
|
||||
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
|
||||
const Factor *AddFactor(const StringPiece &factorString);
|
||||
|
||||
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
|
||||
// Overload that keeps the older three-argument call shape. The direction and
// factorType parameters are unnamed and unused; the call simply forwards
// factorString to AddFactor(factorString) and returns its result.
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString) {
|
||||
return AddFactor(factorString);
|
||||
}
|
||||
|
||||
TO_STRING();
|
||||
|
||||
|
@ -75,12 +75,11 @@ namespace
|
||||
class MappingBuilder : public lm::ngram::EnumerateVocab
|
||||
{
|
||||
public:
|
||||
MappingBuilder(FactorType factorType, FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
|
||||
: m_factorType(factorType), m_factorCollection(factorCollection), m_mapping(mapping) {}
|
||||
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
|
||||
: m_factorCollection(factorCollection), m_mapping(mapping) {}
|
||||
|
||||
void Add(lm::WordIndex index, const StringPiece &str) {
|
||||
m_word.assign(str.data(), str.size());
|
||||
std::size_t factorId = m_factorCollection.AddFactor(Output, m_factorType, m_word)->GetId();
|
||||
std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
|
||||
if (m_mapping.size() <= factorId) {
|
||||
// 0 is <unk> :-)
|
||||
m_mapping.resize(factorId + 1);
|
||||
@ -89,8 +88,6 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
std::string m_word;
|
||||
FactorType m_factorType;
|
||||
FactorCollection &m_factorCollection;
|
||||
std::vector<lm::WordIndex> &m_mapping;
|
||||
};
|
||||
@ -229,12 +226,12 @@ template <class Model> bool LanguageModelKen<Model>::Load(const std::string &fil
|
||||
m_filePath = filePath;
|
||||
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
|
||||
m_sentenceStart = factorCollection.AddFactor(BOS_);
|
||||
m_sentenceStartArray[m_factorType] = m_sentenceStart;
|
||||
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
|
||||
m_sentenceEnd = factorCollection.AddFactor(EOS_);
|
||||
m_sentenceEndArray[m_factorType] = m_sentenceEnd;
|
||||
|
||||
MappingBuilder builder(m_factorType, factorCollection, m_lmIdLookup);
|
||||
MappingBuilder builder(factorCollection, m_lmIdLookup);
|
||||
lm::ngram::Config config;
|
||||
|
||||
IFVERBOSE(1) {
|
||||
|
Loading…
Reference in New Issue
Block a user