mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
StringPiece. Does a body good. And avoids string copying.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4263 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
55e24da4d5
commit
8edf53dcf3
@ -33,15 +33,13 @@ namespace Moses
|
|||||||
{
|
{
|
||||||
FactorCollection FactorCollection::s_instance;
|
FactorCollection FactorCollection::s_instance;
|
||||||
|
|
||||||
const Factor *FactorCollection::AddFactor(FactorDirection direction
|
const Factor *FactorCollection::AddFactor(const StringPiece &factorString)
|
||||||
, FactorType factorType
|
|
||||||
, const string &factorString)
|
|
||||||
{
|
{
|
||||||
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
|
// Sorry this is so complicated. Can't we just require everybody to use Boost >= 1.42? The issue is that I can't check BOOST_VERSION unless we have Boost.
|
||||||
#ifdef WITH_THREADS
|
#ifdef WITH_THREADS
|
||||||
#if BOOST_VERSION < 104200
|
#if BOOST_VERSION < 104200
|
||||||
FactorFriend to_ins;
|
FactorFriend to_ins;
|
||||||
to_ins.in.m_string = factorString;
|
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||||
#endif // BOOST_VERSION
|
#endif // BOOST_VERSION
|
||||||
{
|
{
|
||||||
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
|
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
|
||||||
@ -56,11 +54,11 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
|
|||||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||||
#if BOOST_VERSION >= 102400
|
#if BOOST_VERSION >= 102400
|
||||||
FactorFriend to_ins;
|
FactorFriend to_ins;
|
||||||
to_ins.in.m_string = factorString;
|
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||||
#endif // BOOST_VERSION
|
#endif // BOOST_VERSION
|
||||||
#else // WITH_THREADS
|
#else // WITH_THREADS
|
||||||
FactorFriend to_ins;
|
FactorFriend to_ins;
|
||||||
to_ins.in.m_string = factorString;
|
to_ins.in.m_string.assign(factorString.data(), factorString.size());
|
||||||
#endif // WITH_THREADS
|
#endif // WITH_THREADS
|
||||||
to_ins.in.m_id = m_factorId;
|
to_ins.in.m_id = m_factorId;
|
||||||
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));
|
std::pair<Set::iterator, bool> ret(m_set.insert(to_ins));
|
||||||
|
@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include <set>
|
#include <set>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "util/string_piece.hh"
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -68,7 +70,7 @@ class FactorCollection
|
|||||||
|
|
||||||
#ifdef HAVE_BOOST
|
#ifdef HAVE_BOOST
|
||||||
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
|
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
|
||||||
std::size_t operator()(const std::string &str) const {
|
std::size_t operator()(const StringPiece &str) const {
|
||||||
return util::MurmurHashNative(str.data(), str.size());
|
return util::MurmurHashNative(str.data(), str.size());
|
||||||
}
|
}
|
||||||
std::size_t operator()(const FactorFriend &factor) const {
|
std::size_t operator()(const FactorFriend &factor) const {
|
||||||
@ -79,10 +81,10 @@ class FactorCollection
|
|||||||
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
|
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
|
||||||
return left.in.GetString() == right.in.GetString();
|
return left.in.GetString() == right.in.GetString();
|
||||||
}
|
}
|
||||||
bool operator()(const FactorFriend &left, const std::string &right) const {
|
bool operator()(const FactorFriend &left, const StringPiece &right) const {
|
||||||
return left.in.GetString() == right;
|
return left.in.GetString() == right;
|
||||||
}
|
}
|
||||||
bool operator()(const std::string &left, const FactorFriend &right) const {
|
bool operator()(const StringPiece &left, const FactorFriend &right) const {
|
||||||
return left == right.in.GetString();
|
return left == right.in.GetString();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -120,7 +122,12 @@ public:
|
|||||||
/** returns a factor with the same direction, factorType and factorString.
|
/** returns a factor with the same direction, factorType and factorString.
|
||||||
* If a factor already exists in the collection, return the existing factor; if not, create a new one.
|
* If a factor already exists in the collection, return the existing factor; if not, create a new one.
|
||||||
*/
|
*/
|
||||||
const Factor *AddFactor(FactorDirection direction, FactorType factorType, const std::string &factorString);
|
const Factor *AddFactor(const StringPiece &factorString);
|
||||||
|
|
||||||
|
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
|
||||||
|
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString) {
|
||||||
|
return AddFactor(factorString);
|
||||||
|
}
|
||||||
|
|
||||||
TO_STRING();
|
TO_STRING();
|
||||||
|
|
||||||
|
@ -75,12 +75,11 @@ namespace
|
|||||||
class MappingBuilder : public lm::ngram::EnumerateVocab
|
class MappingBuilder : public lm::ngram::EnumerateVocab
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MappingBuilder(FactorType factorType, FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
|
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
|
||||||
: m_factorType(factorType), m_factorCollection(factorCollection), m_mapping(mapping) {}
|
: m_factorCollection(factorCollection), m_mapping(mapping) {}
|
||||||
|
|
||||||
void Add(lm::WordIndex index, const StringPiece &str) {
|
void Add(lm::WordIndex index, const StringPiece &str) {
|
||||||
m_word.assign(str.data(), str.size());
|
std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
|
||||||
std::size_t factorId = m_factorCollection.AddFactor(Output, m_factorType, m_word)->GetId();
|
|
||||||
if (m_mapping.size() <= factorId) {
|
if (m_mapping.size() <= factorId) {
|
||||||
// 0 is <unk> :-)
|
// 0 is <unk> :-)
|
||||||
m_mapping.resize(factorId + 1);
|
m_mapping.resize(factorId + 1);
|
||||||
@ -89,8 +88,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string m_word;
|
|
||||||
FactorType m_factorType;
|
|
||||||
FactorCollection &m_factorCollection;
|
FactorCollection &m_factorCollection;
|
||||||
std::vector<lm::WordIndex> &m_mapping;
|
std::vector<lm::WordIndex> &m_mapping;
|
||||||
};
|
};
|
||||||
@ -229,12 +226,12 @@ template <class Model> bool LanguageModelKen<Model>::Load(const std::string &fil
|
|||||||
m_filePath = filePath;
|
m_filePath = filePath;
|
||||||
|
|
||||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||||
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
|
m_sentenceStart = factorCollection.AddFactor(BOS_);
|
||||||
m_sentenceStartArray[m_factorType] = m_sentenceStart;
|
m_sentenceStartArray[m_factorType] = m_sentenceStart;
|
||||||
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
|
m_sentenceEnd = factorCollection.AddFactor(EOS_);
|
||||||
m_sentenceEndArray[m_factorType] = m_sentenceEnd;
|
m_sentenceEndArray[m_factorType] = m_sentenceEnd;
|
||||||
|
|
||||||
MappingBuilder builder(m_factorType, factorCollection, m_lmIdLookup);
|
MappingBuilder builder(factorCollection, m_lmIdLookup);
|
||||||
lm::ngram::Config config;
|
lm::ngram::Config config;
|
||||||
|
|
||||||
IFVERBOSE(1) {
|
IFVERBOSE(1) {
|
||||||
|
Loading…
Reference in New Issue
Block a user