KenLM f81d02792087a837ea17e6ce2b33f9b7aaecca68 should fix segfault with ArrayTrie

This commit is contained in:
Kenneth Heafield 2014-06-04 16:03:39 -07:00
parent d82bd475a2
commit 6d9173ba72
3 changed files with 22 additions and 29 deletions

View File

@ -13,14 +13,16 @@
#ifndef LM_BHIKSHA_H
#define LM_BHIKSHA_H
#include <stdint.h>
#include <assert.h>
#include "lm/model_type.hh"
#include "lm/trie.hh"
#include "util/bit_packing.hh"
#include "util/sorted_uniform.hh"
#include <algorithm>
#include <stdint.h>
#include <assert.h>
namespace lm {
namespace ngram {
struct Config;
@ -73,15 +75,24 @@ class ArrayBhiksha {
ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config);
// Fills out.begin and out.end for the entry at `index`. Each value is built from
// a position in the sorted offset table [offset_begin_, offset_end_), shifted
// left by next_inline_.bits, OR'ed with the value util::ReadInt57 returns for
// this record (bit_offset) or the following one (bit_offset + total_bits).
// NOTE(review): this listing looks like a diff hunk whose +/- markers were
// stripped, so both the superseded and the replacement statements appear
// below (begin_it is declared twice and the end_it loop appears twice). Only
// one statement of each pair can exist in a compilable file -- confirm against
// the repository before reusing this text.
void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
// Variant 1 (presumably the pre-fix line): util::BinaryBelow is documented as
// "May return begin - 1", in which case the first end_it loop below would
// dereference a pointer in front of the table.
const uint64_t *begin_it = util::BinaryBelow(util::IdentityAccessor<uint64_t>(), offset_begin_, offset_end_, index);
// Some assertions are commented out because they are expensive.
// assert(*offset_begin_ == 0);
// std::upper_bound returns the first element that is greater. Want the
// last element that is <= to the index.
// Variant 2 (presumably the fix): step back one from upper_bound.
const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
// Since *offset_begin_ == 0, the position should be in range.
// assert(begin_it >= offset_begin_);
const uint64_t *end_it;
// Variant 1 of the scan: starts at begin_it itself, so *begin_it is read.
for (end_it = begin_it; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
// Variant 2 of the scan: starts one past begin_it. Both variants advance
// end_it until it reaches offset_end_ or an entry greater than index + 1.
for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
// assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
// Step back from the first entry > index + 1 to the last entry <= index + 1.
--end_it;
// assert(end_it >= begin_it);
// High bits: table position of begin_it. Low bits: value read at bit_offset.
out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
// Same construction for the end, read from the record total_bits further on.
out.end = ((end_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
//assert(out.end >= out.begin);
// If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
// Unlike the commented-out copy just above, this check is active whenever
// assertions are compiled in.
assert(out.end >= out.begin);
}
void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {

View File

@ -99,8 +99,11 @@ template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordInd
}
// Writes the final next pointer (next_end) through bhiksha_.WriteNext and then
// calls bhiksha_.FinishedLoading(config).
// NOTE(review): this listing looks like a diff hunk whose +/- markers were
// stripped, so last_next_write is declared twice and WriteNext is called twice.
// Only one declaration/call pair can exist in a compilable file -- confirm
// against the repository before reusing this text.
template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
// Variant 1: (insert_index_ + 1) * total_bits_ - InlineBits(). This is
// algebraically the same bit offset as variant 2 below.
uint64_t last_next_write = (insert_index_ + 1) * total_bits_ - bhiksha_.InlineBits();
// Variant 1 passes insert_index_ + 1 as the index argument.
bhiksha_.WriteNext(base_, last_next_write, insert_index_ + 1, next_end);
// Write at insert_index. . .
uint64_t last_next_write = insert_index_ * total_bits_ +
// at the offset where the next pointers are stored.
(total_bits_ - bhiksha_.InlineBits());
// Variant 2 passes insert_index_ itself; since the bit offset is unchanged,
// the index argument is the only effective difference between the two calls.
bhiksha_.WriteNext(base_, last_next_write, insert_index_, next_end);
bhiksha_.FinishedLoading(config);
}

View File

@ -101,27 +101,6 @@ template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(co
return BoundedSortedUniformFind<Iterator, Accessor, Pivot>(accessor, begin, below, end, above, key, out);
}
// Returns an iterator to the last element of the sorted range [begin, end)
// whose key, as read by accessor(it), is <= key. When several elements equal
// key, the last of them is returned.
//
// May return begin - 1: that happens when the range is empty or every element
// is greater than key. Callers must check for this before dereferencing.
//
// The search is a single upper-bound bisection, so it performs O(log n)
// accessor calls even when the range holds long runs of equal keys.
template <class Iterator, class Accessor> Iterator BinaryBelow(
    const Accessor &accessor,
    Iterator begin,
    Iterator end,
    const typename Accessor::Key key) {
  // Invariant: every element before begin is <= key and every element at or
  // after end is > key. The loop closes the gap between the two.
  while (end > begin) {
    Iterator pivot(begin + (end - begin) / 2);
    if (accessor(pivot) > key) {
      end = pivot;
    } else {
      begin = pivot + 1;
    }
  }
  // begin now addresses the first element > key (or the original end), so the
  // element just before it is the answer.
  return begin - 1;
}
} // namespace util
#endif // UTIL_SORTED_UNIFORM_H