1
1
mirror of https://github.com/mawww/kakoune.git synced 2024-09-21 09:59:08 +03:00

Regex: Use only 128 characters in start desc and encode others as 0

Using 257 entries consumed a lot of memory for no good reason, as codepoints
> 127 are not common enough to be treated specially.
This commit is contained in:
Maxime Coste 2018-04-29 16:42:46 +10:00
parent 528ecb7417
commit 1e8026f143
2 changed files with 15 additions and 11 deletions

View File

@ -15,6 +15,7 @@ namespace Kakoune
{
constexpr Codepoint CompiledRegex::StartDesc::other;
constexpr Codepoint CompiledRegex::StartDesc::count;
struct ParsedRegex
{
@ -906,15 +907,16 @@ private:
{
for (auto& range : character_class.ranges)
{
auto min = std::min(CompiledRegex::StartDesc::other, range.min);
auto max = std::min(CompiledRegex::StartDesc::other, range.max);
for (Codepoint cp = min; cp <= max; ++cp)
constexpr auto clamp = [](Codepoint cp) { return std::min(CompiledRegex::StartDesc::count, cp); };
for (auto cp = clamp(range.min), end = clamp(range.max + 1); cp < end; ++cp)
start_desc.map[cp] = true;
if (range.max >= CompiledRegex::StartDesc::count)
start_desc.map[CompiledRegex::StartDesc::other] = true;
}
}
else
{
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
{
if (start_desc.map[cp] or is_character_class(character_class, cp))
start_desc.map[cp] = true;
@ -926,7 +928,7 @@ private:
case ParsedRegex::CharacterType:
{
const CharacterType ctype = (CharacterType)node.value;
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
{
if (is_ctype(ctype, cp))
start_desc.map[cp] = true;

View File

@ -103,9 +103,9 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
{
static constexpr size_t count = 256;
static constexpr Codepoint other = 256;
bool map[count+1];
static constexpr Codepoint count = 128;
static constexpr Codepoint other = 0;
bool map[count];
};
std::unique_ptr<StartDesc> forward_start_desc;
@ -213,7 +213,8 @@ public:
return false;
}
else if (start != config.end and
not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)])
not start_desc->map[*start < CompiledRegex::StartDesc::count ?
*start : CompiledRegex::StartDesc::other])
return false;
}
@ -509,8 +510,9 @@ private:
void to_next_start(EffectiveIt& start, const EffectiveIt& end,
const CompiledRegex::StartDesc& start_desc)
{
while (start != end and *start >= 0 and
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
Codepoint cp;
while (start != end and (cp = *start) >= 0 and
not start_desc.map[cp < CompiledRegex::StartDesc::count ? cp : CompiledRegex::StartDesc::other])
++start;
}