diff --git a/src/utf8.hh b/src/utf8.hh
index 85200dc76..8e7d209a4 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -17,6 +17,14 @@ template<typename Iterator>
 [[gnu::always_inline]]
 inline char read(Iterator& it) { char c = *it; ++it; return c; }
 
+// returns true if c is the first byte of a (either single or
+// multibyte) character, i.e. c is not a 10xxxxxx continuation byte
+[[gnu::always_inline]]
+inline bool is_character_start(char c)
+{
+    return (c & 0xC0) != 0x80;
+}
+
 // returns an iterator to next character first byte
 template<typename Iterator>
 Iterator next(Iterator it, const Iterator& end)
@@ -52,27 +60,33 @@ Iterator previous(Iterator it, const Iterator& begin)
 template<typename Iterator>
 Iterator advance(Iterator it, const Iterator& end, CharCount d)
 {
+    // nothing to walk over: it already sits on the bound
+    if (it == end)
+        return it;
+
     if (d < 0)
     {
-        while (it != end and d++)
-            it = utf8::previous(it, end);
+        // moving backward: end is the lower bound, each character start
+        // reached consumes one unit of d
+        while (it != end and d != 0)
+        {
+            if (is_character_start(*--it))
+                ++d;
+        }
     }
-    else
+    else if (d > 0)
     {
-        while (it != end and d--)
-            it = utf8::next(it, end);
+        // step first, then test against end before dereferencing, so the
+        // one-past-the-end position is never read
+        while (d != 0 and ++it != end)
+        {
+            if (is_character_start(*it))
+                --d;
+        }
     }
     return it;
 }
 
-// return true if it points to the first byte of a (either single or
-// multibyte) character
-[[gnu::always_inline]]
-inline bool is_character_start(char c)
-{
-    return (c & 0xC0) != 0x80;
-}
-
 // returns the character count between begin and end
 template<typename Iterator>
 CharCount distance(Iterator begin, const Iterator& end)