2012-10-09 21:15:05 +04:00
|
|
|
#ifndef unicode_hh_INCLUDED
|
|
|
|
#define unicode_hh_INCLUDED
|
|
|
|
|
2014-01-05 19:14:58 +04:00
|
|
|
#include <wctype.h>
|
2016-10-01 15:46:55 +03:00
|
|
|
#include <wchar.h>
|
2016-05-11 11:49:45 +03:00
|
|
|
#include <locale>
|
2012-10-09 21:15:05 +04:00
|
|
|
|
|
|
|
namespace Kakoune
{

// A single Unicode code point, stored as a 32-bit character.
using Codepoint = char32_t;

// Returns true when c is the line feed character '\n'.
inline constexpr bool is_eol(Codepoint c)
{
    return c == '\n';
}

// Returns true when c is a space or a horizontal tab.
inline constexpr bool is_horizontal_blank(Codepoint c)
{
    return c == ' ' or c == '\t';
}

// Returns true when c is a space, a horizontal tab or a line feed,
// i.e. either a horizontal blank or an end of line.
inline constexpr bool is_blank(Codepoint c)
{
    return is_horizontal_blank(c) or is_eol(c);
}

// Selects which definition of "word character" is_word() and categorize()
// apply: Word is the strict one (underscore or alphanumeric), WORD accepts
// every code point that is neither a horizontal blank nor an end of line.
enum WordType { Word, WORD };

// Returns true when c is a word character in the strict (Word) sense:
// an underscore, or a code point iswalnum() classifies as alphanumeric.
// iswalnum() consults the current C locale, so the result for non-ASCII
// code points is locale dependent.
// NOTE(review): the cast assumes wchar_t can represent every Codepoint
// passed in; confirm on platforms where wchar_t is narrower than 32 bits.
template<WordType word_type = Word>
inline bool is_word(Codepoint c)
{
    return c == '_' or iswalnum(static_cast<wchar_t>(c));
}

// WORD flavour: anything that is not a horizontal blank nor an end of line
// counts as a word character.
template<>
inline bool is_word<WORD>(Codepoint c)
{
    return not is_horizontal_blank(c) and not is_eol(c);
}

// Returns true when c is not a (strict) word character, not a horizontal
// blank and not an end of line.
inline bool is_punctuation(Codepoint c)
{
    return not (is_word(c) or is_horizontal_blank(c) or is_eol(c));
}

// Returns true when c is an unaccented ASCII letter, 'a'-'z' or 'A'-'Z'.
inline constexpr bool is_basic_alpha(Codepoint c)
{
    return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z');
}

// Returns the number of columns needed to display c, as reported by
// wcwidth().
//
// wcwidth() returns -1 for code points it considers non-printable. Passing
// that value through the size_t return type would wrap it around to
// SIZE_MAX, so negative results are replaced by a width of one column.
inline size_t codepoint_width(Codepoint c)
{
    const int width = wcwidth(static_cast<wchar_t>(c));
    return width >= 0 ? static_cast<size_t>(width) : 1;
}

// Classes a code point can be sorted into by categorize().
enum class CharCategories
{
    Blank,
    EndOfLine,
    Word,
    Punctuation,
};

// Sorts c into one of the CharCategories.
//
// The checks run in a fixed order: end of line first, then horizontal
// blank, then word character. With word_type == WORD every remaining code
// point is a word character, so Punctuation is only ever returned for the
// strict Word flavour.
template<WordType word_type = Word>
inline CharCategories categorize(Codepoint c)
{
    if (is_eol(c))
        return CharCategories::EndOfLine;
    if (is_horizontal_blank(c))
        return CharCategories::Blank;
    if (word_type == WORD or is_word(c))
        return CharCategories::Word;
    return CharCategories::Punctuation;
}

// Case conversion for code points, delegated to towlower() / towupper(),
// which follow the current C locale.
inline Codepoint to_lower(Codepoint cp) { return towlower(static_cast<wchar_t>(cp)); }
inline Codepoint to_upper(Codepoint cp) { return towupper(static_cast<wchar_t>(cp)); }

// Case conversion for plain chars: only the ASCII letters are converted,
// every other value is returned unchanged.
inline constexpr char to_lower(char c) { return c >= 'A' and c <= 'Z' ? static_cast<char>(c - 'A' + 'a') : c; }
inline constexpr char to_upper(char c) { return c >= 'a' and c <= 'z' ? static_cast<char>(c - 'a' + 'A') : c; }

}
|
|
|
|
|
|
|
|
#endif // unicode_hh_INCLUDED
|