Have all marks return a wcwidth of -1

Mirrors the recent change to consider all marks as combining chars instead of
only the marks with a non-zero combining class.
This commit is contained in:
Kovid Goyal 2018-01-17 23:25:59 +05:30
parent ed700ff830
commit 2ddc7e8c80
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 285 additions and 102 deletions

View File

@ -34,7 +34,7 @@ def get_data(fname, folder='UCD'):
# Map of class names to set of codepoints in class
class_maps = {}
combining_codepoints = set()
marks = set()
not_assigned = set(range(0, sys.maxunicode))
@ -57,9 +57,8 @@ def parse_ucd():
for codepoint in codepoints:
s.add(codepoint)
not_assigned.discard(codepoint)
cc = parts[3]
if cc and cc != '0':
combining_codepoints.add(codepoint)
if category.startswith('M'):
marks.add(codepoint)
def split_two(line):
@ -179,7 +178,7 @@ def add(p, comment, chars_, ret):
non_printing = class_maps['Cc'] | class_maps['Cf'] | class_maps['Cs']
add(p, 'Non-printing characters', non_printing, -1)
add(p, 'Combining characters', combining_codepoints, -1)
add(p, 'Marks', marks, -1)
add(p, 'Private use', class_maps['Co'], -3)
add(p, 'East Asian ambiguous width', ambiguous, -2)
add(p, 'East Asian double width', doublewidth, 2)

2
kitty/emoji.h generated
View File

@ -1,4 +1,4 @@
// unicode data, built from the unicode standard on: 2017-12-20
// unicode data, built from the unicode standard on: 2018-01-17
// see gen-wcwidth.py
#pragma once
#include "data-types.h"

376
kitty/wcwidth-std.h generated
View File

@ -1,4 +1,4 @@
// unicode data, built from the unicode standard on: 2017-12-20
// unicode data, built from the unicode standard on: 2018-01-17
// see gen-wcwidth.py
#pragma once
#include "data-types.h"
@ -53,12 +53,10 @@ wcwidth_std(int32_t code) {
return -1;
// }}}
// Combining characters (826 codepoints) {{{
case 0x300 ... 0x34e:
// Marks (2177 codepoints) {{{
case 0x300 ... 0x36f:
return -1;
case 0x350 ... 0x36f:
return -1;
case 0x483 ... 0x487:
case 0x483 ... 0x489:
return -1;
case 0x591 ... 0x5bd:
return -1;
@ -88,6 +86,8 @@ wcwidth_std(int32_t code) {
return -1;
case 0x730 ... 0x74a:
return -1;
case 0x7a6 ... 0x7b0:
return -1;
case 0x7eb ... 0x7f3:
return -1;
case 0x816 ... 0x819:
@ -102,53 +102,149 @@ wcwidth_std(int32_t code) {
return -1;
case 0x8d4 ... 0x8e1:
return -1;
case 0x8e3 ... 0x8ff:
case 0x8e3 ... 0x903:
return -1;
case 0x93c:
case 0x93a ... 0x93c:
return -1;
case 0x94d:
case 0x93e ... 0x94f:
return -1;
case 0x951 ... 0x954:
case 0x951 ... 0x957:
return -1;
case 0x962 ... 0x963:
return -1;
case 0x981 ... 0x983:
return -1;
case 0x9bc:
return -1;
case 0x9cd:
case 0x9be ... 0x9c4:
return -1;
case 0x9c7 ... 0x9c8:
return -1;
case 0x9cb ... 0x9cd:
return -1;
case 0x9d7:
return -1;
case 0x9e2 ... 0x9e3:
return -1;
case 0xa01 ... 0xa03:
return -1;
case 0xa3c:
return -1;
case 0xa4d:
case 0xa3e ... 0xa42:
return -1;
case 0xa47 ... 0xa48:
return -1;
case 0xa4b ... 0xa4d:
return -1;
case 0xa51:
return -1;
case 0xa70 ... 0xa71:
return -1;
case 0xa75:
return -1;
case 0xa81 ... 0xa83:
return -1;
case 0xabc:
return -1;
case 0xacd:
case 0xabe ... 0xac5:
return -1;
case 0xac7 ... 0xac9:
return -1;
case 0xacb ... 0xacd:
return -1;
case 0xae2 ... 0xae3:
return -1;
case 0xafa ... 0xaff:
return -1;
case 0xb01 ... 0xb03:
return -1;
case 0xb3c:
return -1;
case 0xb4d:
case 0xb3e ... 0xb44:
return -1;
case 0xbcd:
case 0xb47 ... 0xb48:
return -1;
case 0xc4d:
case 0xb4b ... 0xb4d:
return -1;
case 0xb56 ... 0xb57:
return -1;
case 0xb62 ... 0xb63:
return -1;
case 0xb82:
return -1;
case 0xbbe ... 0xbc2:
return -1;
case 0xbc6 ... 0xbc8:
return -1;
case 0xbca ... 0xbcd:
return -1;
case 0xbd7:
return -1;
case 0xc00 ... 0xc03:
return -1;
case 0xc3e ... 0xc44:
return -1;
case 0xc46 ... 0xc48:
return -1;
case 0xc4a ... 0xc4d:
return -1;
case 0xc55 ... 0xc56:
return -1;
case 0xc62 ... 0xc63:
return -1;
case 0xc81 ... 0xc83:
return -1;
case 0xcbc:
return -1;
case 0xccd:
case 0xcbe ... 0xcc4:
return -1;
case 0xcc6 ... 0xcc8:
return -1;
case 0xcca ... 0xccd:
return -1;
case 0xcd5 ... 0xcd6:
return -1;
case 0xce2 ... 0xce3:
return -1;
case 0xd00 ... 0xd03:
return -1;
case 0xd3b ... 0xd3c:
return -1;
case 0xd4d:
case 0xd3e ... 0xd44:
return -1;
case 0xd46 ... 0xd48:
return -1;
case 0xd4a ... 0xd4d:
return -1;
case 0xd57:
return -1;
case 0xd62 ... 0xd63:
return -1;
case 0xd82 ... 0xd83:
return -1;
case 0xdca:
return -1;
case 0xe38 ... 0xe3a:
case 0xdcf ... 0xdd4:
return -1;
case 0xe48 ... 0xe4b:
case 0xdd6:
return -1;
case 0xeb8 ... 0xeb9:
case 0xdd8 ... 0xddf:
return -1;
case 0xec8 ... 0xecb:
case 0xdf2 ... 0xdf3:
return -1;
case 0xe31:
return -1;
case 0xe34 ... 0xe3a:
return -1;
case 0xe47 ... 0xe4e:
return -1;
case 0xeb1:
return -1;
case 0xeb4 ... 0xeb9:
return -1;
case 0xebb ... 0xebc:
return -1;
case 0xec8 ... 0xecd:
return -1;
case 0xf18 ... 0xf19:
return -1;
@ -158,85 +254,99 @@ wcwidth_std(int32_t code) {
return -1;
case 0xf39:
return -1;
case 0xf71 ... 0xf72:
case 0xf3e ... 0xf3f:
return -1;
case 0xf74:
return -1;
case 0xf7a ... 0xf7d:
return -1;
case 0xf80:
return -1;
case 0xf82 ... 0xf84:
case 0xf71 ... 0xf84:
return -1;
case 0xf86 ... 0xf87:
return -1;
case 0xf8d ... 0xf97:
return -1;
case 0xf99 ... 0xfbc:
return -1;
case 0xfc6:
return -1;
case 0x1037:
case 0x102b ... 0x103e:
return -1;
case 0x1039 ... 0x103a:
case 0x1056 ... 0x1059:
return -1;
case 0x108d:
case 0x105e ... 0x1060:
return -1;
case 0x1062 ... 0x1064:
return -1;
case 0x1067 ... 0x106d:
return -1;
case 0x1071 ... 0x1074:
return -1;
case 0x1082 ... 0x108d:
return -1;
case 0x108f:
return -1;
case 0x109a ... 0x109d:
return -1;
case 0x135d ... 0x135f:
return -1;
case 0x1714:
case 0x1712 ... 0x1714:
return -1;
case 0x1734:
case 0x1732 ... 0x1734:
return -1;
case 0x17d2:
case 0x1752 ... 0x1753:
return -1;
case 0x1772 ... 0x1773:
return -1;
case 0x17b4 ... 0x17d3:
return -1;
case 0x17dd:
return -1;
case 0x180b ... 0x180d:
return -1;
case 0x1885 ... 0x1886:
return -1;
case 0x18a9:
return -1;
case 0x1939 ... 0x193b:
case 0x1920 ... 0x192b:
return -1;
case 0x1a17 ... 0x1a18:
case 0x1930 ... 0x193b:
return -1;
case 0x1a60:
case 0x1a17 ... 0x1a1b:
return -1;
case 0x1a75 ... 0x1a7c:
case 0x1a55 ... 0x1a5e:
return -1;
case 0x1a60 ... 0x1a7c:
return -1;
case 0x1a7f:
return -1;
case 0x1ab0 ... 0x1abd:
case 0x1ab0 ... 0x1abe:
return -1;
case 0x1b34:
case 0x1b00 ... 0x1b04:
return -1;
case 0x1b44:
case 0x1b34 ... 0x1b44:
return -1;
case 0x1b6b ... 0x1b73:
return -1;
case 0x1baa ... 0x1bab:
case 0x1b80 ... 0x1b82:
return -1;
case 0x1be6:
case 0x1ba1 ... 0x1bad:
return -1;
case 0x1bf2 ... 0x1bf3:
case 0x1be6 ... 0x1bf3:
return -1;
case 0x1c37:
case 0x1c24 ... 0x1c37:
return -1;
case 0x1cd0 ... 0x1cd2:
return -1;
case 0x1cd4 ... 0x1ce0:
return -1;
case 0x1ce2 ... 0x1ce8:
case 0x1cd4 ... 0x1ce8:
return -1;
case 0x1ced:
return -1;
case 0x1cf4:
case 0x1cf2 ... 0x1cf4:
return -1;
case 0x1cf8 ... 0x1cf9:
case 0x1cf7 ... 0x1cf9:
return -1;
case 0x1dc0 ... 0x1df9:
return -1;
case 0x1dfb ... 0x1dff:
return -1;
case 0x20d0 ... 0x20dc:
return -1;
case 0x20e1:
return -1;
case 0x20e5 ... 0x20f0:
case 0x20d0 ... 0x20f0:
return -1;
case 0x2cef ... 0x2cf1:
return -1;
@ -248,7 +358,7 @@ wcwidth_std(int32_t code) {
return -1;
case 0x3099 ... 0x309a:
return -1;
case 0xa66f:
case 0xa66f ... 0xa672:
return -1;
case 0xa674 ... 0xa67d:
return -1;
@ -256,19 +366,37 @@ wcwidth_std(int32_t code) {
return -1;
case 0xa6f0 ... 0xa6f1:
return -1;
case 0xa802:
return -1;
case 0xa806:
return -1;
case 0xa8c4:
case 0xa80b:
return -1;
case 0xa823 ... 0xa827:
return -1;
case 0xa880 ... 0xa881:
return -1;
case 0xa8b4 ... 0xa8c5:
return -1;
case 0xa8e0 ... 0xa8f1:
return -1;
case 0xa92b ... 0xa92d:
case 0xa926 ... 0xa92d:
return -1;
case 0xa953:
case 0xa947 ... 0xa953:
return -1;
case 0xa9b3:
case 0xa980 ... 0xa983:
return -1;
case 0xa9c0:
case 0xa9b3 ... 0xa9c0:
return -1;
case 0xa9e5:
return -1;
case 0xaa29 ... 0xaa36:
return -1;
case 0xaa43:
return -1;
case 0xaa4c ... 0xaa4d:
return -1;
case 0xaa7b ... 0xaa7d:
return -1;
case 0xaab0:
return -1;
@ -280,12 +408,18 @@ wcwidth_std(int32_t code) {
return -1;
case 0xaac1:
return -1;
case 0xaaf6:
case 0xaaeb ... 0xaaef:
return -1;
case 0xabed:
case 0xaaf5 ... 0xaaf6:
return -1;
case 0xabe3 ... 0xabea:
return -1;
case 0xabec ... 0xabed:
return -1;
case 0xfb1e:
return -1;
case 0xfe00 ... 0xfe0f:
return -1;
case 0xfe20 ... 0xfe2f:
return -1;
case 0x101fd:
@ -294,9 +428,11 @@ wcwidth_std(int32_t code) {
return -1;
case 0x10376 ... 0x1037a:
return -1;
case 0x10a0d:
case 0x10a01 ... 0x10a03:
return -1;
case 0x10a0f:
case 0x10a05 ... 0x10a06:
return -1;
case 0x10a0c ... 0x10a0f:
return -1;
case 0x10a38 ... 0x10a3a:
return -1;
@ -304,65 +440,105 @@ wcwidth_std(int32_t code) {
return -1;
case 0x10ae5 ... 0x10ae6:
return -1;
case 0x11046:
case 0x11000 ... 0x11002:
return -1;
case 0x1107f:
case 0x11038 ... 0x11046:
return -1;
case 0x110b9 ... 0x110ba:
case 0x1107f ... 0x11082:
return -1;
case 0x110b0 ... 0x110ba:
return -1;
case 0x11100 ... 0x11102:
return -1;
case 0x11133 ... 0x11134:
case 0x11127 ... 0x11134:
return -1;
case 0x11173:
return -1;
case 0x111c0:
case 0x11180 ... 0x11182:
return -1;
case 0x111ca:
case 0x111b3 ... 0x111c0:
return -1;
case 0x11235 ... 0x11236:
case 0x111ca ... 0x111cc:
return -1;
case 0x112e9 ... 0x112ea:
case 0x1122c ... 0x11237:
return -1;
case 0x1123e:
return -1;
case 0x112df ... 0x112ea:
return -1;
case 0x11300 ... 0x11303:
return -1;
case 0x1133c:
return -1;
case 0x1134d:
case 0x1133e ... 0x11344:
return -1;
case 0x11347 ... 0x11348:
return -1;
case 0x1134b ... 0x1134d:
return -1;
case 0x11357:
return -1;
case 0x11362 ... 0x11363:
return -1;
case 0x11366 ... 0x1136c:
return -1;
case 0x11370 ... 0x11374:
return -1;
case 0x11442:
case 0x11435 ... 0x11446:
return -1;
case 0x11446:
case 0x114b0 ... 0x114c3:
return -1;
case 0x114c2 ... 0x114c3:
case 0x115af ... 0x115b5:
return -1;
case 0x115bf ... 0x115c0:
case 0x115b8 ... 0x115c0:
return -1;
case 0x1163f:
case 0x115dc ... 0x115dd:
return -1;
case 0x116b6 ... 0x116b7:
case 0x11630 ... 0x11640:
return -1;
case 0x1172b:
case 0x116ab ... 0x116b7:
return -1;
case 0x11a34:
case 0x1171d ... 0x1172b:
return -1;
case 0x11a01 ... 0x11a0a:
return -1;
case 0x11a33 ... 0x11a39:
return -1;
case 0x11a3b ... 0x11a3e:
return -1;
case 0x11a47:
return -1;
case 0x11a99:
case 0x11a51 ... 0x11a5b:
return -1;
case 0x11c3f:
case 0x11a8a ... 0x11a99:
return -1;
case 0x11d42:
case 0x11c2f ... 0x11c36:
return -1;
case 0x11d44 ... 0x11d45:
case 0x11c38 ... 0x11c3f:
return -1;
case 0x11c92 ... 0x11ca7:
return -1;
case 0x11ca9 ... 0x11cb6:
return -1;
case 0x11d31 ... 0x11d36:
return -1;
case 0x11d3a:
return -1;
case 0x11d3c ... 0x11d3d:
return -1;
case 0x11d3f ... 0x11d45:
return -1;
case 0x11d47:
return -1;
case 0x16af0 ... 0x16af4:
return -1;
case 0x16b30 ... 0x16b36:
return -1;
case 0x1bc9e:
case 0x16f51 ... 0x16f7e:
return -1;
case 0x16f8f ... 0x16f92:
return -1;
case 0x1bc9d ... 0x1bc9e:
return -1;
case 0x1d165 ... 0x1d169:
return -1;
@ -376,6 +552,18 @@ wcwidth_std(int32_t code) {
return -1;
case 0x1d242 ... 0x1d244:
return -1;
case 0x1da00 ... 0x1da36:
return -1;
case 0x1da3b ... 0x1da6c:
return -1;
case 0x1da75:
return -1;
case 0x1da84:
return -1;
case 0x1da9b ... 0x1da9f:
return -1;
case 0x1daa1 ... 0x1daaf:
return -1;
case 0x1e000 ... 0x1e006:
return -1;
case 0x1e008 ... 0x1e018:
@ -390,6 +578,8 @@ wcwidth_std(int32_t code) {
return -1;
case 0x1e944 ... 0x1e94a:
return -1;
case 0xe0100 ... 0xe01ef:
return -1;
// }}}
// Private use (137468 codepoints) {{{
@ -401,7 +591,7 @@ wcwidth_std(int32_t code) {
return -3;
// }}}
// East Asian ambiguous width (1159 codepoints) {{{
// East Asian ambiguous width (902 codepoints) {{{
case 0xa1:
return -2;
case 0xa4:
@ -508,8 +698,6 @@ wcwidth_std(int32_t code) {
return -2;
case 0x2df:
return -2;
case 0x34f:
return -2;
case 0x391 ... 0x3a1:
return -2;
case 0x3a3 ... 0x3a9:
@ -736,8 +924,6 @@ wcwidth_std(int32_t code) {
return -2;
case 0x3248 ... 0x324f:
return -2;
case 0xfe00 ... 0xfe0f:
return -2;
case 0xfffd:
return -2;
case 0x1f100 ... 0x1f10a:
@ -752,8 +938,6 @@ wcwidth_std(int32_t code) {
return -2;
case 0x1f19b ... 0x1f1ac:
return -2;
case 0xe0100 ... 0xe01ef:
return -2;
// }}}
// East Asian double width (181730 codepoints) {{{