mirror of
https://github.com/kovidgoyal/kitty.git
synced 2024-10-04 01:59:04 +03:00
76669ad14d
Files generated from the same Unicode version will be consistent regardless of the date they were built.
613 lines
22 KiB
Python
Executable File
613 lines
22 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# License: GPL v3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
|
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from collections import defaultdict
|
|
from contextlib import contextmanager
|
|
from datetime import date
|
|
from functools import partial
|
|
from html.entities import html5
|
|
from itertools import groupby
|
|
from operator import itemgetter
|
|
from typing import (
|
|
Callable, DefaultDict, Dict, FrozenSet, Generator, Iterable, List,
|
|
Optional, Set, Tuple, Union
|
|
)
|
|
from urllib.request import urlopen
|
|
|
|
# Work from the directory that holds this script: the generated files and the
# local nerd-fonts-glyphs.txt table are all opened through relative paths.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Noncharacters: the last two code points of each of the 17 planes
# (U+nFFFE / U+nFFFF) plus the contiguous run U+FDD0..U+FDEF.
non_characters = frozenset(
    plane_start + tail
    for plane_start in range(0, 0x110000, 0x10000)
    for tail in (0xfffe, 0xffff)
) | frozenset(range(0xfdd0, 0xfdf0))
# Sanity check: 17 * 2 + 32 == 66.
if len(non_characters) != 66:
    raise SystemExit('non_characters table incorrect')
# U+1F3FB..U+1F3FF inclusive.
emoji_skin_tone_modifiers = frozenset(range(0x1f3fb, 0x1F3FF + 1))
|
|
|
|
|
|
def get_data(fname: str, folder: str = 'UCD') -> Iterable[str]:
    """Yield the non-empty, non-comment lines of a Unicode data file.

    The file is looked up at
    ``https://www.unicode.org/Public/{folder}/latest/{fname}``. A copy is
    cached in ``/tmp`` under the basename of the URL only, so two files with
    the same basename in different folders share one cache entry. When the
    cached copy exists it is used and no network access happens.

    Args:
        fname: path of the file below ``{folder}/latest/``.
        folder: top-level folder under ``Public/``.

    Yields:
        Each line, stripped of surrounding whitespace, skipping blank lines
        and lines that start with ``#``.
    """
    url = f'https://www.unicode.org/Public/{folder}/latest/{fname}'
    local = os.path.join('/tmp', os.path.basename(url))
    if os.path.exists(local):
        with open(local, 'rb') as f:
            data = f.read()
    else:
        # Close the HTTP response deterministically instead of leaving the
        # socket to be reclaimed by the garbage collector.
        with urlopen(url) as response:
            data = response.read()
        with open(local, 'wb') as f:
            f.write(data)
    for line in data.decode('utf-8').splitlines():
        line = line.strip()
        if line and not line.startswith('#'):
            yield line
|
|
|
|
|
|
def unicode_version() -> Tuple[int, int, int]:
    """Return the (major, minor, patch) Unicode version found in ReadMe.txt.

    The first line yielded by get_data("ReadMe.txt") that contains
    ``Version X.Y.Z`` decides the result.

    Raises:
        ValueError: if no line contains a version string.
    """
    version_pat = re.compile(r'Version\s+(\d+)\.(\d+)\.(\d+)')
    for line in get_data("ReadMe.txt"):
        found = version_pat.search(line)
        if found is None:
            continue
        major, minor, patch = found.groups()
        return int(major), int(minor), int(patch)
    raise ValueError('Could not find Unicode Version')
|
|
|
|
|
|
# Map of class names to set of codepoints in class
class_maps: Dict[str, Set[int]] = {}
# Codepoints whose general category starts with 'S'
all_symbols: Set[int] = set()
# Codepoint -> character name
name_map: Dict[int, str] = {}
# Lower-cased search word -> codepoints whose name contains that word
word_search_map: DefaultDict[str, Set[int]] = defaultdict(set)
soft_hyphen = 0xad
# The 26 regional indicator symbols starting at U+1F1E6
flag_codepoints = frozenset(range(0x1F1E6, 0x1F1E6 + 26))
# See https://github.com/harfbuzz/harfbuzz/issues/169
marks = set(emoji_skin_tone_modifiers) | flag_codepoints
# Every codepoint starts out unassigned and is discarded once seen in
# UnicodeData.txt. sys.maxunicode is itself a valid codepoint (U+10FFFF), so
# the range must include it.
not_assigned = set(range(0, sys.maxunicode + 1))
# Property name (from PropList.txt) -> codepoints carrying that property
property_maps: DefaultDict[str, Set[int]] = defaultdict(set)
|
|
|
|
|
|
def parse_prop_list() -> None:
    """Read PropList.txt into property_maps and extend marks.

    Each data line has the form ``<codepoint or range> ; <Property> ...``.
    The codepoints are added to ``property_maps[<Property>]``. Afterwards all
    Other_Default_Ignorable_Code_Point codepoints are merged into ``marks``.
    """
    global marks
    for line in get_data('ucd/PropList.txt'):
        if line.startswith('#'):
            continue
        range_text, remainder = line.split(';', 1)
        members = parse_range_spec(range_text.strip())
        # The property name is the first whitespace-delimited token after ';'.
        prop_name = remainder.strip().split()[0]
        property_maps[prop_name].update(members)
    # see https://www.unicode.org/faq/unsup_char.html#3
    marks |= property_maps['Other_Default_Ignorable_Code_Point']
|
|
|
|
|
|
def parse_ucd() -> None:
    """Fill the module-level tables from UnicodeData.txt and related sources.

    Populates class_maps, name_map, word_search_map, marks and all_symbols,
    and removes every listed codepoint from not_assigned. HTML5 entity names
    and the local nerd-fonts-glyphs.txt file contribute extra search words
    (and, for nerd fonts, names for codepoints that have none yet).
    """

    def add_word(w: str, c: int) -> None:
        # Codepoints up to 32, 127 and 128..159 are never indexed, and neither
        # are single-character words.
        excluded = c <= 32 or c == 127 or 128 <= c <= 159
        if not excluded and len(w) > 1:
            word_search_map[w.lower()].add(c)

    # HTML5 entities that expand to exactly one character become search words.
    for entity, text in html5.items():
        if len(text) == 1:
            add_word(entity.rstrip(';'), ord(text))
    word_search_map['nnbsp'].add(0x202f)

    # Set while between a "<..., First>" line and the line that closes the range.
    range_start: Optional[int] = None
    for line in get_data('ucd/UnicodeData.txt'):
        fields = [x.strip() for x in line.split(';')]
        codepoint = int(fields[0], 16)
        desc = fields[1]
        # Fall back to field 10 when the name is empty or is '<control>'.
        name = desc or fields[10]
        if name == '<control>':
            name = fields[10]
        if name:
            name_map[codepoint] = name
            for word in name.lower().split():
                add_word(word, codepoint)
        category = fields[2]
        members = class_maps.setdefault(category, set())

        codepoints: Union[Tuple[int, ...], range]
        if range_start is None:
            if desc.endswith(', First>'):
                range_start = codepoint
                continue
            codepoints = (codepoint,)
        else:
            # This line closes the range opened by the previous line.
            codepoints = range(range_start, codepoint + 1)
            range_start = None

        extra: Optional[Set[int]]
        if category.startswith('M') or category == 'Cf':
            # we add Cf to marks as it contains things like tags and zero
            # width chars. Not sure if *all* of Cf should be treated as
            # combining chars, might need to add individual exceptions in
            # the future.
            extra = marks
        elif category.startswith('S'):
            extra = all_symbols
        else:
            extra = None
        members.update(codepoints)
        not_assigned.difference_update(codepoints)
        if extra is not None:
            extra.update(codepoints)

    with open('nerd-fonts-glyphs.txt') as f:
        for raw in f:
            entry = raw.strip()
            if not entry or entry.startswith('#'):
                continue
            code, _category, name = entry.split(' ', 2)
            codepoint = int(code, 16)
            # Names already known from UnicodeData.txt take precedence.
            if not name or codepoint in name_map:
                continue
            name_map[codepoint] = name.upper()
            for word in name.lower().split():
                add_word(word, codepoint)

    # Some common synonyms
    for word, synonym in (
        ('bee', 'honeybee'),
        ('lambda', 'lamda'),
        ('lamda', 'lambda'),
        ('diamond', 'gem'),
    ):
        word_search_map[word].update(word_search_map[synonym])
|
|
|
|
|
|
def parse_range_spec(spec: str) -> Set[int]:
    """Convert ``XXXX`` or ``XXXX..YYYY`` (hexadecimal) into a set of ints.

    A range is inclusive at both ends. Surrounding whitespace is ignored.
    """
    spec = spec.strip()
    if '..' not in spec:
        return {int(spec, 16)}
    # Splitting on '.' leaves an empty string between the two dots; drop it.
    bounds = [int(piece, 16) for piece in spec.split('.') if piece]
    return set(range(bounds[0], bounds[1] + 1))
|
|
|
|
|
|
def split_two(line: str) -> Tuple[Set[int], str]:
    """Split ``<range> ; <value> ...`` into (set of codepoints, value).

    Only the first space-delimited token after the first ';' is kept as the
    value.
    """
    range_text, remainder = line.split(';', 1)
    value = remainder.strip().split(' ', 1)[0].strip()
    return parse_range_spec(range_text.strip()), value
|
|
|
|
|
|
# Tables filled in while reading emoji-sequences.txt.
# Every codepoint that takes part in a recognised emoji sequence.
all_emoji: Set[int] = set()
# Codepoints that may act as the base of an emoji presentation.
emoji_presentation_bases: Set[int] = set()
# Emoji emitted with width 1 by the width generator.
narrow_emoji: Set[int] = set()
# Emoji emitted with width 2 by the width generator.
wide_emoji: Set[int] = set()
# First regional indicator -> every second indicator seen paired with it.
flags: Dict[int, List[int]] = {}
|
|
|
|
|
|
def parse_basic_emoji(spec: str) -> None:
    """Record a Basic_Emoji entry.

    The first token is a codepoint or range. An entry with no further token
    is filed under wide_emoji; one with an extra token (presumably the U+FE0F
    variation selector — confirm against emoji-sequences.txt) is filed under
    narrow_emoji.
    """
    tokens = spec.split()
    chars = parse_range_spec(tokens[0])
    all_emoji.update(chars)
    emoji_presentation_bases.update(chars)
    if len(tokens) < 2:
        wide_emoji.update(chars)
    else:
        narrow_emoji.update(chars)
|
|
|
|
|
|
def parse_keycap_sequence(spec: str) -> None:
    """Record the base of an Emoji_Keycap_Sequence entry as a narrow emoji.

    The entry must consist of exactly three tokens; only the first is used.
    """
    base, _second, _third = spec.split()
    chars = parse_range_spec(base)
    for table in (all_emoji, emoji_presentation_bases, narrow_emoji):
        table.update(chars)
|
|
|
|
|
|
def parse_flag_emoji_sequence(spec: str) -> None:
    """Record both codepoints of an RGI_Emoji_Flag_Sequence entry.

    Both codepoints are filed as wide emoji and the pairing is remembered in
    ``flags`` (first codepoint -> list of second codepoints).
    """
    first_text, second_text = spec.split()
    left = int(first_text, 16)
    right = int(second_text, 16)
    pair = {left, right}
    for table in (all_emoji, wide_emoji, emoji_presentation_bases):
        table.update(pair)
    flags.setdefault(left, []).append(right)
|
|
|
|
|
|
def parse_emoji_tag_sequence(spec: str) -> None:
    """Record the first codepoint of an RGI_Emoji_Tag_Sequence as wide emoji."""
    base = int(spec.split()[0], 16)
    for table in (all_emoji, wide_emoji, emoji_presentation_bases):
        table.add(base)
|
|
|
|
|
|
def parse_emoji_modifier_sequence(spec: str) -> None:
    """Record the base of an RGI_Emoji_Modifier_Sequence entry as wide emoji.

    The entry must be exactly two hexadecimal tokens. The modifier is parsed,
    so a malformed value still raises, but it is not stored.
    """
    base_text, modifier_text = spec.split()
    char = int(base_text, 16)
    int(modifier_text, 16)  # parsed for validation only
    for table in (all_emoji, wide_emoji, emoji_presentation_bases):
        table.add(char)
|
|
|
|
|
|
def parse_emoji() -> None:
    """Read emoji-sequences.txt and dispatch each entry on its type field.

    Lines have the form ``<data> ; <type> ; ...``. Lines with fewer than two
    fields, and types without a handler, are skipped.
    """
    handlers: Dict[str, Callable[[str], None]] = {
        'Basic_Emoji': parse_basic_emoji,
        'Emoji_Keycap_Sequence': parse_keycap_sequence,
        'RGI_Emoji_Flag_Sequence': parse_flag_emoji_sequence,
        'RGI_Emoji_Tag_Sequence': parse_emoji_tag_sequence,
        'RGI_Emoji_Modifier_Sequence': parse_emoji_modifier_sequence,
    }
    for line in get_data('emoji-sequences.txt', 'emoji'):
        fields = [x.strip() for x in line.split(';')]
        if len(fields) < 2:
            continue
        data, etype = fields[:2]
        handler = handlers.get(etype)
        if handler is not None:
            handler(data)
|
|
|
|
|
|
# Tables filled in by parse_eaw() from EastAsianWidth.txt.
# Codepoints with East Asian Width 'A'.
ambiguous: Set[int] = set()
# Codepoints with East Asian Width 'W' or 'F', plus the default-wide blocks.
doublewidth: Set[int] = set()
|
|
|
|
|
|
def parse_eaw() -> None:
    """Fill ``ambiguous`` and ``doublewidth`` from EastAsianWidth.txt.

    Codepoints with width 'A' go to ``ambiguous``; 'W' and 'F' go to
    ``doublewidth``. Afterwards, every codepoint in the five blocks below
    that the file does not list at all is also added to ``doublewidth``.

    Every listed codepoint is tracked, whatever its width class, so a
    codepoint that the file explicitly gives a narrow or neutral width inside
    one of those blocks is not overridden by the block default.
    """
    global doublewidth, ambiguous
    listed: Set[int] = set()
    for line in get_data('ucd/EastAsianWidth.txt'):
        chars, eaw = split_two(line)
        listed |= chars
        if eaw == 'A':
            ambiguous |= chars
        elif eaw in ('W', 'F'):
            doublewidth |= chars
    # Blocks whose unlisted codepoints default to wide.
    default_wide_blocks = (
        (0x3400, 0x4DBF),
        (0x4E00, 0x9FFF),
        (0xF900, 0xFAFF),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    )
    for first, last in default_wide_blocks:
        doublewidth |= set(range(first, last + 1)) - listed
|
|
|
|
|
|
def get_ranges(items: List[int]) -> Generator[Union[int, Tuple[int, int]], None, None]:
    """Collapse integers into ascending runs of consecutive values.

    Args:
        items: the integers to group, in any order. The list is not modified
            and duplicate values are ignored.

    Yields:
        A bare int for an isolated value, or an inclusive ``(first, last)``
        tuple for a run of two or more consecutive values.
    """
    # Work on a sorted, de-duplicated copy: sorting in place would reorder
    # the caller's list, and duplicates would break the grouping key below.
    ordered = sorted(set(items))
    # Within a run of consecutive values, (index - value) stays constant.
    for _, run in groupby(enumerate(ordered), lambda pair: pair[0] - pair[1]):
        members = tuple(map(itemgetter(1), run))
        first, last = members[0], members[-1]
        if first == last:
            yield first
        else:
            yield first, last
|
|
|
|
|
|
def write_case(spec: Union[Tuple[int, ...], int], p: Callable[..., None], for_go: bool = False) -> None:
    """Emit one ``case`` label for a codepoint or an inclusive range.

    Args:
        spec: a single codepoint, or a ``(first, last)`` tuple.
        p: print-like callable that receives the generated line.
        for_go: when true, a range is written as a comma separated list of
            every value instead of the ``first ... last`` form.
    """
    if not isinstance(spec, tuple):
        p(f'\t\tcase 0x{spec:x}:')
        return
    if not for_go:
        p('\t\tcase 0x{:x} ... 0x{:x}:'.format(*spec))
        return
    v = ', '.join(f'0x{x:x}' for x in range(spec[0], spec[1] + 1))
    p(f'\t\tcase {v}:')
|
|
|
|
|
|
@contextmanager
def create_header(path: str, include_data_types: bool = True) -> Generator[Callable[..., None], None, None]:
    """Open ``path`` for writing and yield a print-like writer bound to it.

    A standard preamble is written before the caller's content: the Unicode
    version, a "generated" notice, ``#pragma once`` for ``.h`` files and,
    when ``include_data_types`` is true, the data-types include plus
    START_ALLOW_CASE_RANGE. After the ``with`` body finishes normally a blank
    line and, if applicable, END_ALLOW_CASE_RANGE are appended.
    """
    with open(path, 'w') as out:
        emit = partial(print, file=out)
        version = '.'.join(str(part) for part in unicode_version())
        emit('// Unicode data, built from the Unicode Standard', version)
        emit(f'// Code generated by {os.path.basename(__file__)}, DO NOT EDIT.', end='\n\n')
        if path.endswith('.h'):
            emit('#pragma once')
        if include_data_types:
            emit('#include "data-types.h"\n')
            emit('START_ALLOW_CASE_RANGE')
        emit()
        yield emit
        emit()
        if include_data_types:
            emit('END_ALLOW_CASE_RANGE')
|
|
|
|
|
|
def gen_emoji() -> None:
    """Write kitty/emoji.h with the is_emoji() and is_symbol() predicates.

    Both C functions are a switch over codepoint ranges returning true, with
    a default of false.
    """
    predicates = (
        ('static inline bool\nis_emoji(char_type code) {', all_emoji),
        ('static inline bool\nis_symbol(char_type code) {', all_symbols),
    )
    with create_header('kitty/emoji.h') as p:
        for signature, members in predicates:
            p(signature)
            p('\tswitch(code) {')
            for spec in get_ranges(list(members)):
                write_case(spec, p)
                p('\t\t\treturn true;')
            p('\t\tdefault: return false;')
            p('\t}')
            p('\treturn false;\n}')
|
|
|
|
|
|
def category_test(
    name: str,
    p: Callable[..., None],
    classes: Iterable[str],
    comment: str,
    use_static: bool = False,
    extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(),
    exclude: Union[Set[int], FrozenSet[int]] = frozenset(),
    least_check_return: Optional[str] = None,
    ascii_range: Optional[str] = None
) -> None:
    """Emit a C predicate ``bool name(char_type code)`` for a set of codepoints.

    The set is the union of ``class_maps[c]`` for every ``c`` in ``classes``
    plus ``extra_chars``, minus ``exclude``.

    Args:
        name: name of the generated C function.
        p: print-like callable that receives the generated lines.
        classes: general category names to include.
        comment: text for the comment placed at the top of the function body.
        use_static: prefix the function with ``static inline``.
        extra_chars: additional codepoints to include.
        exclude: codepoints to remove.
        least_check_return: if given, emit a fast path returning this C
            expression for codes below the smallest member.
        ascii_range: if given, emit a fast path returning this C expression
            for codes in 0x20..0x7e.
    """
    chars: Set[int] = set()
    chars.update(*(class_maps[c] for c in classes))
    chars = (chars | extra_chars) - exclude

    prefix = 'static inline ' if use_static else ''
    p(f'{prefix}bool\n{name}(char_type code) {{')
    # The fold marker is split across two literals so that this source line
    # does not itself contain three consecutive opening braces.
    p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
    if least_check_return is not None:
        smallest = min(chars)
        p(f'\tif (LIKELY(code < {smallest})) return {least_check_return};')
    if ascii_range is not None:
        p(f'\tif (LIKELY(0x20 <= code && code <= 0x7e)) return {ascii_range};')
    p('\tswitch(code) {')
    for spec in get_ranges(list(chars)):
        write_case(spec, p)
        p('\t\t\treturn true;')
    p('\t} // }}}\n')
    p('\treturn false;\n}\n')
|
|
|
|
|
|
def codepoint_to_mark_map(p: Callable[..., None], mark_map: List[int]) -> Dict[int, int]:
    """Emit the body of a C switch mapping a codepoint ``c`` to its index.

    Args:
        p: print-like callable that receives the generated lines.
        mark_map: codepoints; the position of each one is its mark number.

    Returns:
        The reverse mapping, codepoint -> index in ``mark_map``.
    """
    p('\tswitch(c) { // {{{')
    rmap = {cp: index for index, cp in enumerate(mark_map)}
    for spec in get_ranges(mark_map):
        if not isinstance(spec, tuple):
            p(f'\t\tcase {spec}: return {rmap[spec]};')
            continue
        first, last = spec[0], spec[1]
        # Every codepoint of a run shares one return statement: the index of
        # the first member plus the offset of c inside the run.
        cases = ' '.join(f'case {i}:' for i in range(first, last + 1))
        p(f'\t\t{cases} return {rmap[first]} + c - {first};')
    p('default: return 0;')
    p('\t} // }}}')
    return rmap
|
|
|
|
|
|
def classes_to_regex(classes: Iterable[str], exclude: str = '') -> Iterable[str]:
    """Yield regex character-class fragments covering the given categories.

    Args:
        classes: general category names whose codepoints are included.
        exclude: characters to leave out.

    Yields:
        Escaped single codepoints (``\\xNN``, ``\\uNNNN`` or ``\\UNNNNNNNN``)
        or ``low-high`` pairs of such escapes, in ascending order.
    """
    chars: Set[int] = set()
    chars.update(*(class_maps[c] for c in classes))
    chars.difference_update(map(ord, exclude))

    def as_string(codepoint: int) -> str:
        # Use the shortest escape form that can hold the value.
        if codepoint < 256:
            return fr'\x{codepoint:02x}'
        if codepoint <= 0xffff:
            return fr'\u{codepoint:04x}'
        return fr'\U{codepoint:08x}'

    for spec in get_ranges(list(chars)):
        if isinstance(spec, tuple):
            low, high = as_string(spec[0]), as_string(spec[1])
            yield '{}-{}'.format(low, high)
        else:
            yield as_string(spec)
|
|
|
|
|
|
def gen_ucd() -> None:
    """Generate kitty/unicode-data.c and the files derived from it.

    Writes the category predicates and the mark <-> codepoint mapping to
    kitty/unicode-data.c, patches the VS15/VS16 mark constants between the
    START_KNOWN_MARKS / END_KNOWN_MARKS comments of kitty/unicode-data.h, and
    writes the URL delimiter character class to kittens/hints/url_regex.py.

    Raises:
        SystemExit: if the marker comments are not found in unicode-data.h.
    """
    cz = {c for c in class_maps if c[0] in 'CZ'}
    with create_header('kitty/unicode-data.c') as p:
        p('#include "unicode-data.h"')
        category_test(
            'is_combining_char', p,
            (),
            'Combining and default ignored characters',
            extra_chars=marks,
            least_check_return='false'
        )
        category_test(
            'is_ignored_char', p, 'Cc Cs'.split(),
            'Control characters and non-characters',
            extra_chars=non_characters,
            ascii_range='false'
        )
        category_test(
            'is_non_rendered_char', p, 'Cc Cs Cf'.split(),
            'Other_Default_Ignorable_Code_Point and soft hyphen',
            extra_chars=property_maps['Other_Default_Ignorable_Code_Point'] | set(range(0xfe00, 0xfe0f + 1)),
            ascii_range='false'
        )
        category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
        category_test('is_CZ_category', p, cz, 'C and Z categories')
        category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')
        # Index 0 is reserved: both generated lookups return 0 for "no mark".
        mark_map = [0] + sorted(marks)
        p('char_type codepoint_for_mark(combining_type m) {')
        p(f'\tstatic char_type map[{len(mark_map)}] =', '{', ', '.join(map(str, mark_map)), '}; // {{{ mapping }}}')
        p('\tif (m < arraysz(map)) return map[m];')
        p('\treturn 0;')
        p('}\n')
        p('combining_type mark_for_codepoint(char_type c) {')
        rmap = codepoint_to_mark_map(p, mark_map)
        p('}\n')
        with open('kitty/unicode-data.h', 'r+') as f:
            raw = f.read()
            f.seek(0)
            raw, num = re.subn(
                r'^// START_KNOWN_MARKS.+?^// END_KNOWN_MARKS',
                '// START_KNOWN_MARKS\nstatic const combining_type '
                f'VS15 = {rmap[0xfe0e]}, VS16 = {rmap[0xfe0f]};'
                '\n// END_KNOWN_MARKS', raw, flags=re.MULTILINE | re.DOTALL)
            if not num:
                # Raised before truncate(), so the header is left untouched.
                raise SystemExit('Failed to patch mark definitions in unicode-data.h')
            f.truncate()
            f.write(raw)

    with open('kittens/hints/url_regex.py', 'w') as f:
        f.write('# generated by gen-wcwidth.py, do not edit\n\n')
        f.write("url_delimiters = '{}' # noqa".format(''.join(classes_to_regex(cz, exclude='\n\r'))))
|
|
|
|
|
|
def gen_names() -> None:
    """Write kittens/unicode_input/names.h.

    The header contains the codepoint name table, the mark <-> codepoint
    mappings, the sorted list of search words, the groups of marks each word
    refers to, and a trie over the words for lookup.
    """
    with create_header('kittens/unicode_input/names.h') as p:
        # A "mark" is the position of a codepoint in the sorted name table.
        mark_to_cp = sorted(name_map)
        cp_to_mark = {cp: mark for mark, cp in enumerate(mark_to_cp)}

        # Mapping of mark to codepoint name
        p(f'static const char* name_map[{len(mark_to_cp)}] = {{' ' // {{{')
        for cp in mark_to_cp:
            escaped = name_map[cp].replace('"', '\\"')
            p(f'\t"{escaped}",')
        p("}; // }}}\n")

        # Mapping of mark to codepoint
        p(f'static const char_type mark_to_cp[{len(mark_to_cp)}] = {{' ' // {{{')
        p(', '.join(map(str, mark_to_cp)))
        p('}; // }}}\n')

        # Function to get mark number for codepoint
        p('static char_type mark_for_codepoint(char_type c) {')
        codepoint_to_mark_map(p, mark_to_cp)
        p('}\n')
        p('static inline const char* name_for_codepoint(char_type cp) {')
        p('\tchar_type m = mark_for_codepoint(cp); if (m == 0) return NULL;')
        p('\treturn name_map[m];')
        p('}\n')

        # Array of all words
        word_map = tuple(sorted(word_search_map))
        word_rmap = {w: i for i, w in enumerate(word_map)}
        p(f'static const char* all_words_map[{len(word_map)}] = {{' ' // {{{')
        quoted_words = []
        for w in word_map:
            w = w.replace('"', '\\"')
            quoted_words.append(f'"{w}"')
        p(', '.join(quoted_words))
        p('}; // }}}\n')

        # Array of sets of marks for each word
        word_to_marks = {
            word_rmap[w]: frozenset(cp_to_mark[cp] for cp in cps)
            for w, cps in word_search_map.items()
        }
        # Words that refer to the same set of marks share one stored group.
        all_mark_groups = frozenset(word_to_marks.values())
        # Layout: a leading 0, then for each group its length followed by its
        # sorted marks. Offset 0 therefore never denotes a real group.
        array = [0]
        mg_to_offset = {}
        for mg in all_mark_groups:
            mg_to_offset[mg] = len(array)
            array += [len(mg), *sorted(mg)]
        p(f'static const char_type mark_groups[{len(array)}] = {{' ' // {{{')
        p(', '.join(map(str, array)))
        p('}; // }}}\n')
        offsets_array = [mg_to_offset[word_to_marks[wi]] for wi in range(len(word_map))]
        p(f'static const char_type mark_to_offset[{len(offsets_array)}] = {{' ' // {{{')
        p(', '.join(map(str, offsets_array)))
        p('}; // }}}\n')

        # The trie
        p('typedef struct { uint32_t children_offset; uint32_t match_offset; } word_trie;\n')
        all_trie_nodes: List['TrieNode'] = []  # noqa

        class TrieNode:

            def __init__(self) -> None:
                self.match_offset = 0
                self.children_offset = 0
                # letter (as a codepoint) -> index of the child in all_trie_nodes
                self.children: Dict[int, int] = {}

            def add_letter(self, letter: int) -> int:
                child = self.children.get(letter)
                if child is None:
                    child = self.children[letter] = len(all_trie_nodes)
                    all_trie_nodes.append(TrieNode())
                return child

            def __str__(self) -> str:
                return f'{{ .children_offset={self.children_offset}, .match_offset={self.match_offset} }}'

        root = TrieNode()
        all_trie_nodes.append(root)

        # Insert every word; the node reached by its last letter records the
        # offset of the word's mark group.
        for word_idx, word in enumerate(word_map):
            current = root
            for letter in map(ord, word):
                current = all_trie_nodes[current.add_letter(letter)]
            current.match_offset = offsets_array[word_idx]

        # Flatten the child tables: for each node with children, a count
        # followed by one packed entry per child (child index in the upper
        # bits, low 8 bits of the letter in the lowest byte).
        children_array = [0]
        for node in all_trie_nodes:
            if not node.children:
                continue
            node.children_offset = len(children_array)
            children_array.append(len(node.children))
            children_array.extend(
                (child_offset << 8) | (letter & 0xff)
                for letter, child_offset in node.children.items()
            )

        p(f'static const word_trie all_trie_nodes[{len(all_trie_nodes)}] = {{' ' // {{{')
        p(',\n'.join(map(str, all_trie_nodes)))
        p('\n}; // }}}\n')
        p(f'static const uint32_t children_array[{len(children_array)}] = {{' ' // {{{')
        p(', '.join(map(str, children_array)))
        p('}; // }}}\n')
|
|
|
|
|
|
def gen_wcwidth() -> None:
    """Write kitty/wcwidth-std.h and tools/wcswidth/std.go.

    Both files receive the same width switch and an emoji-presentation-base
    predicate, in C and Go syntax respectively, plus the Unicode version.
    The Go file is passed through ``gofmt -w -s`` once it has been closed.
    """
    seen: Set[int] = set()
    non_printing = class_maps['Cc'] | class_maps['Cf'] | class_maps['Cs']

    def add(p: Callable[..., None], comment: str, chars_: Union[Set[int], FrozenSet[int]], ret: int, for_go: bool = False) -> None:
        # A codepoint keeps the width of the first group that claimed it.
        fresh = chars_ - seen
        seen.update(fresh)
        p(f'\t\t// {comment} ({len(fresh)} codepoints)' + ' {{' '{')
        return_line = f'\t\t\treturn {ret};'
        for spec in get_ranges(list(fresh)):
            write_case(spec, p, for_go)
            p(return_line)
        p('\t\t// }}}\n')

    def add_all(p: Callable[..., None], for_go: bool = False) -> None:
        seen.clear()
        # Order matters: earlier groups take precedence over later ones.
        groups = (
            ('Flags', flag_codepoints, 2),
            ('Marks', marks | {0}, 0),
            ('Non-printing characters', non_printing, -1),
            ('Private use', class_maps['Co'], -3),
            ('Text Presentation', narrow_emoji, 1),
            ('East Asian ambiguous width', ambiguous, -2),
            ('East Asian double width', doublewidth, 2),
            ('Emoji Presentation', wide_emoji, 2),
            ('Not assigned in the unicode character database', not_assigned, -4),
        )
        for comment, chars, ret in groups:
            add(p, comment, chars, ret, for_go)

        p('\t\tdefault:\n\t\t\treturn 1;')
        p('\t}')
        p('\t}' if for_go else '\treturn 1;\n}')

    with create_header('kitty/wcwidth-std.h') as p, open('tools/wcswidth/std.go', 'w') as gof:
        gop = partial(print, file=gof)

        # Width function: C first, then Go.
        p('static inline int\nwcwidth_std(int32_t code) {')
        p('\tif (LIKELY(0x20 <= code && code <= 0x7e)) { return 1; }')
        p('\tswitch(code) {')
        add_all(p)
        gop('package wcswidth\n\n')
        gop('func Runewidth(code rune) int {')
        gop('\tswitch(code) {')
        add_all(gop, True)

        # Emoji presentation base predicate, written to both files in step.
        p('static inline bool\nis_emoji_presentation_base(uint32_t code) {')
        p('\tswitch(code) {')
        gop('func IsEmojiPresentationBase(code rune) bool {')
        gop('\tswitch(code) {')
        for spec in get_ranges(list(emoji_presentation_bases)):
            write_case(spec, p)
            p('\t\t\treturn true;')
            write_case(spec, gop, for_go=True)
            gop('\t\t\treturn true;')
        p('\t\tdefault: return false;')
        p('\t}')
        p('\treturn true;\n}')
        gop('\t\tdefault:\n\t\t\treturn false')
        gop('\t}')
        gop('\n}')

        uv = unicode_version()
        p(f'#define UNICODE_MAJOR_VERSION {uv[0]}')
        p(f'#define UNICODE_MINOR_VERSION {uv[1]}')
        p(f'#define UNICODE_PATCH_VERSION {uv[2]}')
        gop('var UnicodeDatabaseVersion [3]int = [3]int{' f'{uv[0]}, {uv[1]}, {uv[2]}' + '}')
    subprocess.check_call(['gofmt', '-w', '-s', gof.name])
|
|
|
|
|
|
# Run the pipeline: the parsers fill the module-level tables, then each
# generator writes its output files. The order is significant.
for step in (
    parse_ucd,
    parse_prop_list,
    parse_emoji,
    parse_eaw,
    gen_ucd,
    gen_wcwidth,
    gen_emoji,
    gen_names,
):
    step()
|