Drop the dependency on libunistring

2024-08-17 10:30:25 +03:00 · 2018-01-18 00:09:40 +05:30 · 2018-01-18 00:09:40 +05:30 · 5faa649452
commit 5faa649452
parent 2ddc7e8c80
11 changed files with 2375 additions and 39 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -3,6 +3,7 @@ kitty/emoji.h linguist-generated=true
 kitty/keys.h linguist-generated=true
 kitty/charsets.c linguist-generated=true
 kitty/key_encoding.py linguist-generated=true
+kitty/unicode-data.c
 kitty/rgb.py linguist-generated=true
 kitty/gl-wrapper.* linguist-generated=true
 kitty/khrplatform.h linguist-generated=true
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -36,6 +36,8 @@ version 0.7.0 [future]

 - Fix incorrect handling of some unicode combining marks that are not re-ordered

+- Drop the dependency on libunistring
+

 version 0.6.1 [2017-12-28]
 ---------------------------
--- a/README.asciidoc
+++ b/README.asciidoc
@@ -90,7 +90,6 @@ the following dependencies are installed first.

 * python >= 3.5
 * harfbuzz >= 1.5.0
-* libunistring
 * zlib
 * libpng
 * freetype (not needed on macOS)
--- a/2
+++ b/2
@@ -1,2 +1,2 @@
 #!/bin/bash
-cloc --exclude-list-file <(echo -e 'kitty/wcwidth-std.h\nkitty/glfw.c\nkitty/keys.h\nkitty/charsets.c\nkitty/key_encoding.py\nkitty/rgb.py\nkitty/gl.h\nkitty/gl-wrapper.h\nkitty/gl-wrapper.c\nkitty/khrplatform.h\nkitty/glfw-wrapper.h\nkitty/glfw-wrapper.c\nkitty/emoji.h') kitty
+cloc --exclude-list-file <(echo -e 'kitty/wcwidth-std.h\nkitty/glfw.c\nkitty/keys.h\nkitty/charsets.c\nkitty/unicode-data.c\nkitty/key_encoding.py\nkitty/rgb.py\nkitty/gl.h\nkitty/gl-wrapper.h\nkitty/gl-wrapper.c\nkitty/khrplatform.h\nkitty/glfw-wrapper.h\nkitty/glfw-wrapper.c\nkitty/emoji.h') kitty
--- a/gen-wcwidth.py
+++ b/gen-wcwidth.py
@@ -130,7 +130,8 @@ def create_header(path):
    p = partial(print, file=f)
    p('// unicode data, built from the unicode standard on:', date.today())
    p('// see gen-wcwidth.py')
-    p('#pragma once')
+    if path.endswith('.h'):
+        p('#pragma once')
    p('#include "data-types.h"\n')
    p('START_ALLOW_CASE_RANGE')
    p()
@@ -160,13 +161,38 @@ def gen_emoji():
        p('\treturn false;\n}')


+def category_test(name, p, classes, comment, static=False):
+    static = 'static inline ' if static else ''
+    chars = set()
+    for c in classes:
+        chars |= class_maps[c]
+    p(f'{static}bool\n{name}(char_type code) {{')
+    p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
+    p('\tswitch(code) {')
+    for spec in get_ranges(list(chars)):
+        write_case(spec, p)
+        p(f'\t\t\treturn true;')
+    p('\t} // }}}\n')
+    p('\treturn false;\n}\n')
+
+
+def gen_ucd():
+    with create_header('kitty/unicode-data.c') as p:
+        p('#include "unicode-data.h"')
+        category_test('is_combining_char', p, {c for c in class_maps if c.startswith('M')}, 'M category (marks)')
+        category_test('is_ignored_char', p, 'Cc Cf Cs'.split(), 'Control characters (Cc Cf Cs)')
+        category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
+        category_test('is_CZ_category', p, {c for c in class_maps if c[0] in 'CZ'}, 'C and Z categories')
+        category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')
+
+
 def gen_wcwidth():
    seen = set()

    def add(p, comment, chars_, ret):
        chars = chars_ - seen
        seen.update(chars)
-        p(f'\t\t// {comment} ({len(chars)} codepoints)' + ' {{{')
+        p(f'\t\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
        for spec in get_ranges(list(chars)):
            write_case(spec, p)
            p(f'\t\t\treturn {ret};')
@@ -194,5 +220,6 @@ def add(p, comment, chars_, ret):
 parse_ucd()
 parse_emoji()
 parse_eaw()
+gen_ucd()
 gen_wcwidth()
 gen_emoji()
--- a/kitty/emoji.h
+++ b/kitty/emoji.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2018-01-17
+// unicode data, built from the unicode standard on: 2018-01-18
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/kitty/line.c
+++ b/kitty/line.c
@@ -42,9 +42,7 @@ line_text_at(char_type ch, combining_type cc) {
    } else {
        Py_UCS4 buf[3];
        buf[0] = ch; buf[1] = cc & CC_MASK; buf[2] = cc >> 16;
-        Py_UCS4 normalized = normalize(ch, buf[1], buf[2]);
-        if (normalized) ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &normalized, 1);
-        else ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, buf[2] ? 3 : 2);
+        ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, buf[2] ? 3 : 2);
    }
    return ans;
 }
--- a/kitty/unicode-data.c
+++ b/kitty/unicode-data.c
--- a/kitty/unicode-data.h
+++ b/kitty/unicode-data.h
@@ -1,41 +1,22 @@
 #pragma once
+#include "data-types.h"

-#include <unictype.h>
-#include <uninorm.h>
-
-static inline bool
-is_combining_char(uint32_t ch) {
-    return uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_M);
-}
-
-
-static inline bool
-is_ignored_char(uint32_t ch) {
-    return uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_Cc | UC_CATEGORY_MASK_Cf | UC_CATEGORY_MASK_Cs);
-}
-
-static inline bool
-is_word_char(uint32_t ch) {
-    return uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_N);
-}
+bool is_combining_char(char_type ch);
+bool is_ignored_char(char_type ch);
+bool is_word_char(char_type ch);
+bool is_CZ_category(char_type);
+bool is_P_category(char_type);

 static inline bool
 is_url_char(uint32_t ch) {
-    return ch && !uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_C | UC_CATEGORY_MASK_Z);
-}
-
-static inline uint32_t
-normalize(uint32_t ch, uint32_t cc1, uint32_t cc2) {
-    uint32_t ans = uc_composition(ch, cc1);
-    if (ans && cc2) ans = uc_composition(ans, cc2);
-    return ans;
+    return ch && !is_CZ_category(ch);
 }

 static inline bool
 can_strip_from_end_of_url(uint32_t ch) {
    // remove trailing punctuation
    return (
-        (uc_is_general_category_withtable(ch, UC_CATEGORY_MASK_P) && ch != '/') ||
+        (is_P_category(ch) && ch != '/') ||
        ch == '>'
    ) ? true : false;
 }
--- a/kitty/wcwidth-std.h
+++ b/kitty/wcwidth-std.h
@@ -1,4 +1,4 @@
-// unicode data, built from the unicode standard on: 2018-01-17
+// unicode data, built from the unicode standard on: 2018-01-18
 // see gen-wcwidth.py
 #pragma once
 #include "data-types.h"
--- a/setup.py
+++ b/setup.py
@@ -220,9 +220,7 @@ def kitty_env():
    pylib = get_python_flags(cflags)
    gl_libs = ['-framework', 'OpenGL'] if is_macos else pkg_config('gl', '--libs')
    libpng = pkg_config('libpng', '--libs')
-    ans.ldpaths += pylib + font_libs + gl_libs + libpng + [
-        '-lunistring'
-    ]
+    ans.ldpaths += pylib + font_libs + gl_libs + libpng
    if is_macos:
        ans.ldpaths.extend('-framework Cocoa'.split())
        if is_travis and 'SW' in os.environ: