From cb7d4aad6c7eb8b4f15071de69c610c1ba264eb8 Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Sat, 3 Feb 2024 23:39:12 -0800 Subject: [PATCH] =?UTF-8?q?[language-c]=20C/C++=20highlighting=20fixes?= =?UTF-8?q?=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes: * `#define FOO 1` and `#define FOO` will _always_ scope `FOO` as `constant.preprocessor.c`, even when `FOO` is mixed-case or lower-case. * `#define FOO()` will _always_ scope `FOO` as `entity.name.function.preprocessor.c`, even when `FOO` is mixed-case or lower-case. * Bare identifiers used in other contexts within preprocessor directives should always be scoped as `constant.preprocessor.c` — unless I’m made aware of any counterexamples where that shouldn’t happen. For example: in `#if TABLE_SIZE > 200`, `TABLE_SIZE` should always be `constant.preprocessor.c`, no matter its casing. * All-caps variable declarations and assignments (`int FOO = 0`, etc.) should scope `FOO` as `variable` rather than `constant`. We should only scope an arbitrary identifier as `constant` if we think it’s been used in the context of a macro definition. However: * When deciding whether something outside of a directive refers to a macro constant, we have no choice but to use the `ALL_CAPS` heuristic (such identifiers are scoped as `constant.other.c`). Ideally we’d be able to determine that from some sort of analysis of the source code files — but even if we could keep track of that sort of thing in a single buffer, it’d fall down when macro constants _defined in other files_ are used. All we can do here is make a best guess. 
--- .../grammars/tree-sitter-c/highlights.scm | 86 +++++++++++++++---- .../grammars/tree-sitter-cpp/highlights.scm | 84 +++++++++++++----- 2 files changed, 131 insertions(+), 39 deletions(-) diff --git a/packages/language-c/grammars/tree-sitter-c/highlights.scm b/packages/language-c/grammars/tree-sitter-c/highlights.scm index 3a2db6c79..096c75956 100644 --- a/packages/language-c/grammars/tree-sitter-c/highlights.scm +++ b/packages/language-c/grammars/tree-sitter-c/highlights.scm @@ -1,3 +1,4 @@ + ; PREPROCESSOR ; ============ @@ -16,21 +17,48 @@ (["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.c (#set! adjust.endAfterFirstMatchOf "^#")) - -; This will match if the more specific rules above haven't matched. The -; anonymous nodes will match under ideal conditions, but might not be present -; if the parser is flummoxed. +; `preproc_directive` will be used when the parser doesn't recognize the +; directive as one of the above. It's permissive; `#afdfafsdfdfad` would be +; parsed as a `preproc_directive`. +; +; Hence this rule will match if the more specific rules above haven't matched. +; The anonymous nodes will match under ideal conditions, but might not be +; present even when they ought to be _if_ the parser is flummoxed; so this'll +; sometimes catch `#ifdef` and others. ((preproc_directive) @keyword.control.directive.c (#set! capture.shy true)) -((preproc_ifdef - (identifier) @entity.name.function.preprocessor.c - (#match? @entity.name.function.preprocessor.c "[a-zA-Z_$][\\w$]*"))) +((preproc_directive) @punctuation.definition.directive.c + (#set! capture.shy true) + (#set! adjust.endAfterFirstMatchOf "^#")) +; Macro functions are definitely entities. (preproc_function_def (identifier) @entity.name.function.preprocessor.c (#set! capture.final true)) +; Identifiers in macro definitions are definitely constants. 
+((preproc_def + name: (identifier) @constant.preprocessor.c)) + +; We can also safely treat identifiers as constants in `#ifdef`… +((preproc_ifdef + (identifier) @constant.preprocessor.c)) + +; …and `#if` and `#elif` (only direct operands of the top-level binary expression)… +(preproc_if + (binary_expression + (identifier) @constant.preprocessor.c)) +(preproc_elif + (binary_expression + (identifier) @constant.preprocessor.c)) + +; …and `#undef`. +((preproc_call + directive: (preproc_directive) @_IGNORE_ + argument: (preproc_arg) @constant.preprocessor.c) + (#eq? @_IGNORE_ "#undef")) + (system_lib_string) @string.quoted.other.lt-gt.include.c ((system_lib_string) @punctuation.definition.string.begin.c (#set! adjust.endAfterFirstMatchOf "^<")) @@ -48,6 +76,15 @@ (#set! capture.final true)) (primitive_type) @support.storage.type.builtin.c + +; When the user has typed `#define FOO`, the macro injection thinks that `FOO` +; is a type declaration (for some reason). This node structure seems to exist +; only in that unusual and incorrect scenario, so we'll stop it from happening +; so that it doesn't override the underlying `constant.other.c` scope. +(translation_unit + (type_identifier) @_IGNORE_ + (#set! capture.final)) + (type_identifier) @support.other.storage.type.c ; These types are all reserved words; if we see an identifier with this name, @@ -133,27 +170,31 @@ ; The "x" in `int x;` (declaration - declarator: (identifier) @variable.declaration.c) + declarator: (identifier) @variable.other.declaration.c) ; The "x" in `int x = y;` (init_declarator - declarator: (identifier) @variable.declaration.c) + declarator: (identifier) @variable.other.declaration.c) ; The "x" in `SomeType *x;` ; (Should work no matter how many pointers deep we are.) (pointer_declarator - declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.c + declarator: [(identifier) (field_identifier)] @variable.other.declaration.pointer.c (#is? 
test.descendantOfType "declaration field_declaration")) +; An array declarator: the "table" in `int table[4];` +(array_declarator + declarator: (identifier) @variable.other.declaration.c) + ; A member of a struct. (field_declaration - (field_identifier) @variable.declaration.member.c) + (field_identifier) @variable.other.declaration.member.c) ; An attribute in a C99 struct designated initializer: ; the "foo" in `MY_TYPE a = { .foo = true }; (initializer_pair (field_designator - (field_identifier) @variable.declaration.member.c)) + (field_identifier) @variable.other.declaration.member.c)) ; (and the associated ".") (initializer_pair @@ -162,15 +203,15 @@ (field_declaration (pointer_declarator - (field_identifier) @variable.declaration.member.c)) + (field_identifier) @variable.other.declaration.member.c)) (field_declaration (array_declarator - (field_identifier) @variable.declaration.member.c)) + (field_identifier) @variable.other.declaration.member.c)) (init_declarator (pointer_declarator - (identifier) @variable.declaration.member.c)) + (identifier) @variable.other.declaration.member.c)) ; The "x" in `x = y;` (assignment_expression @@ -253,8 +294,19 @@ (false) ] @constant.language._TYPE_.c -((identifier) @constant.c - (#match? @constant.c "[_A-Z][_A-Z0-9]*$")) +; Don't try to scope (e.g.) `int FOO = 1` as a constant when the user types `=` +; but has not typed the value yet. +(ERROR + (identifier) @_IGNORE_ + (#set! capture.final)) + +; C/C++ have macro preprocessors, and the convention is decently strong that +; all-caps identifiers refer to `#define`d things; so, unlike in most languages, +; we assume an all-caps identifier is a constant. The pattern is anchored at +; both ends so that mixed-case names like `fooBAR` don't match. +((identifier) @constant.other.c + (#match? @constant.other.c "^[_A-Z][_A-Z0-9]*$") + (#set! 
capture.shy)) ; COMMENTS diff --git a/packages/language-c/grammars/tree-sitter-cpp/highlights.scm b/packages/language-c/grammars/tree-sitter-cpp/highlights.scm index bc3fc2c3c..ef65e50a4 100644 --- a/packages/language-c/grammars/tree-sitter-cpp/highlights.scm +++ b/packages/language-c/grammars/tree-sitter-cpp/highlights.scm @@ -13,33 +13,55 @@ "#define" @keyword.control.directive.define.cpp "#include" @keyword.control.directive.include.cpp -(["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.c +(["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.cpp (#set! adjust.endAfterFirstMatchOf "^#")) - -; This will match if the more specific rules above haven't matched. The -; anonymous nodes will match under ideal conditions, but might not be present -; if the parser is flummoxed. -((preproc_directive) @keyword.control.directive.c +; `preproc_directive` will be used when the parser doesn't recognize the +; directive as one of the above. It's permissive; `#afdfafsdfdfad` would be +; parsed as a `preproc_directive`. +; +; Hence this rule will match if the more specific rules above haven't matched. +; The anonymous nodes will match under ideal conditions, but might not be +; present even when they ought to be _if_ the parser is flummoxed; so this'll +; sometimes catch `#ifdef` and others. +((preproc_directive) @keyword.control.directive.cpp (#set! capture.shy true)) -((preproc_ifdef - (identifier) @entity.name.function.preprocessor.c - (#match? @entity.name.function.preprocessor.c "[a-zA-Z_$][\\w$]*"))) - -(preproc_function_def - (identifier) @entity.name.function.preprocessor.c - (#set! capture.final true)) +((preproc_directive) @punctuation.definition.directive.cpp + (#set! capture.shy true) + (#set! adjust.endAfterFirstMatchOf "^#")) +; Macro functions are definitely entities. (preproc_function_def (identifier) @entity.name.function.preprocessor.cpp - (#set! 
capture.final true) -) + (#set! capture.final true)) -(system_lib_string) @string.quoted.other.lt-gt.include.c -((system_lib_string) @punctuation.definition.string.begin.c +; Identifiers in macro definitions are definitely constants. +((preproc_def + name: (identifier) @constant.preprocessor.cpp)) + +; We can also safely treat identifiers as constants in `#ifdef`… +((preproc_ifdef + (identifier) @constant.preprocessor.cpp)) + +; …and `#if` and `#elif` (only direct operands of the top-level binary expression)… +(preproc_if + (binary_expression + (identifier) @constant.preprocessor.cpp)) +(preproc_elif + (binary_expression + (identifier) @constant.preprocessor.cpp)) + +; …and `#undef`. +((preproc_call + directive: (preproc_directive) @_IGNORE_ + argument: (preproc_arg) @constant.preprocessor.cpp) + (#eq? @_IGNORE_ "#undef")) + +(system_lib_string) @string.quoted.other.lt-gt.include.cpp +((system_lib_string) @punctuation.definition.string.begin.cpp (#set! adjust.endAfterFirstMatchOf "^<")) -((system_lib_string) @punctuation.definition.string.end.c +((system_lib_string) @punctuation.definition.string.end.cpp (#set! adjust.startBeforeFirstMatchOf ">$")) @@ -52,6 +74,13 @@ (type_identifier) @_IGNORE_ (#set! capture.final true)) +; When the user has typed `#define FOO`, the macro injection thinks that `FOO` +; is a type declaration (for some reason). This node structure seems to exist +; only in that unusual and incorrect scenario, so we'll stop it from happening +; so that it doesn't override the underlying `constant.other.cpp` scope. +(translation_unit + (type_identifier) @_IGNORE_ + (#set! capture.final)) (primitive_type) @support.type.builtin.cpp @@ -232,7 +261,7 @@ ; The "x" in `SomeType *x;` ; (Should work no matter how many pointers deep we are.) (pointer_declarator - declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.c + declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.cpp (#is? test.descendantOfType "declaration field_declaration")) ; A member of a struct. 
@@ -289,7 +318,7 @@ ; The "foo" in `const char *foo` within a parameter list. ; (Should work no matter how many pointers deep we are.) (pointer_declarator - declarator: [(identifier) (field_identifier)] @variable.parameter.pointer.c + declarator: [(identifier) (field_identifier)] @variable.parameter.pointer.cpp (#is? test.descendantOfType "parameter_declaration")) (parameter_declaration @@ -332,8 +361,19 @@ (false) ] @constant.language._TYPE_.cpp -((identifier) @constant.cpp - (#match? @constant.cpp "[_A-Z][_A-Z0-9]*$")) +; Don't try to scope (e.g.) `int FOO = 1` as a constant when the user types `=` +; but has not typed the value yet. +(ERROR + (identifier) @_IGNORE_ + (#set! capture.final)) + +; C/C++ have macro preprocessors, and the convention is decently strong that +; all-caps identifiers refer to `#define`d things; so, unlike in most languages, +; we assume an all-caps identifier is a constant. The pattern is anchored at +; both ends so that mixed-case names like `fooBAR` don't match. +((identifier) @constant.other.cpp + (#match? @constant.other.cpp "^[_A-Z][_A-Z0-9]*$") + (#set! capture.shy)) ; COMMENTS