mirror of
https://github.com/zed-industries/zed.git
synced 2024-11-07 20:39:04 +03:00
Add analyze highlights script (#10855)
Adds a script to print all unique highlight keys for building syntax themes. Usage: - `python script/analyze_highlights.py` OR - `python script/analyze_highlights.py -v` - Using the `-v` or `--verbose` arg will print each language that uses each key. Example output: ``` @attribute (6) @boolean (5) @charset (1) @comment (19) @comment.doc (3) @comment.unused (2) @constant (27) @constant.builtin (15) @constant.character (1) @constructor (4) @embedded (10) @emphasis (1) @emphasis.strong (1) @escape (4) @function (44) @function.builtin (2) @function.definition (2) @function.method (22) @function.method.builtin (3) @function.special (4) @function.special.definition (1) @import (1) @keyframes (1) @keyword (32) @label (2) @link_text (1) @link_uri (1) @media (1) @module (1) @namespace (1) @number (16) @operator (24) @property (11) @property.json_key (1) @punctuation (1) @punctuation.bracket (28) @punctuation.delimiter (12) @punctuation.list_marker (1) @punctuation.special (17) @string (23) @string.doc (1) @string.escape (5) @string.regex (7) @string.special (4) @string.special.symbol (2) @supports (1) @tag (14) @text.literal (2) @title (1) @type (28) @type.builtin (4) @type.super (3) @variable (5) @variable.member (3) @variable.parameter (4) @variable.special (12) Extension-only: @tag.delimiter (1) ``` Verbose example output: ``` Shared: @attribute (6) - [css, heex, javascript, tsx] @boolean (5) - [javascript, proto, tsx, typescript, yaml] @charset (1) - [css] @comment (19) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, json, proto, python, ruby, rust, tsx, typescript, yaml] @comment.doc (3) - [elixir] @comment.unused (2) - [elixir] @constant (27) - [bash, c, cpp, elixir, heex, javascript, json, proto, python, ruby, rust, tsx, typescript] @constant.builtin (15) - [elixir, go, javascript, python, ruby, tsx, typescript, yaml] @constant.character (1) - [regex] @constructor (4) - [tsx, typescript] @embedded (10) - [bash, elixir, javascript, python, 
ruby, tsx, typescript] @emphasis (1) - [markdown] @emphasis.strong (1) - [markdown] @escape (4) - [go, python, regex, ruby] @function (44) - [bash, c, cpp, css, elixir, go, heex, javascript, python, rust, tsx, typescript] @function.builtin (2) - [python] @function.definition (2) - [rust] @function.method (22) - [go, javascript, python, ruby, rust, tsx, typescript] @function.method.builtin (3) - [ruby] @function.special (4) - [c, cpp, rust] @function.special.definition (1) - [rust] @import (1) - [css] @keyframes (1) - [css] @keyword (32) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript] @label (2) - [c, cpp] @link_text (1) - [markdown] @link_uri (1) - [markdown] @media (1) - [css] @module (1) - [heex] @namespace (1) - [css] @number (16) - [bash, c, cpp, css, elixir, go, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml] @operator (24) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, proto, python, regex, ruby, tsx, typescript] @property (11) - [bash, c, cpp, css, javascript, python, regex, rust, tsx, typescript, yaml] @property.json_key (1) - [json] @punctuation (1) - [elixir] @punctuation.bracket (28) - [c, cpp, elixir, go, heex, javascript, json, proto, regex, ruby, rust, tsx, typescript, yaml] @punctuation.delimiter (12) - [c, cpp, css, elixir, heex, javascript, proto, regex, ruby, tsx, typescript, yaml] @punctuation.list_marker (1) - [markdown] @punctuation.special (17) - [elixir, javascript, python, ruby, tsx, typescript, yaml] @string (23) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml] @string.doc (1) - [python] @string.escape (5) - [elixir, javascript, tsx, typescript, yaml] @string.regex (7) - [elixir, javascript, ruby, tsx, typescript] @string.special (4) - [css, elixir] @string.special.symbol (2) - [elixir, ruby] @supports (1) - [css] @tag (14) - [css, heex, 
javascript, tsx] @text.literal (2) - [markdown] @title (1) - [markdown] @type (28) - [c, cpp, css, elixir, go, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript, yaml] @type.builtin (4) - [javascript, rust, tsx, typescript] @type.super (3) - [ruby] @variable (5) - [c, cpp, javascript, tsx, typescript] @variable.member (3) - [go, ruby] @variable.parameter (4) - [ruby] @variable.special (12) - [cpp, css, javascript, ruby, rust, tsx, typescript] Extension-only: @tag.delimiter (1) - [astro] ``` Release Notes: - N/A --------- Co-authored-by: Joseph T. Lyons <JosephTLyons@gmail.com>
This commit is contained in:
parent
ee531b6f4d
commit
189cece03e
68
script/analyze_highlights.py
Normal file
68
script/analyze_highlights.py
Normal file
@ -0,0 +1,68 @@
|
||||
"""
|
||||
This script analyzes all the highlights.scm files in our embedded languages and extensions.
|
||||
It counts how many times each unique @{name} tag occurs and records the languages in which it is used.
|
||||
|
||||
When adding a new language, this helps avoid accidentally introducing new tags where appropriate ones already exist.
|
||||
|
||||
Flags:
|
||||
-v, --verbose: Include a detailed list of languages for each tag found in the highlights.scm files.
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
import argparse
|
||||
import re
|
||||
|
||||
# Matches highlight tags such as `@function.builtin`: an `@` followed by one or
# more ASCII letters, underscores, or dots. The `(?!_)` lookahead rejects names
# whose first character is an underscore (e.g. `@_internal`). Digits and hyphens
# are not part of the character class, so a match stops at the first one.
pattern = re.compile(r'@(?!_)[a-zA-Z_.]+')
|
||||
|
||||
def parse_arguments():
    """Parse the process command line.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``verbose``
        attribute is ``True`` when ``-v``/``--verbose`` was passed and
        ``False`` otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Analyze highlight.scm files for unique instances and their languages.',
    )
    cli.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Include a list of languages for each tag.',
    )
    return cli.parse_args()
|
||||
|
||||
def find_highlight_files(root_dir):
    """Lazily yield the path of every ``highlights.scm`` file under *root_dir*.

    Args:
        root_dir: Directory (string or path-like) to search recursively.

    Yields:
        ``pathlib.Path`` objects, in whatever order ``Path.rglob`` produces.
    """
    yield from Path(root_dir).rglob('highlights.scm')
|
||||
|
||||
def count_instances(files):
    """Tally every highlight tag found in the given query files.

    Args:
        files: Iterable of ``pathlib.Path``-like objects pointing at query
            files. The name of each file's parent directory is used as the
            language name.

    Returns:
        A ``defaultdict`` keyed by tag string (e.g. ``'@keyword'``). Each value
        is a dict with two entries: ``'count'``, the total number of matches
        across all files, and ``'languages'``, the set of language names in
        which the tag appeared.
    """
    instances: defaultdict[str, dict[str, Any]] = defaultdict(
        lambda: {'count': 0, 'languages': set()}
    )
    for file_path in files:
        language = file_path.parent.name
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding (which can fail on non-ASCII bytes).
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
        for match in pattern.findall(text):
            entry = instances[match]
            entry['count'] += 1
            entry['languages'].add(language)
    return instances
|
||||
|
||||
def print_instances(instances, verbose=False):
    """Print one line per tag, ordered alphabetically by tag name.

    Args:
        instances: Mapping of tag name to a dict holding ``'count'`` and
            ``'languages'`` (an iterable of language names).
        verbose: When true, append the sorted, comma-separated language list
            in square brackets after the count.
    """
    for tag in sorted(instances):
        info = instances[tag]
        joined = ', '.join(sorted(info['languages']))
        suffix = f" - [{joined}]" if verbose else ""
        print(f"{tag} ({info['count']}){suffix}")
|
||||
|
||||
def main():
    """Scan the repository's highlight queries and print a per-tag report.

    Tags found under ``crates/languages/src`` are printed under the "Shared"
    heading. Tags found only under ``extensions/astro/languages`` (the single
    extension directory this script scans) are printed under
    "Extension-only"; that section is omitted when there are none.
    """
    verbose = parse_arguments().verbose

    # The script lives one directory below the repository root.
    repo_root = Path(__file__).parent.parent
    core_instances = count_instances(
        find_highlight_files(repo_root / 'crates/languages/src')
    )
    extension_instances = count_instances(
        find_highlight_files(repo_root / 'extensions/astro/languages')
    )

    # Keep only the tags that no core language already uses.
    extension_only = {}
    for tag, details in extension_instances.items():
        if tag not in core_instances:
            extension_only[tag] = details

    print('Shared:\n')
    print_instances(core_instances, verbose)

    if not extension_only:
        return
    print('\nExtension-only:\n')
    print_instances(extension_only, verbose)
|
||||
|
||||
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user