Add analyze highlights script (#10855)
Adds a script to print all unique highlight keys for building syntax
themes.
Usage:
- `python script/analyze_highlights.py` OR
- `python script/analyze_highlights.py -v`
- Using the `-v` or `--verbose` arg will print each language that uses
each key.
Example output:
```
@attribute (6)
@boolean (5)
@charset (1)
@comment (19)
@comment.doc (3)
@comment.unused (2)
@constant (27)
@constant.builtin (15)
@constant.character (1)
@constructor (4)
@embedded (10)
@emphasis (1)
@emphasis.strong (1)
@escape (4)
@function (44)
@function.builtin (2)
@function.definition (2)
@function.method (22)
@function.method.builtin (3)
@function.special (4)
@function.special.definition (1)
@import (1)
@keyframes (1)
@keyword (32)
@label (2)
@link_text (1)
@link_uri (1)
@media (1)
@module (1)
@namespace (1)
@number (16)
@operator (24)
@property (11)
@property.json_key (1)
@punctuation (1)
@punctuation.bracket (28)
@punctuation.delimiter (12)
@punctuation.list_marker (1)
@punctuation.special (17)
@string (23)
@string.doc (1)
@string.escape (5)
@string.regex (7)
@string.special (4)
@string.special.symbol (2)
@supports (1)
@tag (14)
@text.literal (2)
@title (1)
@type (28)
@type.builtin (4)
@type.super (3)
@variable (5)
@variable.member (3)
@variable.parameter (4)
@variable.special (12)
Extension-only:
@tag.delimiter (1)
```
Verbose example output:
```
Shared:
@attribute (6) - [css, heex, javascript, tsx]
@boolean (5) - [javascript, proto, tsx, typescript, yaml]
@charset (1) - [css]
@comment (19) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, json, proto, python, ruby, rust, tsx, typescript, yaml]
@comment.doc (3) - [elixir]
@comment.unused (2) - [elixir]
@constant (27) - [bash, c, cpp, elixir, heex, javascript, json, proto, python, ruby, rust, tsx, typescript]
@constant.builtin (15) - [elixir, go, javascript, python, ruby, tsx, typescript, yaml]
@constant.character (1) - [regex]
@constructor (4) - [tsx, typescript]
@embedded (10) - [bash, elixir, javascript, python, ruby, tsx, typescript]
@emphasis (1) - [markdown]
@emphasis.strong (1) - [markdown]
@escape (4) - [go, python, regex, ruby]
@function (44) - [bash, c, cpp, css, elixir, go, heex, javascript, python, rust, tsx, typescript]
@function.builtin (2) - [python]
@function.definition (2) - [rust]
@function.method (22) - [go, javascript, python, ruby, rust, tsx, typescript]
@function.method.builtin (3) - [ruby]
@function.special (4) - [c, cpp, rust]
@function.special.definition (1) - [rust]
@import (1) - [css]
@keyframes (1) - [css]
@keyword (32) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript]
@label (2) - [c, cpp]
@link_text (1) - [markdown]
@link_uri (1) - [markdown]
@media (1) - [css]
@module (1) - [heex]
@namespace (1) - [css]
@number (16) - [bash, c, cpp, css, elixir, go, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml]
@operator (24) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, proto, python, regex, ruby, tsx, typescript]
@property (11) - [bash, c, cpp, css, javascript, python, regex, rust, tsx, typescript, yaml]
@property.json_key (1) - [json]
@punctuation (1) - [elixir]
@punctuation.bracket (28) - [c, cpp, elixir, go, heex, javascript, json, proto, regex, ruby, rust, tsx, typescript, yaml]
@punctuation.delimiter (12) - [c, cpp, css, elixir, heex, javascript, proto, regex, ruby, tsx, typescript, yaml]
@punctuation.list_marker (1) - [markdown]
@punctuation.special (17) - [elixir, javascript, python, ruby, tsx, typescript, yaml]
@string (23) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml]
@string.doc (1) - [python]
@string.escape (5) - [elixir, javascript, tsx, typescript, yaml]
@string.regex (7) - [elixir, javascript, ruby, tsx, typescript]
@string.special (4) - [css, elixir]
@string.special.symbol (2) - [elixir, ruby]
@supports (1) - [css]
@tag (14) - [css, heex, javascript, tsx]
@text.literal (2) - [markdown]
@title (1) - [markdown]
@type (28) - [c, cpp, css, elixir, go, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript, yaml]
@type.builtin (4) - [javascript, rust, tsx, typescript]
@type.super (3) - [ruby]
@variable (5) - [c, cpp, javascript, tsx, typescript]
@variable.member (3) - [go, ruby]
@variable.parameter (4) - [ruby]
@variable.special (12) - [cpp, css, javascript, ruby, rust, tsx, typescript]
Extension-only:
@tag.delimiter (1) - [astro]
```
Release Notes:
- N/A
---------
Co-authored-by: Joseph T. Lyons <JosephTLyons@gmail.com>
2024-04-22 18:51:06 +03:00
|
|
|
"""
|
|
|
|
This script analyzes all the highlight.scm files in our embedded languages and extensions.
|
|
|
|
It counts the number of unique instances of @{name} and the languages in which they are used.
|
|
|
|
|
|
|
|
This is useful to help avoid accidentally introducing new tags when appropriate ones already exist when adding new languages.
|
|
|
|
|
|
|
|
Flags:
|
|
|
|
-v, --verbose: Include a detailed list of languages for each tag found in the highlight.scm files.
|
|
|
|
"""
|
|
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import Any
|
|
|
|
import argparse
|
|
|
|
import re
|
|
|
|
|
|
|
|
pattern = re.compile(r'@(?!_)[a-zA-Z_.]+')
|
|
|
|
|
|
|
|
def parse_arguments():
|
|
|
|
parser = argparse.ArgumentParser(description='Analyze highlight.scm files for unique instances and their languages.')
|
|
|
|
parser.add_argument('-v', '--verbose', action='store_true', help='Include a list of languages for each tag.')
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
|
|
def find_highlight_files(root_dir):
|
|
|
|
for path in Path(root_dir).rglob('highlights.scm'):
|
|
|
|
yield path
|
|
|
|
|
|
|
|
def count_instances(files):
|
|
|
|
instances: defaultdict[list[Any], dict[str, Any]] = defaultdict(lambda: {'count': 0, 'languages': set()})
|
|
|
|
for file_path in files:
|
|
|
|
language = file_path.parent.name
|
|
|
|
with open(file_path, "r") as file:
|
|
|
|
text = file.read()
|
|
|
|
matches = pattern.findall(text)
|
|
|
|
for match in matches:
|
|
|
|
instances[match]['count'] += 1
|
|
|
|
instances[match]['languages'].add(language)
|
|
|
|
return instances
|
|
|
|
|
|
|
|
def print_instances(instances, verbose=False):
|
|
|
|
for item, details in sorted(instances.items(), key=lambda x: x[0]):
|
|
|
|
languages = ', '.join(sorted(details['languages']))
|
|
|
|
if verbose:
|
|
|
|
print(f"{item} ({details['count']}) - [{languages}]")
|
|
|
|
else:
|
|
|
|
print(f"{item} ({details['count']})")
|
|
|
|
|
|
|
|
def main():
|
|
|
|
args = parse_arguments()
|
|
|
|
|
|
|
|
base_dir = Path(__file__).parent.parent
|
|
|
|
core_path = base_dir / 'crates/languages/src'
|
2024-04-22 20:05:41 +03:00
|
|
|
extension_path = base_dir / 'extensions/'
|
Add analyze highlights script (#10855)
Adds a script to print all unique highlight keys for building syntax
themes.
Usage:
- `python script/analyze_highlights.py` OR
- `python script/analyze_highlights.py -v`
- Using the `-v` or `--verbose` arg will print each language that uses
each key.
Example output:
```
@attribute (6)
@boolean (5)
@charset (1)
@comment (19)
@comment.doc (3)
@comment.unused (2)
@constant (27)
@constant.builtin (15)
@constant.character (1)
@constructor (4)
@embedded (10)
@emphasis (1)
@emphasis.strong (1)
@escape (4)
@function (44)
@function.builtin (2)
@function.definition (2)
@function.method (22)
@function.method.builtin (3)
@function.special (4)
@function.special.definition (1)
@import (1)
@keyframes (1)
@keyword (32)
@label (2)
@link_text (1)
@link_uri (1)
@media (1)
@module (1)
@namespace (1)
@number (16)
@operator (24)
@property (11)
@property.json_key (1)
@punctuation (1)
@punctuation.bracket (28)
@punctuation.delimiter (12)
@punctuation.list_marker (1)
@punctuation.special (17)
@string (23)
@string.doc (1)
@string.escape (5)
@string.regex (7)
@string.special (4)
@string.special.symbol (2)
@supports (1)
@tag (14)
@text.literal (2)
@title (1)
@type (28)
@type.builtin (4)
@type.super (3)
@variable (5)
@variable.member (3)
@variable.parameter (4)
@variable.special (12)
Extension-only:
@tag.delimiter (1)
```
Verbose example output:
```
Shared:
@attribute (6) - [css, heex, javascript, tsx]
@boolean (5) - [javascript, proto, tsx, typescript, yaml]
@charset (1) - [css]
@comment (19) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, json, proto, python, ruby, rust, tsx, typescript, yaml]
@comment.doc (3) - [elixir]
@comment.unused (2) - [elixir]
@constant (27) - [bash, c, cpp, elixir, heex, javascript, json, proto, python, ruby, rust, tsx, typescript]
@constant.builtin (15) - [elixir, go, javascript, python, ruby, tsx, typescript, yaml]
@constant.character (1) - [regex]
@constructor (4) - [tsx, typescript]
@embedded (10) - [bash, elixir, javascript, python, ruby, tsx, typescript]
@emphasis (1) - [markdown]
@emphasis.strong (1) - [markdown]
@escape (4) - [go, python, regex, ruby]
@function (44) - [bash, c, cpp, css, elixir, go, heex, javascript, python, rust, tsx, typescript]
@function.builtin (2) - [python]
@function.definition (2) - [rust]
@function.method (22) - [go, javascript, python, ruby, rust, tsx, typescript]
@function.method.builtin (3) - [ruby]
@function.special (4) - [c, cpp, rust]
@function.special.definition (1) - [rust]
@import (1) - [css]
@keyframes (1) - [css]
@keyword (32) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript]
@label (2) - [c, cpp]
@link_text (1) - [markdown]
@link_uri (1) - [markdown]
@media (1) - [css]
@module (1) - [heex]
@namespace (1) - [css]
@number (16) - [bash, c, cpp, css, elixir, go, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml]
@operator (24) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, proto, python, regex, ruby, tsx, typescript]
@property (11) - [bash, c, cpp, css, javascript, python, regex, rust, tsx, typescript, yaml]
@property.json_key (1) - [json]
@punctuation (1) - [elixir]
@punctuation.bracket (28) - [c, cpp, elixir, go, heex, javascript, json, proto, regex, ruby, rust, tsx, typescript, yaml]
@punctuation.delimiter (12) - [c, cpp, css, elixir, heex, javascript, proto, regex, ruby, tsx, typescript, yaml]
@punctuation.list_marker (1) - [markdown]
@punctuation.special (17) - [elixir, javascript, python, ruby, tsx, typescript, yaml]
@string (23) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml]
@string.doc (1) - [python]
@string.escape (5) - [elixir, javascript, tsx, typescript, yaml]
@string.regex (7) - [elixir, javascript, ruby, tsx, typescript]
@string.special (4) - [css, elixir]
@string.special.symbol (2) - [elixir, ruby]
@supports (1) - [css]
@tag (14) - [css, heex, javascript, tsx]
@text.literal (2) - [markdown]
@title (1) - [markdown]
@type (28) - [c, cpp, css, elixir, go, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript, yaml]
@type.builtin (4) - [javascript, rust, tsx, typescript]
@type.super (3) - [ruby]
@variable (5) - [c, cpp, javascript, tsx, typescript]
@variable.member (3) - [go, ruby]
@variable.parameter (4) - [ruby]
@variable.special (12) - [cpp, css, javascript, ruby, rust, tsx, typescript]
Extension-only:
@tag.delimiter (1) - [astro]
```
Release Notes:
- N/A
---------
Co-authored-by: Joseph T. Lyons <JosephTLyons@gmail.com>
2024-04-22 18:51:06 +03:00
|
|
|
|
|
|
|
core_instances = count_instances(find_highlight_files(core_path))
|
|
|
|
extension_instances = count_instances(find_highlight_files(extension_path))
|
|
|
|
|
|
|
|
unique_extension_instances = {k: v for k, v in extension_instances.items() if k not in core_instances}
|
|
|
|
|
|
|
|
print('Shared:\n')
|
|
|
|
print_instances(core_instances, args.verbose)
|
|
|
|
|
|
|
|
if unique_extension_instances:
|
|
|
|
print('\nExtension-only:\n')
|
|
|
|
print_instances(unique_extension_instances, args.verbose)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
main()
|