From 55cc7bb8681145fe4feb47cd73119c1d52f5c6f4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 1 Feb 2022 13:41:41 -0800 Subject: [PATCH] Reimplement snippet parsing using recursive descent --- Cargo.lock | 10 - crates/snippet/Cargo.toml | 2 - crates/snippet/grammar/Cargo.toml | 26 - crates/snippet/grammar/binding.gyp | 19 - .../snippet/grammar/bindings/node/binding.cc | 28 - crates/snippet/grammar/bindings/node/index.js | 19 - crates/snippet/grammar/bindings/rust/build.rs | 40 -- crates/snippet/grammar/bindings/rust/lib.rs | 52 -- crates/snippet/grammar/grammar.js | 26 - crates/snippet/grammar/package.json | 19 - crates/snippet/grammar/src/grammar.json | 133 ----- crates/snippet/grammar/src/node-types.json | 84 --- crates/snippet/grammar/src/parser.c | 545 ------------------ .../snippet/grammar/src/tree_sitter/parser.h | 224 ------- crates/snippet/src/snippet.rs | 152 +++-- 15 files changed, 90 insertions(+), 1289 deletions(-) delete mode 100644 crates/snippet/grammar/Cargo.toml delete mode 100644 crates/snippet/grammar/binding.gyp delete mode 100644 crates/snippet/grammar/bindings/node/binding.cc delete mode 100644 crates/snippet/grammar/bindings/node/index.js delete mode 100644 crates/snippet/grammar/bindings/rust/build.rs delete mode 100644 crates/snippet/grammar/bindings/rust/lib.rs delete mode 100644 crates/snippet/grammar/grammar.js delete mode 100644 crates/snippet/grammar/package.json delete mode 100644 crates/snippet/grammar/src/grammar.json delete mode 100644 crates/snippet/grammar/src/node-types.json delete mode 100644 crates/snippet/grammar/src/parser.c delete mode 100644 crates/snippet/grammar/src/tree_sitter/parser.h diff --git a/Cargo.lock b/Cargo.lock index 53d7651e32..8f4ebb8b89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4424,8 +4424,6 @@ version = "0.1.0" dependencies = [ "anyhow", "smallvec", - "tree-sitter", - "tree-sitter-snippet", ] [[package]] @@ -5213,14 +5211,6 @@ dependencies = [ "tree-sitter", ] -[[package]] -name = "tree-sitter-snippet" -version = "0.0.1" -dependencies = [ - "cc", - "tree-sitter", -] - [[package]] name = "ttf-parser" version = "0.9.0" diff --git a/crates/snippet/Cargo.toml b/crates/snippet/Cargo.toml index daba92f848..b3712cd6a0 100644 --- a/crates/snippet/Cargo.toml +++ b/crates/snippet/Cargo.toml @@ -9,5 +9,3 @@ path = "src/snippet.rs" [dependencies] anyhow = "1.0" smallvec = { version = "1.6", features = ["union"] } -tree-sitter = "0.20" -tree-sitter-snippet = { path = "./grammar" } diff --git a/crates/snippet/grammar/Cargo.toml b/crates/snippet/grammar/Cargo.toml deleted file mode 100644 index 83defad879..0000000000 --- a/crates/snippet/grammar/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -[package] -name = "tree-sitter-snippet" -description = "snippet grammar for the tree-sitter parsing library" -version = "0.0.1" -keywords = ["incremental", "parsing", "snippet"] -categories = ["parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter-snippet" -edition = "2018" -license = "MIT" - -build = "bindings/rust/build.rs" -include = [ - "bindings/rust/*", - "grammar.js", - "queries/*", - "src/*", -] - -[lib] -path = "bindings/rust/lib.rs" - -[dependencies] -tree-sitter = "~0.20" - -[build-dependencies] -cc = "1.0" diff --git a/crates/snippet/grammar/binding.gyp b/crates/snippet/grammar/binding.gyp deleted file mode 100644 index a99fa70f98..0000000000 --- a/crates/snippet/grammar/binding.gyp +++ /dev/null @@ -1,19 +0,0 @@ -{ - "targets": [ - { - "target_name": "tree_sitter_snippet_binding", - "include_dirs": [ - " -#include "nan.h" - -using namespace v8; - -extern "C" TSLanguage * tree_sitter_snippet(); - -namespace { - -NAN_METHOD(New) {} - -void Init(Local exports, Local module) { - Local tpl = Nan::New(New); - tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - tpl->InstanceTemplate()->SetInternalFieldCount(1); - - Local constructor = Nan::GetFunction(tpl).ToLocalChecked(); - Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(instance, 0, tree_sitter_snippet()); - - Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("snippet").ToLocalChecked()); - Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); -} - -NODE_MODULE(tree_sitter_snippet_binding, Init) - -} // namespace diff --git a/crates/snippet/grammar/bindings/node/index.js b/crates/snippet/grammar/bindings/node/index.js deleted file mode 100644 index c5ea00c7af..0000000000 --- a/crates/snippet/grammar/bindings/node/index.js +++ /dev/null @@ -1,19 +0,0 @@ -try { - module.exports = require("../../build/Release/tree_sitter_snippet_binding"); -} catch (error1) { - if (error1.code !== 'MODULE_NOT_FOUND') { - throw error1; - } - try { - module.exports = require("../../build/Debug/tree_sitter_snippet_binding"); - } catch (error2) { - if (error2.code !== 'MODULE_NOT_FOUND') { - throw error2; - } - throw error1 - } -} - -try { - module.exports.nodeTypeInfo = require("../../src/node-types.json"); -} catch (_) {} diff --git a/crates/snippet/grammar/bindings/rust/build.rs b/crates/snippet/grammar/bindings/rust/build.rs deleted file mode 100644 index c6061f0995..0000000000 --- a/crates/snippet/grammar/bindings/rust/build.rs +++ /dev/null @@ -1,40 +0,0 @@ -fn main() { - let src_dir = std::path::Path::new("src"); - - let mut c_config = cc::Build::new(); - c_config.include(&src_dir); - c_config - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable") - .flag_if_supported("-Wno-trigraphs"); - let parser_path = src_dir.join("parser.c"); - c_config.file(&parser_path); - - // If your language uses an external scanner written in C, - // then include this block of code: - - /* - let scanner_path = src_dir.join("scanner.c"); - c_config.file(&scanner_path); - println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); - */ - - c_config.compile("parser"); - println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); - - // If your language uses an external scanner written in C++, - // then include this block of code: - - /* - let mut cpp_config = cc::Build::new(); - cpp_config.cpp(true); - cpp_config.include(&src_dir); - cpp_config - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable"); - let scanner_path = src_dir.join("scanner.cc"); - cpp_config.file(&scanner_path); - cpp_config.compile("scanner"); - println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); - */ -} diff --git a/crates/snippet/grammar/bindings/rust/lib.rs b/crates/snippet/grammar/bindings/rust/lib.rs deleted file mode 100644 index 6809635413..0000000000 --- a/crates/snippet/grammar/bindings/rust/lib.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! This crate provides snippet language support for the [tree-sitter][] parsing library. -//! -//! Typically, you will use the [language][language func] function to add this language to a -//! tree-sitter [Parser][], and then use the parser to parse some code: -//! -//! ``` -//! let code = ""; -//! let mut parser = tree_sitter::Parser::new(); -//! parser.set_language(tree_sitter_snippet::language()).expect("Error loading snippet grammar"); -//! let tree = parser.parse(code, None).unwrap(); -//! ``` -//! -//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html -//! [language func]: fn.language.html -//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html -//! [tree-sitter]: https://tree-sitter.github.io/ - -use tree_sitter::Language; - -extern "C" { - fn tree_sitter_snippet() -> Language; -} - -/// Get the tree-sitter [Language][] for this grammar. -/// -/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html -pub fn language() -> Language { - unsafe { tree_sitter_snippet() } -} - -/// The content of the [`node-types.json`][] file for this grammar. -/// -/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types -pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json"); - -// Uncomment these to include any queries that this grammar contains - -// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); -// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); -// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); -// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); - -#[cfg(test)] -mod tests { - #[test] - fn test_can_load_grammar() { - let mut parser = tree_sitter::Parser::new(); - parser - .set_language(super::language()) - .expect("Error loading snippet language"); - } -} diff --git a/crates/snippet/grammar/grammar.js b/crates/snippet/grammar/grammar.js deleted file mode 100644 index 25dedf4aa0..0000000000 --- a/crates/snippet/grammar/grammar.js +++ /dev/null @@ -1,26 +0,0 @@ -module.exports = grammar({ - name: 'snippet', - - rules: { - snippet: $ => repeat1($._any), - - _any: $ => choice( - $.tabstop, - $.placeholder, - $.text - ), - - tabstop: $ => choice( - seq('$', $.int), - seq('${', $.int, '}'), - ), - - placeholder: $ => seq('${', $.int, ':', $.snippet, '}'), - - int: $ => /[0-9]+/, - - text: $ => choice($._raw_curly, $._plain_text), - _raw_curly: $ => token(prec(-1, /}+/)), - _plain_text: $ => /([^$}]|\\[$\\}])+/, - } -}) \ No newline at end of file diff --git a/crates/snippet/grammar/package.json b/crates/snippet/grammar/package.json deleted file mode 100644 index 817cb1cae1..0000000000 --- a/crates/snippet/grammar/package.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "name": "tree-sitter-snippet", - "version": "0.0.1", - "description": "snippet grammar for tree-sitter", - "main": "bindings/node", - "keywords": [ - "parsing", - "incremental" - ], - "dependencies": { - "nan": "^2.12.1" - }, - "devDependencies": { - "tree-sitter-cli": "^0.20.4" - }, - "scripts": { - "test": "tree-sitter test" - } -} diff --git a/crates/snippet/grammar/src/grammar.json b/crates/snippet/grammar/src/grammar.json deleted file mode 100644 index 0d6aa3acf3..0000000000 --- a/crates/snippet/grammar/src/grammar.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "name": "snippet", - "rules": { - "snippet": { - "type": "REPEAT1", - "content": { - "type": "SYMBOL", - "name": "_any" - } - }, - "_any": { - "type": "CHOICE", - "members": [ - { - "type": "SYMBOL", - "name": "tabstop" - }, - { - "type": "SYMBOL", - "name": "placeholder" - }, - { - "type": "SYMBOL", - "name": "text" - } - ] - }, - "tabstop": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - { - "type": "STRING", - "value": "$" - }, - { - "type": "SYMBOL", - "name": "int" - } - ] - }, - { - "type": "SEQ", - "members": [ - { - "type": "STRING", - "value": "${" - }, - { - "type": "SYMBOL", - "name": "int" - }, - { - "type": "STRING", - "value": "}" - } - ] - } - ] - }, - "placeholder": { - "type": "SEQ", - "members": [ - { - "type": "STRING", - "value": "${" - }, - { - "type": "SYMBOL", - "name": "int" - }, - { - "type": "STRING", - "value": ":" - }, - { - "type": "SYMBOL", - "name": "snippet" - }, - { - "type": "STRING", - "value": "}" - } - ] - }, - "int": { - "type": "PATTERN", - "value": "[0-9]+" - }, - "text": { - "type": "CHOICE", - "members": [ - { - "type": "SYMBOL", - "name": "_raw_curly" - }, - { - "type": "SYMBOL", - "name": "_plain_text" - } - ] - }, - "_raw_curly": { - "type": "TOKEN", - "content": { - "type": "PREC", - "value": -1, - "content": { - "type": "PATTERN", - "value": "}+" - } - } - }, - "_plain_text": { - "type": "PATTERN", - "value": "([^$}]|\\\\[$\\\\}])+" - } - }, - "extras": [ - { - "type": "PATTERN", - "value": "\\s" - } - ], - "conflicts": [], - "precedences": [], - "externals": [], - "inline": [], - "supertypes": [] -} - diff --git a/crates/snippet/grammar/src/node-types.json b/crates/snippet/grammar/src/node-types.json deleted file mode 100644 index ea5dde3575..0000000000 --- a/crates/snippet/grammar/src/node-types.json +++ /dev/null @@ -1,84 +0,0 @@ -[ - { - "type": "placeholder", - "named": true, - "fields": {}, - "children": { - "multiple": true, - "required": true, - "types": [ - { - "type": "int", - "named": true - }, - { - "type": "snippet", - "named": true - } - ] - } - }, - { - "type": "snippet", - "named": true, - "fields": {}, - "children": { - "multiple": true, - "required": true, - "types": [ - { - "type": "placeholder", - "named": true - }, - { - "type": "tabstop", - "named": true - }, - { - "type": "text", - "named": true - } - ] - } - }, - { - "type": "tabstop", - "named": true, - "fields": {}, - "children": { - "multiple": false, - "required": true, - "types": [ - { - "type": "int", - "named": true - } - ] - } - }, - { - "type": "text", - "named": true, - "fields": {} - }, - { - "type": "$", - "named": false - }, - { - "type": "${", - "named": false - }, - { - "type": ":", - "named": false - }, - { - "type": "int", - "named": true - }, - { - "type": "}", - "named": false - } -] \ No newline at end of file diff --git a/crates/snippet/grammar/src/parser.c b/crates/snippet/grammar/src/parser.c deleted file mode 100644 index 00c34b67dc..0000000000 --- a/crates/snippet/grammar/src/parser.c +++ /dev/null @@ -1,545 +0,0 @@ -#include - -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" -#endif - -#define LANGUAGE_VERSION 13 -#define STATE_COUNT 25 -#define LARGE_STATE_COUNT 8 -#define SYMBOL_COUNT 14 -#define ALIAS_COUNT 0 -#define TOKEN_COUNT 8 -#define EXTERNAL_TOKEN_COUNT 0 -#define FIELD_COUNT 0 -#define MAX_ALIAS_SEQUENCE_LENGTH 5 -#define PRODUCTION_ID_COUNT 1 - -enum { - anon_sym_DOLLAR = 1, - anon_sym_DOLLAR_LBRACE = 2, - anon_sym_RBRACE = 3, - anon_sym_COLON = 4, - sym_int = 5, - sym__raw_curly = 6, - sym__plain_text = 7, - sym_snippet = 8, - sym__any = 9, - sym_tabstop = 10, - sym_placeholder = 11, - sym_text = 12, - aux_sym_snippet_repeat1 = 13, -}; - -static const char * const ts_symbol_names[] = { - [ts_builtin_sym_end] = "end", - [anon_sym_DOLLAR] = "$", - [anon_sym_DOLLAR_LBRACE] = "${", - [anon_sym_RBRACE] = "}", - [anon_sym_COLON] = ":", - [sym_int] = "int", - [sym__raw_curly] = "_raw_curly", - [sym__plain_text] = "_plain_text", - [sym_snippet] = "snippet", - [sym__any] = "_any", - [sym_tabstop] = "tabstop", - [sym_placeholder] = "placeholder", - [sym_text] = "text", - [aux_sym_snippet_repeat1] = "snippet_repeat1", -}; - -static const TSSymbol ts_symbol_map[] = { - [ts_builtin_sym_end] = ts_builtin_sym_end, - [anon_sym_DOLLAR] = anon_sym_DOLLAR, - [anon_sym_DOLLAR_LBRACE] = anon_sym_DOLLAR_LBRACE, - [anon_sym_RBRACE] = anon_sym_RBRACE, - [anon_sym_COLON] = anon_sym_COLON, - [sym_int] = sym_int, - [sym__raw_curly] = sym__raw_curly, - [sym__plain_text] = sym__plain_text, - [sym_snippet] = sym_snippet, - [sym__any] = sym__any, - [sym_tabstop] = sym_tabstop, - [sym_placeholder] = sym_placeholder, - [sym_text] = sym_text, - [aux_sym_snippet_repeat1] = aux_sym_snippet_repeat1, -}; - -static const TSSymbolMetadata ts_symbol_metadata[] = { - [ts_builtin_sym_end] = { - .visible = false, - .named = true, - }, - [anon_sym_DOLLAR] = { - .visible = true, - .named = false, - }, - [anon_sym_DOLLAR_LBRACE] = { - .visible = true, - .named = false, - }, - [anon_sym_RBRACE] = { - .visible = true, - .named = false, - }, - [anon_sym_COLON] = { - .visible = true, - .named = false, - }, - [sym_int] = { - .visible = true, - .named = true, - }, - [sym__raw_curly] = { - .visible = false, - .named = true, - }, - [sym__plain_text] = { - .visible = false, - .named = true, - }, - [sym_snippet] = { - .visible = true, - .named = true, - }, - [sym__any] = { - .visible = false, - .named = true, - }, - [sym_tabstop] = { - .visible = true, - .named = true, - }, - [sym_placeholder] = { - .visible = true, - .named = true, - }, - [sym_text] = { - .visible = true, - .named = true, - }, - [aux_sym_snippet_repeat1] = { - .visible = false, - .named = false, - }, -}; - -static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { - [0] = {0}, -}; - -static const uint16_t ts_non_terminal_alias_map[] = { - 0, -}; - -static bool ts_lex(TSLexer *lexer, TSStateId state) { - START_LEXER(); - eof = lexer->eof(lexer); - switch (state) { - case 0: - if (eof) ADVANCE(3); - if (lookahead == '$') ADVANCE(4); - if (lookahead == ':') ADVANCE(7); - if (lookahead == '}') ADVANCE(6); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') SKIP(0) - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8); - END_STATE(); - case 1: - if (lookahead == '$') ADVANCE(4); - if (lookahead == '\\') ADVANCE(12); - if (lookahead == '}') ADVANCE(6); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') ADVANCE(10); - if (lookahead != 0) ADVANCE(11); - END_STATE(); - case 2: - if (eof) ADVANCE(3); - if (lookahead == '$') ADVANCE(4); - if (lookahead == '\\') ADVANCE(12); - if (lookahead == '}') ADVANCE(9); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') ADVANCE(10); - if (lookahead != 0) ADVANCE(11); - END_STATE(); - case 3: - ACCEPT_TOKEN(ts_builtin_sym_end); - END_STATE(); - case 4: - ACCEPT_TOKEN(anon_sym_DOLLAR); - if (lookahead == '{') ADVANCE(5); - END_STATE(); - case 5: - ACCEPT_TOKEN(anon_sym_DOLLAR_LBRACE); - END_STATE(); - case 6: - ACCEPT_TOKEN(anon_sym_RBRACE); - END_STATE(); - case 7: - ACCEPT_TOKEN(anon_sym_COLON); - END_STATE(); - case 8: - ACCEPT_TOKEN(sym_int); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8); - END_STATE(); - case 9: - ACCEPT_TOKEN(sym__raw_curly); - if (lookahead == '}') ADVANCE(9); - END_STATE(); - case 10: - ACCEPT_TOKEN(sym__plain_text); - if (lookahead == '\\') ADVANCE(12); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') ADVANCE(10); - if (lookahead != 0 && - lookahead != '$' && - lookahead != '}') ADVANCE(11); - END_STATE(); - case 11: - ACCEPT_TOKEN(sym__plain_text); - if (lookahead == '\\') ADVANCE(12); - if (lookahead != 0 && - lookahead != '$' && - lookahead != '}') ADVANCE(11); - END_STATE(); - case 12: - ACCEPT_TOKEN(sym__plain_text); - if (lookahead == '\\') ADVANCE(12); - if (lookahead != 0) ADVANCE(11); - END_STATE(); - default: - return false; - } -} - -static const TSLexMode ts_lex_modes[STATE_COUNT] = { - [0] = {.lex_state = 0}, - [1] = {.lex_state = 2}, - [2] = {.lex_state = 2}, - [3] = {.lex_state = 2}, - [4] = {.lex_state = 1}, - [5] = {.lex_state = 1}, - [6] = {.lex_state = 2}, - [7] = {.lex_state = 2}, - [8] = {.lex_state = 1}, - [9] = {.lex_state = 2}, - [10] = {.lex_state = 2}, - [11] = {.lex_state = 2}, - [12] = {.lex_state = 1}, - [13] = {.lex_state = 1}, - [14] = {.lex_state = 2}, - [15] = {.lex_state = 1}, - [16] = {.lex_state = 0}, - [17] = {.lex_state = 0}, - [18] = {.lex_state = 0}, - [19] = {.lex_state = 0}, - [20] = {.lex_state = 0}, - [21] = {.lex_state = 0}, - [22] = {.lex_state = 0}, - [23] = {.lex_state = 0}, - [24] = {.lex_state = 0}, -}; - -static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { - [0] = { - [ts_builtin_sym_end] = ACTIONS(1), - [anon_sym_DOLLAR] = ACTIONS(1), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(1), - [anon_sym_RBRACE] = ACTIONS(1), - [anon_sym_COLON] = ACTIONS(1), - [sym_int] = ACTIONS(1), - [sym__raw_curly] = ACTIONS(1), - }, - [1] = { - [sym_snippet] = STATE(20), - [sym__any] = STATE(6), - [sym_tabstop] = STATE(6), - [sym_placeholder] = STATE(6), - [sym_text] = STATE(6), - [aux_sym_snippet_repeat1] = STATE(6), - [anon_sym_DOLLAR] = ACTIONS(3), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(5), - [sym__raw_curly] = ACTIONS(7), - [sym__plain_text] = ACTIONS(9), - }, - [2] = { - [sym_snippet] = STATE(18), - [sym__any] = STATE(5), - [sym_tabstop] = STATE(5), - [sym_placeholder] = STATE(5), - [sym_text] = STATE(5), - [aux_sym_snippet_repeat1] = STATE(5), - [anon_sym_DOLLAR] = ACTIONS(11), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), - [sym__raw_curly] = ACTIONS(15), - [sym__plain_text] = ACTIONS(17), - }, - [3] = { - [sym_snippet] = STATE(22), - [sym__any] = STATE(5), - [sym_tabstop] = STATE(5), - [sym_placeholder] = STATE(5), - [sym_text] = STATE(5), - [aux_sym_snippet_repeat1] = STATE(5), - [anon_sym_DOLLAR] = ACTIONS(11), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), - [sym__raw_curly] = ACTIONS(15), - [sym__plain_text] = ACTIONS(17), - }, - [4] = { - [sym__any] = STATE(4), - [sym_tabstop] = STATE(4), - [sym_placeholder] = STATE(4), - [sym_text] = STATE(4), - [aux_sym_snippet_repeat1] = STATE(4), - [anon_sym_DOLLAR] = ACTIONS(19), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(22), - [anon_sym_RBRACE] = ACTIONS(25), - [sym__raw_curly] = ACTIONS(27), - [sym__plain_text] = ACTIONS(30), - }, - [5] = { - [sym__any] = STATE(4), - [sym_tabstop] = STATE(4), - [sym_placeholder] = STATE(4), - [sym_text] = STATE(4), - [aux_sym_snippet_repeat1] = STATE(4), - [anon_sym_DOLLAR] = ACTIONS(11), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), - [anon_sym_RBRACE] = ACTIONS(33), - [sym__raw_curly] = ACTIONS(15), - [sym__plain_text] = ACTIONS(17), - }, - [6] = { - [sym__any] = STATE(7), - [sym_tabstop] = STATE(7), - [sym_placeholder] = STATE(7), - [sym_text] = STATE(7), - [aux_sym_snippet_repeat1] = STATE(7), - [ts_builtin_sym_end] = ACTIONS(35), - [anon_sym_DOLLAR] = ACTIONS(3), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(5), - [sym__raw_curly] = ACTIONS(7), - [sym__plain_text] = ACTIONS(9), - }, - [7] = { - [sym__any] = STATE(7), - [sym_tabstop] = STATE(7), - [sym_placeholder] = STATE(7), - [sym_text] = STATE(7), - [aux_sym_snippet_repeat1] = STATE(7), - [ts_builtin_sym_end] = ACTIONS(37), - [anon_sym_DOLLAR] = ACTIONS(39), - [anon_sym_DOLLAR_LBRACE] = ACTIONS(42), - [sym__raw_curly] = ACTIONS(45), - [sym__plain_text] = ACTIONS(48), - }, -}; - -static const uint16_t ts_small_parse_table[] = { - [0] = 2, - ACTIONS(53), 1, - sym__plain_text, - ACTIONS(51), 4, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - anon_sym_RBRACE, - sym__raw_curly, - [10] = 2, - ACTIONS(55), 2, - ts_builtin_sym_end, - sym__plain_text, - ACTIONS(57), 3, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - sym__raw_curly, - [20] = 2, - ACTIONS(53), 2, - ts_builtin_sym_end, - sym__plain_text, - ACTIONS(51), 3, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - sym__raw_curly, - [30] = 2, - ACTIONS(59), 2, - ts_builtin_sym_end, - sym__plain_text, - ACTIONS(61), 3, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - sym__raw_curly, - [40] = 2, - ACTIONS(65), 1, - sym__plain_text, - ACTIONS(63), 4, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - anon_sym_RBRACE, - sym__raw_curly, - [50] = 2, - ACTIONS(55), 1, - sym__plain_text, - ACTIONS(57), 4, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - anon_sym_RBRACE, - sym__raw_curly, - [60] = 2, - ACTIONS(65), 2, - ts_builtin_sym_end, - sym__plain_text, - ACTIONS(63), 3, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - sym__raw_curly, - [70] = 2, - ACTIONS(59), 1, - sym__plain_text, - ACTIONS(61), 4, - anon_sym_DOLLAR, - anon_sym_DOLLAR_LBRACE, - anon_sym_RBRACE, - sym__raw_curly, - [80] = 2, - ACTIONS(67), 1, - anon_sym_RBRACE, - ACTIONS(69), 1, - anon_sym_COLON, - [87] = 2, - ACTIONS(71), 1, - anon_sym_RBRACE, - ACTIONS(73), 1, - anon_sym_COLON, - [94] = 1, - ACTIONS(75), 1, - anon_sym_RBRACE, - [98] = 1, - ACTIONS(77), 1, - sym_int, - [102] = 1, - ACTIONS(79), 1, - ts_builtin_sym_end, - [106] = 1, - ACTIONS(81), 1, - sym_int, - [110] = 1, - ACTIONS(83), 1, - anon_sym_RBRACE, - [114] = 1, - ACTIONS(85), 1, - sym_int, - [118] = 1, - ACTIONS(87), 1, - sym_int, -}; - -static const uint32_t ts_small_parse_table_map[] = { - [SMALL_STATE(8)] = 0, - [SMALL_STATE(9)] = 10, - [SMALL_STATE(10)] = 20, - [SMALL_STATE(11)] = 30, - [SMALL_STATE(12)] = 40, - [SMALL_STATE(13)] = 50, - [SMALL_STATE(14)] = 60, - [SMALL_STATE(15)] = 70, - [SMALL_STATE(16)] = 80, - [SMALL_STATE(17)] = 87, - [SMALL_STATE(18)] = 94, - [SMALL_STATE(19)] = 98, - [SMALL_STATE(20)] = 102, - [SMALL_STATE(21)] = 106, - [SMALL_STATE(22)] = 110, - [SMALL_STATE(23)] = 114, - [SMALL_STATE(24)] = 118, -}; - -static const TSParseActionEntry ts_parse_actions[] = { - [0] = {.entry = {.count = 0, .reusable = false}}, - [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), - [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT(19), - [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24), - [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(14), - [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), - [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(21), - [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), - [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(12), - [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), - [19] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(21), - [22] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(23), - [25] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), - [27] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12), - [30] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12), - [33] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_snippet, 1), - [35] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_snippet, 1), - [37] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), - [39] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(19), - [42] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(24), - [45] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14), - [48] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14), - [51] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 3), - [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 3), - [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 2), - [57] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 2), - [59] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_placeholder, 5), - [61] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_placeholder, 5), - [63] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_text, 1), - [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_text, 1), - [67] = {.entry = {.count = 1, .reusable = true}}, SHIFT(10), - [69] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [71] = {.entry = {.count = 1, .reusable = true}}, SHIFT(8), - [73] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [75] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), - [77] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), - [79] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), - [81] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), - [83] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), - [85] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), - [87] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), -}; - -#ifdef __cplusplus -extern "C" { -#endif -#ifdef _WIN32 -#define extern __declspec(dllexport) -#endif - -extern const TSLanguage *tree_sitter_snippet(void) { - static const TSLanguage language = { - .version = LANGUAGE_VERSION, - .symbol_count = SYMBOL_COUNT, - .alias_count = ALIAS_COUNT, - .token_count = TOKEN_COUNT, - .external_token_count = EXTERNAL_TOKEN_COUNT, - .state_count = STATE_COUNT, - .large_state_count = LARGE_STATE_COUNT, - .production_id_count = PRODUCTION_ID_COUNT, - .field_count = FIELD_COUNT, - .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, - .parse_table = &ts_parse_table[0][0], - .small_parse_table = ts_small_parse_table, - .small_parse_table_map = ts_small_parse_table_map, - .parse_actions = ts_parse_actions, - .symbol_names = ts_symbol_names, - .symbol_metadata = ts_symbol_metadata, - .public_symbol_map = ts_symbol_map, - .alias_map = ts_non_terminal_alias_map, - .alias_sequences = &ts_alias_sequences[0][0], - .lex_modes = ts_lex_modes, - .lex_fn = ts_lex, - }; - return &language; -} -#ifdef __cplusplus -} -#endif diff --git a/crates/snippet/grammar/src/tree_sitter/parser.h b/crates/snippet/grammar/src/tree_sitter/parser.h deleted file mode 100644 index 2b14ac1046..0000000000 --- a/crates/snippet/grammar/src/tree_sitter/parser.h +++ /dev/null @@ -1,224 +0,0 @@ -#ifndef TREE_SITTER_PARSER_H_ -#define TREE_SITTER_PARSER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define ts_builtin_sym_error ((TSSymbol)-1) -#define ts_builtin_sym_end 0 -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -typedef uint16_t TSStateId; - -#ifndef TREE_SITTER_API_H_ -typedef uint16_t TSSymbol; -typedef uint16_t TSFieldId; -typedef struct TSLanguage TSLanguage; -#endif - -typedef struct { - TSFieldId field_id; - uint8_t child_index; - bool inherited; -} TSFieldMapEntry; - -typedef struct { - uint16_t index; - uint16_t length; -} TSFieldMapSlice; - -typedef struct { - bool visible; - bool named; - bool supertype; -} TSSymbolMetadata; - -typedef struct TSLexer TSLexer; - -struct TSLexer { - int32_t lookahead; - TSSymbol result_symbol; - void (*advance)(TSLexer *, bool); - void (*mark_end)(TSLexer *); - uint32_t (*get_column)(TSLexer *); - bool (*is_at_included_range_start)(const TSLexer *); - bool (*eof)(const TSLexer *); -}; - -typedef enum { - TSParseActionTypeShift, - TSParseActionTypeReduce, - TSParseActionTypeAccept, - TSParseActionTypeRecover, -} TSParseActionType; - -typedef union { - struct { - uint8_t type; - TSStateId state; - bool extra; - bool repetition; - } shift; - struct { - uint8_t type; - uint8_t child_count; - TSSymbol symbol; - int16_t dynamic_precedence; - uint16_t production_id; - } reduce; - uint8_t type; -} TSParseAction; - -typedef struct { - uint16_t lex_state; - uint16_t external_lex_state; -} TSLexMode; - -typedef union { - TSParseAction action; - struct { - uint8_t count; - bool reusable; - } entry; -} TSParseActionEntry; - -struct TSLanguage { - uint32_t version; - uint32_t symbol_count; - uint32_t alias_count; - uint32_t token_count; - uint32_t external_token_count; - uint32_t state_count; - uint32_t large_state_count; - uint32_t production_id_count; - uint32_t field_count; - uint16_t max_alias_sequence_length; - const uint16_t *parse_table; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSParseActionEntry *parse_actions; - const char * const *symbol_names; - const char * const *field_names; - const TSFieldMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const TSSymbolMetadata *symbol_metadata; - const TSSymbol *public_symbol_map; - const uint16_t *alias_map; - const TSSymbol *alias_sequences; - const TSLexMode *lex_modes; - bool (*lex_fn)(TSLexer *, TSStateId); - bool (*keyword_lex_fn)(TSLexer *, TSStateId); - TSSymbol keyword_capture_token; - struct { - const bool *states; - const TSSymbol *symbol_map; - void *(*create)(void); - void (*destroy)(void *); - bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); - unsigned (*serialize)(void *, char *); - void (*deserialize)(void *, const char *, unsigned); - } external_scanner; - const TSStateId *primary_state_ids; -}; - -/* - * Lexer Macros - */ - -#define START_LEXER() \ - bool result = false; \ - bool skip = false; \ - bool eof = false; \ - int32_t lookahead; \ - goto start; \ - next_state: \ - lexer->advance(lexer, skip); \ - start: \ - skip = false; \ - lookahead = lexer->lookahead; - -#define ADVANCE(state_value) \ - { \ - state = state_value; \ - goto next_state; \ - } - -#define SKIP(state_value) \ - { \ - skip = true; \ - state = state_value; \ - goto next_state; \ - } - -#define ACCEPT_TOKEN(symbol_value) \ - result = true; \ - lexer->result_symbol = symbol_value; \ - lexer->mark_end(lexer); - -#define END_STATE() return result; - -/* - * Parse Table Macros - */ - -#define SMALL_STATE(id) id - LARGE_STATE_COUNT - -#define STATE(id) id - -#define ACTIONS(id) id - -#define SHIFT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = state_value \ - } \ - }} - -#define SHIFT_REPEAT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = state_value, \ - .repetition = true \ - } \ - }} - -#define SHIFT_EXTRA() \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .extra = true \ - } \ - }} - -#define REDUCE(symbol_val, child_count_val, ...) \ - {{ \ - .reduce = { \ - .type = TSParseActionTypeReduce, \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ - }} - -#define RECOVER() \ - {{ \ - .type = TSParseActionTypeRecover \ - }} - -#define ACCEPT_INPUT() \ - {{ \ - .type = TSParseActionTypeAccept \ - }} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_PARSER_H_ diff --git a/crates/snippet/src/snippet.rs b/crates/snippet/src/snippet.rs index 51d58aadeb..f838721ad5 100644 --- a/crates/snippet/src/snippet.rs +++ b/crates/snippet/src/snippet.rs @@ -1,31 +1,21 @@ -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use smallvec::SmallVec; use std::{collections::BTreeMap, ops::Range}; -use tree_sitter::{Parser, TreeCursor}; #[derive(Default)] pub struct Snippet { pub text: String, - pub tabstops: Vec; 2]>>, + pub tabstops: Vec, } +type TabStop = SmallVec<[Range; 2]>; + impl Snippet { pub fn parse(source: &str) -> Result { - let mut parser = Parser::new(); - parser - .set_language(tree_sitter_snippet::language()) - .unwrap(); - - let tree = parser.parse(source, None).unwrap(); - if tree.root_node().has_error() { - return Err(anyhow!("invalid snippet")); - } - let mut text = String::new(); let mut tabstops = BTreeMap::new(); - let mut cursor = tree.root_node().walk(); - parse_snippet_node(&mut cursor, &mut text, &mut tabstops, source)?; - + parse_snippet(source, false, &mut text, &mut tabstops) + .context("failed to parse snippet")?; Ok(Snippet { text, tabstops: tabstops.into_values().collect(), @@ -33,53 +23,79 @@ impl Snippet { } } -fn parse_snippet_node( - cursor: &mut TreeCursor, +fn parse_snippet<'a>( + mut source: &'a str, + nested: bool, text: &mut String, - tabstops: &mut BTreeMap; 2]>>, - source: &str, -) -> Result<()> { - cursor.goto_first_child(); + tabstops: &mut BTreeMap, +) -> Result<&'a str> { loop { - let node = cursor.node(); - match node.kind() { - "text" => text.push_str(&source[node.byte_range()]), - "tabstop" => { - if let Some(int_node) = node.named_child(0) { - let index = source[int_node.byte_range()].parse::()?; - tabstops - .entry(index) - .or_insert(SmallVec::new()) - .push(text.len()..text.len()); + match source.chars().next() { + None => return Ok(""), + Some('$') => { + source = parse_tabstop(&source[1..], text, tabstops)?; + } + Some('}') => { + if nested { + return Ok(source); + } else { + text.push('}'); + source = &source[1..]; } } - "placeholder" => { - cursor.goto_first_child(); - cursor.goto_next_sibling(); - let int_node = cursor.node(); - let index = source[int_node.byte_range()].parse::()?; - - cursor.goto_next_sibling(); - cursor.goto_next_sibling(); - let range_start = text.len(); - - parse_snippet_node(cursor, text, tabstops, source)?; - tabstops - .entry(index) - .or_insert(SmallVec::new()) - .push(range_start..text.len()); - - cursor.goto_parent(); + Some(_) => { + let chunk_end = source.find(&['}', '$']).unwrap_or(source.len()); + let (chunk, rest) = source.split_at(chunk_end); + text.push_str(chunk); + source = rest; } - _ => {} - } - - if !cursor.goto_next_sibling() { - break; } } - cursor.goto_parent(); - Ok(()) +} + +fn parse_tabstop<'a>( + mut source: &'a str, + text: &mut String, + tabstops: &mut BTreeMap, +) -> Result<&'a str> { + let tabstop_start = text.len(); + let tabstop_index; + if source.chars().next() == Some('{') { + let (index, rest) = parse_int(&source[1..])?; + tabstop_index = index; + source = rest; + + if source.chars().next() == Some(':') { + source = parse_snippet(&source[1..], true, text, tabstops)?; + } + + if source.chars().next() == Some('}') { + source = &source[1..]; + } else { + return Err(anyhow!("expected a closing brace")); + } + } else { + let (index, rest) = parse_int(&source)?; + tabstop_index = index; + source = rest; + } + + tabstops + .entry(tabstop_index) + .or_default() + .push(tabstop_start..text.len()); + Ok(source) +} + +fn parse_int(source: &str) -> Result<(usize, &str)> { + let len = source + .find(|c: char| !c.is_ascii_digit()) + .unwrap_or(source.len()); + if len == 0 { + return Err(anyhow!("expected an integer")); + } + let (prefix, suffix) = source.split_at(len); + Ok((prefix.parse()?, suffix)) } #[cfg(test)] @@ -98,19 +114,31 @@ mod tests { .collect::>(), &[vec![3..3]] ); - } - #[test] - fn test_parse_snippet_with_placeholders() { - let snippet = Snippet::parse("one${1:two}three").unwrap(); - assert_eq!(snippet.text, "onetwothree"); + // Multi-digit numbers + let snippet = Snippet::parse("one$123 $99").unwrap(); + assert_eq!(snippet.text, "one "); assert_eq!( snippet .tabstops .iter() .map(SmallVec::as_slice) .collect::>(), - &[vec![3..6]] + &[vec![4..4], vec![3..3]] + ); + } + + #[test] + fn test_parse_snippet_with_placeholders() { + let snippet = Snippet::parse("one${1:two}three${2:four}").unwrap(); + assert_eq!(snippet.text, "onetwothreefour"); + assert_eq!( + snippet + .tabstops + .iter() + .map(SmallVec::as_slice) + .collect::>(), + &[vec![3..6], vec![11..15]] ); }