From 956748e10cec8711cc5e6e02f6e83bbc994f02da Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 1 Feb 2022 10:42:14 -0800 Subject: [PATCH] Add snippet crate Co-Authored-By: Nathan Sobo Co-Authored-By: Antonio Scandurra --- Cargo.lock | 18 + crates/snippet/Cargo.toml | 13 + crates/snippet/grammar/Cargo.toml | 26 + crates/snippet/grammar/binding.gyp | 19 + .../snippet/grammar/bindings/node/binding.cc | 28 + crates/snippet/grammar/bindings/node/index.js | 19 + crates/snippet/grammar/bindings/rust/build.rs | 40 ++ crates/snippet/grammar/bindings/rust/lib.rs | 52 ++ crates/snippet/grammar/grammar.js | 26 + crates/snippet/grammar/package.json | 19 + crates/snippet/grammar/src/grammar.json | 133 +++++ crates/snippet/grammar/src/node-types.json | 84 +++ crates/snippet/grammar/src/parser.c | 545 ++++++++++++++++++ .../snippet/grammar/src/tree_sitter/parser.h | 224 +++++++ crates/snippet/src/snippet.rs | 139 +++++ 15 files changed, 1385 insertions(+) create mode 100644 crates/snippet/Cargo.toml create mode 100644 crates/snippet/grammar/Cargo.toml create mode 100644 crates/snippet/grammar/binding.gyp create mode 100644 crates/snippet/grammar/bindings/node/binding.cc create mode 100644 crates/snippet/grammar/bindings/node/index.js create mode 100644 crates/snippet/grammar/bindings/rust/build.rs create mode 100644 crates/snippet/grammar/bindings/rust/lib.rs create mode 100644 crates/snippet/grammar/grammar.js create mode 100644 crates/snippet/grammar/package.json create mode 100644 crates/snippet/grammar/src/grammar.json create mode 100644 crates/snippet/grammar/src/node-types.json create mode 100644 crates/snippet/grammar/src/parser.c create mode 100644 crates/snippet/grammar/src/tree_sitter/parser.h create mode 100644 crates/snippet/src/snippet.rs diff --git a/Cargo.lock b/Cargo.lock index 5de5b7ce1c..53d7651e32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4418,6 +4418,16 @@ dependencies = [ "pin-project-lite 0.1.12", ] +[[package]] +name = "snippet" +version = "0.1.0" 
+dependencies = [ + "anyhow", + "smallvec", + "tree-sitter", + "tree-sitter-snippet", +] + [[package]] name = "socket2" version = "0.3.19" @@ -5203,6 +5213,14 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-snippet" +version = "0.0.1" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "ttf-parser" version = "0.9.0" diff --git a/crates/snippet/Cargo.toml b/crates/snippet/Cargo.toml new file mode 100644 index 0000000000..daba92f848 --- /dev/null +++ b/crates/snippet/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "snippet" +version = "0.1.0" +edition = "2021" + +[lib] +path = "src/snippet.rs" + +[dependencies] +anyhow = "1.0" +smallvec = { version = "1.6", features = ["union"] } +tree-sitter = "0.20" +tree-sitter-snippet = { path = "./grammar" } diff --git a/crates/snippet/grammar/Cargo.toml b/crates/snippet/grammar/Cargo.toml new file mode 100644 index 0000000000..83defad879 --- /dev/null +++ b/crates/snippet/grammar/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "tree-sitter-snippet" +description = "snippet grammar for the tree-sitter parsing library" +version = "0.0.1" +keywords = ["incremental", "parsing", "snippet"] +categories = ["parsing", "text-editors"] +repository = "https://github.com/tree-sitter/tree-sitter-snippet" +edition = "2018" +license = "MIT" + +build = "bindings/rust/build.rs" +include = [ + "bindings/rust/*", + "grammar.js", + "queries/*", + "src/*", +] + +[lib] +path = "bindings/rust/lib.rs" + +[dependencies] +tree-sitter = "~0.20" + +[build-dependencies] +cc = "1.0" diff --git a/crates/snippet/grammar/binding.gyp b/crates/snippet/grammar/binding.gyp new file mode 100644 index 0000000000..a99fa70f98 --- /dev/null +++ b/crates/snippet/grammar/binding.gyp @@ -0,0 +1,19 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_snippet_binding", + "include_dirs": [ + " +#include "nan.h" + +using namespace v8; + +extern "C" TSLanguage * tree_sitter_snippet(); + +namespace { + +NAN_METHOD(New) {} + +void Init(Local 
exports, Local module) { + Local tpl = Nan::New(New); + tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(1); + + Local constructor = Nan::GetFunction(tpl).ToLocalChecked(); + Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(instance, 0, tree_sitter_snippet()); + + Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("snippet").ToLocalChecked()); + Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); +} + +NODE_MODULE(tree_sitter_snippet_binding, Init) + +} // namespace diff --git a/crates/snippet/grammar/bindings/node/index.js b/crates/snippet/grammar/bindings/node/index.js new file mode 100644 index 0000000000..c5ea00c7af --- /dev/null +++ b/crates/snippet/grammar/bindings/node/index.js @@ -0,0 +1,19 @@ +try { + module.exports = require("../../build/Release/tree_sitter_snippet_binding"); +} catch (error1) { + if (error1.code !== 'MODULE_NOT_FOUND') { + throw error1; + } + try { + module.exports = require("../../build/Debug/tree_sitter_snippet_binding"); + } catch (error2) { + if (error2.code !== 'MODULE_NOT_FOUND') { + throw error2; + } + throw error1 + } +} + +try { + module.exports.nodeTypeInfo = require("../../src/node-types.json"); +} catch (_) {} diff --git a/crates/snippet/grammar/bindings/rust/build.rs b/crates/snippet/grammar/bindings/rust/build.rs new file mode 100644 index 0000000000..c6061f0995 --- /dev/null +++ b/crates/snippet/grammar/bindings/rust/build.rs @@ -0,0 +1,40 @@ +fn main() { + let src_dir = std::path::Path::new("src"); + + let mut c_config = cc::Build::new(); + c_config.include(&src_dir); + c_config + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-Wno-trigraphs"); + let parser_path = src_dir.join("parser.c"); + c_config.file(&parser_path); + + // If your language uses an external scanner written in C, + // then include 
this block of code: + + /* + let scanner_path = src_dir.join("scanner.c"); + c_config.file(&scanner_path); + println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + */ + + c_config.compile("parser"); + println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); + + // If your language uses an external scanner written in C++, + // then include this block of code: + + /* + let mut cpp_config = cc::Build::new(); + cpp_config.cpp(true); + cpp_config.include(&src_dir); + cpp_config + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable"); + let scanner_path = src_dir.join("scanner.cc"); + cpp_config.file(&scanner_path); + cpp_config.compile("scanner"); + println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + */ +} diff --git a/crates/snippet/grammar/bindings/rust/lib.rs b/crates/snippet/grammar/bindings/rust/lib.rs new file mode 100644 index 0000000000..6809635413 --- /dev/null +++ b/crates/snippet/grammar/bindings/rust/lib.rs @@ -0,0 +1,52 @@ +//! This crate provides snippet language support for the [tree-sitter][] parsing library. +//! +//! Typically, you will use the [language][language func] function to add this language to a +//! tree-sitter [Parser][], and then use the parser to parse some code: +//! +//! ``` +//! let code = ""; +//! let mut parser = tree_sitter::Parser::new(); +//! parser.set_language(tree_sitter_snippet::language()).expect("Error loading snippet grammar"); +//! let tree = parser.parse(code, None).unwrap(); +//! ``` +//! +//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +//! [language func]: fn.language.html +//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html +//! [tree-sitter]: https://tree-sitter.github.io/ + +use tree_sitter::Language; + +extern "C" { + fn tree_sitter_snippet() -> Language; +} + +/// Get the tree-sitter [Language][] for this grammar. 
+/// +/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +pub fn language() -> Language { + unsafe { tree_sitter_snippet() } +} + +/// The content of the [`node-types.json`][] file for this grammar. +/// +/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types +pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json"); + +// Uncomment these to include any queries that this grammar contains + +// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); +// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); +// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); +// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); + +#[cfg(test)] +mod tests { + #[test] + fn test_can_load_grammar() { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(super::language()) + .expect("Error loading snippet language"); + } +} diff --git a/crates/snippet/grammar/grammar.js b/crates/snippet/grammar/grammar.js new file mode 100644 index 0000000000..25dedf4aa0 --- /dev/null +++ b/crates/snippet/grammar/grammar.js @@ -0,0 +1,26 @@ +module.exports = grammar({ + name: 'snippet', + + rules: { + snippet: $ => repeat1($._any), + + _any: $ => choice( + $.tabstop, + $.placeholder, + $.text + ), + + tabstop: $ => choice( + seq('$', $.int), + seq('${', $.int, '}'), + ), + + placeholder: $ => seq('${', $.int, ':', $.snippet, '}'), + + int: $ => /[0-9]+/, + + text: $ => choice($._raw_curly, $._plain_text), + _raw_curly: $ => token(prec(-1, /}+/)), + _plain_text: $ => /([^$}]|\\[$\\}])+/, + } +}) \ No newline at end of file diff --git a/crates/snippet/grammar/package.json b/crates/snippet/grammar/package.json new file mode 100644 index 0000000000..817cb1cae1 --- /dev/null +++ b/crates/snippet/grammar/package.json @@ -0,0 +1,19 @@ +{ + "name": "tree-sitter-snippet", + 
"version": "0.0.1", + "description": "snippet grammar for tree-sitter", + "main": "bindings/node", + "keywords": [ + "parsing", + "incremental" + ], + "dependencies": { + "nan": "^2.12.1" + }, + "devDependencies": { + "tree-sitter-cli": "^0.20.4" + }, + "scripts": { + "test": "tree-sitter test" + } +} diff --git a/crates/snippet/grammar/src/grammar.json b/crates/snippet/grammar/src/grammar.json new file mode 100644 index 0000000000..0d6aa3acf3 --- /dev/null +++ b/crates/snippet/grammar/src/grammar.json @@ -0,0 +1,133 @@ +{ + "name": "snippet", + "rules": { + "snippet": { + "type": "REPEAT1", + "content": { + "type": "SYMBOL", + "name": "_any" + } + }, + "_any": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "tabstop" + }, + { + "type": "SYMBOL", + "name": "placeholder" + }, + { + "type": "SYMBOL", + "name": "text" + } + ] + }, + "tabstop": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "$" + }, + { + "type": "SYMBOL", + "name": "int" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "${" + }, + { + "type": "SYMBOL", + "name": "int" + }, + { + "type": "STRING", + "value": "}" + } + ] + } + ] + }, + "placeholder": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "${" + }, + { + "type": "SYMBOL", + "name": "int" + }, + { + "type": "STRING", + "value": ":" + }, + { + "type": "SYMBOL", + "name": "snippet" + }, + { + "type": "STRING", + "value": "}" + } + ] + }, + "int": { + "type": "PATTERN", + "value": "[0-9]+" + }, + "text": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "_raw_curly" + }, + { + "type": "SYMBOL", + "name": "_plain_text" + } + ] + }, + "_raw_curly": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "PATTERN", + "value": "}+" + } + } + }, + "_plain_text": { + "type": "PATTERN", + "value": "([^$}]|\\\\[$\\\\}])+" + } + }, + "extras": [ + { + "type": 
"PATTERN", + "value": "\\s" + } + ], + "conflicts": [], + "precedences": [], + "externals": [], + "inline": [], + "supertypes": [] +} + diff --git a/crates/snippet/grammar/src/node-types.json b/crates/snippet/grammar/src/node-types.json new file mode 100644 index 0000000000..ea5dde3575 --- /dev/null +++ b/crates/snippet/grammar/src/node-types.json @@ -0,0 +1,84 @@ +[ + { + "type": "placeholder", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "int", + "named": true + }, + { + "type": "snippet", + "named": true + } + ] + } + }, + { + "type": "snippet", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "placeholder", + "named": true + }, + { + "type": "tabstop", + "named": true + }, + { + "type": "text", + "named": true + } + ] + } + }, + { + "type": "tabstop", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "int", + "named": true + } + ] + } + }, + { + "type": "text", + "named": true, + "fields": {} + }, + { + "type": "$", + "named": false + }, + { + "type": "${", + "named": false + }, + { + "type": ":", + "named": false + }, + { + "type": "int", + "named": true + }, + { + "type": "}", + "named": false + } +] \ No newline at end of file diff --git a/crates/snippet/grammar/src/parser.c b/crates/snippet/grammar/src/parser.c new file mode 100644 index 0000000000..00c34b67dc --- /dev/null +++ b/crates/snippet/grammar/src/parser.c @@ -0,0 +1,545 @@ +#include + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 13 +#define STATE_COUNT 25 +#define LARGE_STATE_COUNT 8 +#define SYMBOL_COUNT 14 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 8 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 0 +#define MAX_ALIAS_SEQUENCE_LENGTH 5 +#define 
PRODUCTION_ID_COUNT 1 + +enum { + anon_sym_DOLLAR = 1, + anon_sym_DOLLAR_LBRACE = 2, + anon_sym_RBRACE = 3, + anon_sym_COLON = 4, + sym_int = 5, + sym__raw_curly = 6, + sym__plain_text = 7, + sym_snippet = 8, + sym__any = 9, + sym_tabstop = 10, + sym_placeholder = 11, + sym_text = 12, + aux_sym_snippet_repeat1 = 13, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_DOLLAR] = "$", + [anon_sym_DOLLAR_LBRACE] = "${", + [anon_sym_RBRACE] = "}", + [anon_sym_COLON] = ":", + [sym_int] = "int", + [sym__raw_curly] = "_raw_curly", + [sym__plain_text] = "_plain_text", + [sym_snippet] = "snippet", + [sym__any] = "_any", + [sym_tabstop] = "tabstop", + [sym_placeholder] = "placeholder", + [sym_text] = "text", + [aux_sym_snippet_repeat1] = "snippet_repeat1", +}; + +static const TSSymbol ts_symbol_map[] = { + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_DOLLAR] = anon_sym_DOLLAR, + [anon_sym_DOLLAR_LBRACE] = anon_sym_DOLLAR_LBRACE, + [anon_sym_RBRACE] = anon_sym_RBRACE, + [anon_sym_COLON] = anon_sym_COLON, + [sym_int] = sym_int, + [sym__raw_curly] = sym__raw_curly, + [sym__plain_text] = sym__plain_text, + [sym_snippet] = sym_snippet, + [sym__any] = sym__any, + [sym_tabstop] = sym_tabstop, + [sym_placeholder] = sym_placeholder, + [sym_text] = sym_text, + [aux_sym_snippet_repeat1] = aux_sym_snippet_repeat1, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_DOLLAR] = { + .visible = true, + .named = false, + }, + [anon_sym_DOLLAR_LBRACE] = { + .visible = true, + .named = false, + }, + [anon_sym_RBRACE] = { + .visible = true, + .named = false, + }, + [anon_sym_COLON] = { + .visible = true, + .named = false, + }, + [sym_int] = { + .visible = true, + .named = true, + }, + [sym__raw_curly] = { + .visible = false, + .named = true, + }, + [sym__plain_text] = { + .visible = false, + .named = true, + }, + [sym_snippet] = { + .visible = true, + 
.named = true, + }, + [sym__any] = { + .visible = false, + .named = true, + }, + [sym_tabstop] = { + .visible = true, + .named = true, + }, + [sym_placeholder] = { + .visible = true, + .named = true, + }, + [sym_text] = { + .visible = true, + .named = true, + }, + [aux_sym_snippet_repeat1] = { + .visible = false, + .named = false, + }, +}; + +static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static const uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(3); + if (lookahead == '$') ADVANCE(4); + if (lookahead == ':') ADVANCE(7); + if (lookahead == '}') ADVANCE(6); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(0) + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8); + END_STATE(); + case 1: + if (lookahead == '$') ADVANCE(4); + if (lookahead == '\\') ADVANCE(12); + if (lookahead == '}') ADVANCE(6); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(10); + if (lookahead != 0) ADVANCE(11); + END_STATE(); + case 2: + if (eof) ADVANCE(3); + if (lookahead == '$') ADVANCE(4); + if (lookahead == '\\') ADVANCE(12); + if (lookahead == '}') ADVANCE(9); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(10); + if (lookahead != 0) ADVANCE(11); + END_STATE(); + case 3: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 4: + ACCEPT_TOKEN(anon_sym_DOLLAR); + if (lookahead == '{') ADVANCE(5); + END_STATE(); + case 5: + ACCEPT_TOKEN(anon_sym_DOLLAR_LBRACE); + END_STATE(); + case 6: + ACCEPT_TOKEN(anon_sym_RBRACE); + END_STATE(); + case 7: + ACCEPT_TOKEN(anon_sym_COLON); + END_STATE(); + case 8: + ACCEPT_TOKEN(sym_int); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8); + END_STATE(); + case 9: + 
ACCEPT_TOKEN(sym__raw_curly); + if (lookahead == '}') ADVANCE(9); + END_STATE(); + case 10: + ACCEPT_TOKEN(sym__plain_text); + if (lookahead == '\\') ADVANCE(12); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(10); + if (lookahead != 0 && + lookahead != '$' && + lookahead != '}') ADVANCE(11); + END_STATE(); + case 11: + ACCEPT_TOKEN(sym__plain_text); + if (lookahead == '\\') ADVANCE(12); + if (lookahead != 0 && + lookahead != '$' && + lookahead != '}') ADVANCE(11); + END_STATE(); + case 12: + ACCEPT_TOKEN(sym__plain_text); + if (lookahead == '\\') ADVANCE(12); + if (lookahead != 0) ADVANCE(11); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 2}, + [2] = {.lex_state = 2}, + [3] = {.lex_state = 2}, + [4] = {.lex_state = 1}, + [5] = {.lex_state = 1}, + [6] = {.lex_state = 2}, + [7] = {.lex_state = 2}, + [8] = {.lex_state = 1}, + [9] = {.lex_state = 2}, + [10] = {.lex_state = 2}, + [11] = {.lex_state = 2}, + [12] = {.lex_state = 1}, + [13] = {.lex_state = 1}, + [14] = {.lex_state = 2}, + [15] = {.lex_state = 1}, + [16] = {.lex_state = 0}, + [17] = {.lex_state = 0}, + [18] = {.lex_state = 0}, + [19] = {.lex_state = 0}, + [20] = {.lex_state = 0}, + [21] = {.lex_state = 0}, + [22] = {.lex_state = 0}, + [23] = {.lex_state = 0}, + [24] = {.lex_state = 0}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_DOLLAR] = ACTIONS(1), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(1), + [anon_sym_RBRACE] = ACTIONS(1), + [anon_sym_COLON] = ACTIONS(1), + [sym_int] = ACTIONS(1), + [sym__raw_curly] = ACTIONS(1), + }, + [1] = { + [sym_snippet] = STATE(20), + [sym__any] = STATE(6), + [sym_tabstop] = STATE(6), + [sym_placeholder] = STATE(6), + [sym_text] = STATE(6), + [aux_sym_snippet_repeat1] = STATE(6), + [anon_sym_DOLLAR] = ACTIONS(3), + 
[anon_sym_DOLLAR_LBRACE] = ACTIONS(5), + [sym__raw_curly] = ACTIONS(7), + [sym__plain_text] = ACTIONS(9), + }, + [2] = { + [sym_snippet] = STATE(18), + [sym__any] = STATE(5), + [sym_tabstop] = STATE(5), + [sym_placeholder] = STATE(5), + [sym_text] = STATE(5), + [aux_sym_snippet_repeat1] = STATE(5), + [anon_sym_DOLLAR] = ACTIONS(11), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), + [sym__raw_curly] = ACTIONS(15), + [sym__plain_text] = ACTIONS(17), + }, + [3] = { + [sym_snippet] = STATE(22), + [sym__any] = STATE(5), + [sym_tabstop] = STATE(5), + [sym_placeholder] = STATE(5), + [sym_text] = STATE(5), + [aux_sym_snippet_repeat1] = STATE(5), + [anon_sym_DOLLAR] = ACTIONS(11), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), + [sym__raw_curly] = ACTIONS(15), + [sym__plain_text] = ACTIONS(17), + }, + [4] = { + [sym__any] = STATE(4), + [sym_tabstop] = STATE(4), + [sym_placeholder] = STATE(4), + [sym_text] = STATE(4), + [aux_sym_snippet_repeat1] = STATE(4), + [anon_sym_DOLLAR] = ACTIONS(19), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(22), + [anon_sym_RBRACE] = ACTIONS(25), + [sym__raw_curly] = ACTIONS(27), + [sym__plain_text] = ACTIONS(30), + }, + [5] = { + [sym__any] = STATE(4), + [sym_tabstop] = STATE(4), + [sym_placeholder] = STATE(4), + [sym_text] = STATE(4), + [aux_sym_snippet_repeat1] = STATE(4), + [anon_sym_DOLLAR] = ACTIONS(11), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(13), + [anon_sym_RBRACE] = ACTIONS(33), + [sym__raw_curly] = ACTIONS(15), + [sym__plain_text] = ACTIONS(17), + }, + [6] = { + [sym__any] = STATE(7), + [sym_tabstop] = STATE(7), + [sym_placeholder] = STATE(7), + [sym_text] = STATE(7), + [aux_sym_snippet_repeat1] = STATE(7), + [ts_builtin_sym_end] = ACTIONS(35), + [anon_sym_DOLLAR] = ACTIONS(3), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(5), + [sym__raw_curly] = ACTIONS(7), + [sym__plain_text] = ACTIONS(9), + }, + [7] = { + [sym__any] = STATE(7), + [sym_tabstop] = STATE(7), + [sym_placeholder] = STATE(7), + [sym_text] = STATE(7), + [aux_sym_snippet_repeat1] = STATE(7), + 
[ts_builtin_sym_end] = ACTIONS(37), + [anon_sym_DOLLAR] = ACTIONS(39), + [anon_sym_DOLLAR_LBRACE] = ACTIONS(42), + [sym__raw_curly] = ACTIONS(45), + [sym__plain_text] = ACTIONS(48), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 2, + ACTIONS(53), 1, + sym__plain_text, + ACTIONS(51), 4, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + anon_sym_RBRACE, + sym__raw_curly, + [10] = 2, + ACTIONS(55), 2, + ts_builtin_sym_end, + sym__plain_text, + ACTIONS(57), 3, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + sym__raw_curly, + [20] = 2, + ACTIONS(53), 2, + ts_builtin_sym_end, + sym__plain_text, + ACTIONS(51), 3, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + sym__raw_curly, + [30] = 2, + ACTIONS(59), 2, + ts_builtin_sym_end, + sym__plain_text, + ACTIONS(61), 3, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + sym__raw_curly, + [40] = 2, + ACTIONS(65), 1, + sym__plain_text, + ACTIONS(63), 4, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + anon_sym_RBRACE, + sym__raw_curly, + [50] = 2, + ACTIONS(55), 1, + sym__plain_text, + ACTIONS(57), 4, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + anon_sym_RBRACE, + sym__raw_curly, + [60] = 2, + ACTIONS(65), 2, + ts_builtin_sym_end, + sym__plain_text, + ACTIONS(63), 3, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + sym__raw_curly, + [70] = 2, + ACTIONS(59), 1, + sym__plain_text, + ACTIONS(61), 4, + anon_sym_DOLLAR, + anon_sym_DOLLAR_LBRACE, + anon_sym_RBRACE, + sym__raw_curly, + [80] = 2, + ACTIONS(67), 1, + anon_sym_RBRACE, + ACTIONS(69), 1, + anon_sym_COLON, + [87] = 2, + ACTIONS(71), 1, + anon_sym_RBRACE, + ACTIONS(73), 1, + anon_sym_COLON, + [94] = 1, + ACTIONS(75), 1, + anon_sym_RBRACE, + [98] = 1, + ACTIONS(77), 1, + sym_int, + [102] = 1, + ACTIONS(79), 1, + ts_builtin_sym_end, + [106] = 1, + ACTIONS(81), 1, + sym_int, + [110] = 1, + ACTIONS(83), 1, + anon_sym_RBRACE, + [114] = 1, + ACTIONS(85), 1, + sym_int, + [118] = 1, + ACTIONS(87), 1, + sym_int, +}; + +static const uint32_t ts_small_parse_table_map[] = { + 
[SMALL_STATE(8)] = 0, + [SMALL_STATE(9)] = 10, + [SMALL_STATE(10)] = 20, + [SMALL_STATE(11)] = 30, + [SMALL_STATE(12)] = 40, + [SMALL_STATE(13)] = 50, + [SMALL_STATE(14)] = 60, + [SMALL_STATE(15)] = 70, + [SMALL_STATE(16)] = 80, + [SMALL_STATE(17)] = 87, + [SMALL_STATE(18)] = 94, + [SMALL_STATE(19)] = 98, + [SMALL_STATE(20)] = 102, + [SMALL_STATE(21)] = 106, + [SMALL_STATE(22)] = 110, + [SMALL_STATE(23)] = 114, + [SMALL_STATE(24)] = 118, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT(19), + [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24), + [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(14), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), + [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(21), + [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), + [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(12), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [19] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(21), + [22] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(23), + [25] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), + [27] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12), + [30] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12), + [33] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_snippet, 1), + [35] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_snippet, 1), + [37] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), + [39] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(19), + [42] = 
{.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(24), + [45] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14), + [48] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14), + [51] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 3), + [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 3), + [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 2), + [57] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 2), + [59] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_placeholder, 5), + [61] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_placeholder, 5), + [63] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_text, 1), + [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_text, 1), + [67] = {.entry = {.count = 1, .reusable = true}}, SHIFT(10), + [69] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [71] = {.entry = {.count = 1, .reusable = true}}, SHIFT(8), + [73] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [75] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), + [77] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [79] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [81] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [83] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), + [85] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [87] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef _WIN32 +#define extern __declspec(dllexport) +#endif + +extern const TSLanguage *tree_sitter_snippet(void) { + static const TSLanguage language = { + .version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + 
.state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = ts_lex_modes, + .lex_fn = ts_lex, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/crates/snippet/grammar/src/tree_sitter/parser.h b/crates/snippet/grammar/src/tree_sitter/parser.h new file mode 100644 index 0000000000..2b14ac1046 --- /dev/null +++ b/crates/snippet/grammar/src/tree_sitter/parser.h @@ -0,0 +1,224 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + 
TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + 
next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) id - LARGE_STATE_COUNT + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) 
\ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_
diff --git a/crates/snippet/src/snippet.rs b/crates/snippet/src/snippet.rs
new file mode 100644
index 0000000000..51d58aadeb
--- /dev/null
+++ b/crates/snippet/src/snippet.rs
@@ -0,0 +1,165 @@
+use anyhow::{anyhow, Result};
+use smallvec::SmallVec;
+use std::{collections::BTreeMap, ops::Range};
+use tree_sitter::{Parser, TreeCursor};
+
+/// A parsed snippet: its plain text plus the byte ranges of its tabstops.
+#[derive(Default)]
+pub struct Snippet {
+    /// The snippet text with all tabstop and placeholder syntax removed.
+    pub text: String,
+    /// Ranges into `text`, grouped by tabstop index in ascending index order.
+    /// A tabstop index that occurs several times owns several ranges.
+    pub tabstops: Vec<SmallVec<[Range<usize>; 2]>>,
+}
+
+impl Snippet {
+    /// Parses `source` with the tree-sitter snippet grammar.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the syntax tree contains an error node or if a
+    /// tabstop index cannot be parsed as a `usize`.
+    pub fn parse(source: &str) -> Result<Self> {
+        let mut parser = Parser::new();
+        parser
+            .set_language(tree_sitter_snippet::language())
+            .expect("snippet grammar should be compatible with the linked tree-sitter");
+
+        let tree = parser
+            .parse(source, None)
+            .expect("parse only fails without a language or on timeout/cancellation");
+        if tree.root_node().has_error() {
+            return Err(anyhow!("invalid snippet"));
+        }
+
+        let mut text = String::new();
+        let mut tabstops = BTreeMap::new();
+        let mut cursor = tree.root_node().walk();
+        parse_snippet_node(&mut cursor, &mut text, &mut tabstops, source)?;
+
+        Ok(Snippet {
+            text,
+            // `BTreeMap` yields its values in key order, i.e. sorted by tabstop index.
+            tabstops: tabstops.into_values().collect(),
+        })
+    }
+}
+
+/// Appends the text below the node under `cursor` to `text` and records the
+/// range of every tabstop and placeholder found there in `tabstops`.
+///
+/// The cursor is left on the node it started on.
+fn parse_snippet_node(
+    cursor: &mut TreeCursor,
+    text: &mut String,
+    tabstops: &mut BTreeMap<usize, SmallVec<[Range<usize>; 2]>>,
+    source: &str,
+) -> Result<()> {
+    // A childless node has nothing to visit. Returning here also stops the
+    // trailing `goto_parent` from climbing one level above the start node.
+    if !cursor.goto_first_child() {
+        return Ok(());
+    }
+
+    loop {
+        let node = cursor.node();
+        match node.kind() {
+            "text" => text.push_str(&source[node.byte_range()]),
+            "tabstop" => {
+                if let Some(int_node) = node.named_child(0) {
+                    let index = source[int_node.byte_range()].parse::<usize>()?;
+                    // A bare tabstop adds no text, so its range is empty.
+                    tabstops
+                        .entry(index)
+                        .or_default()
+                        .push(text.len()..text.len());
+                }
+            }
+            "placeholder" => {
+                // The index is the placeholder's second child.
+                cursor.goto_first_child();
+                cursor.goto_next_sibling();
+                let int_node = cursor.node();
+                let index = source[int_node.byte_range()].parse::<usize>()?;
+
+                // The nested snippet is two siblings further on.
+                cursor.goto_next_sibling();
+                cursor.goto_next_sibling();
+                let range_start = text.len();
+
+                parse_snippet_node(cursor, text, tabstops, source)?;
+                tabstops
+                    .entry(index)
+                    .or_default()
+                    .push(range_start..text.len());
+
+                // Step back up from the nested snippet to the placeholder.
+                cursor.goto_parent();
+            }
+            _ => {}
+        }
+
+        if !cursor.goto_next_sibling() {
+            break;
+        }
+    }
+    cursor.goto_parent();
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_snippet_with_tabstops() {
+        let snippet = Snippet::parse("one$1two").unwrap();
+        assert_eq!(snippet.text, "onetwo");
+        assert_eq!(
+            snippet
+                .tabstops
+                .iter()
+                .map(SmallVec::as_slice)
+                .collect::<Vec<_>>(),
+            &[vec![3..3]]
+        );
+    }
+
+    #[test]
+    fn test_parse_snippet_with_placeholders() {
+        let snippet = Snippet::parse("one${1:two}three").unwrap();
+        assert_eq!(snippet.text, "onetwothree");
+        assert_eq!(
+            snippet
+                .tabstops
+                .iter()
+                .map(SmallVec::as_slice)
+                .collect::<Vec<_>>(),
+            &[vec![3..6]]
+        );
+    }
+
+    #[test]
+    fn test_parse_snippet_with_nested_placeholders() {
+        let snippet = Snippet::parse(
+            "for (${1:var ${2:i} = 0; ${2:i} < ${3:${4:array}.length}; ${2:i}++}) {$5}",
+        )
+        .unwrap();
+        assert_eq!(snippet.text, "for (var i = 0; i < array.length; i++) {}");
+        assert_eq!(
+            snippet
+                .tabstops
+                .iter()
+                .map(SmallVec::as_slice)
+                .collect::<Vec<_>>(),
+            &[
+                vec![5..37],
+                vec![9..10, 16..17, 34..35],
+                vec![20..32],
+                vec![20..25],
+                vec![40..40],
+            ]
+        );
+    }
+}