From f6a817a0f387767b0a6c3206980c8185cf0fa02b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 5 Aug 2022 14:58:45 -0700 Subject: [PATCH 01/22] Start work on a SyntaxMap data structure --- crates/language/src/buffer.rs | 2 +- crates/language/src/language.rs | 44 ++ crates/language/src/syntax_map.rs | 418 +++++++++++++++++++ crates/sum_tree/src/cursor.rs | 6 +- crates/sum_tree/src/sum_tree.rs | 2 + crates/zed/src/languages.rs | 5 + crates/zed/src/languages/rust/injections.scm | 3 + 7 files changed, 476 insertions(+), 4 deletions(-) create mode 100644 crates/language/src/syntax_map.rs create mode 100644 crates/zed/src/languages/rust/injections.scm diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 7c616762d8..b7a1bd30fc 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -2496,7 +2496,7 @@ impl Drop for QueryCursorHandle { } } -trait ToTreeSitterPoint { +pub(crate) trait ToTreeSitterPoint { fn to_ts_point(self) -> tree_sitter::Point; fn from_ts_point(point: tree_sitter::Point) -> Self; } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index fbcc983df9..8dcfc8fffd 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -3,6 +3,7 @@ mod diagnostic_set; mod highlight_map; mod outline; pub mod proto; +mod syntax_map; #[cfg(test)] mod tests; @@ -290,9 +291,17 @@ pub struct Grammar { pub(crate) brackets_query: Option, pub(crate) indents_query: Option, pub(crate) outline_query: Option, + pub(crate) injection_config: Option, pub(crate) highlight_map: Mutex, } +struct InjectionConfig { + query: Query, + content_capture_ix: u32, + language_capture_ix: Option, + languages_by_pattern_ix: Vec>>, +} + #[derive(Clone)] pub enum LanguageServerBinaryStatus { CheckingForUpdate, @@ -571,6 +580,7 @@ impl Language { brackets_query: None, indents_query: None, outline_query: None, + injection_config: None, ts_language, highlight_map: Default::default(), }) @@ -610,6 +620,40 @@ impl Language { Ok(self) } + pub fn with_injection_query(mut self, source: &str) -> Result { + let grammar = self.grammar_mut(); + let query = Query::new(grammar.ts_language, source)?; + let mut language_capture_ix = None; + let mut content_capture_ix = None; + for (ix, name) in query.capture_names().iter().enumerate() { + *match name.as_str() { + "language" => &mut language_capture_ix, + "content" => &mut content_capture_ix, + _ => continue, + } = Some(ix as u32); + } + let languages_by_pattern_ix = (0..query.pattern_count()) + .map(|ix| { + query.property_settings(ix).iter().find_map(|setting| { + if setting.key.as_ref() == "language" { + return setting.value.clone(); + } else { + None + } + }) + }) + .collect(); + if let Some(content_capture_ix) = content_capture_ix { + grammar.injection_config = Some(InjectionConfig { + query, + language_capture_ix, + content_capture_ix, + languages_by_pattern_ix, + }); + } + Ok(self) + } + fn grammar_mut(&mut self) -> &mut Grammar { Arc::get_mut(self.grammar.as_mut().unwrap()).unwrap() } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs new file mode 100644 index 0000000000..01ff0e50a1 --- /dev/null +++ b/crates/language/src/syntax_map.rs @@ -0,0 +1,418 @@ +use crate::{ + Grammar, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, +}; +use collections::VecDeque; +use gpui::executor::Background; +use std::{borrow::Cow, cell::RefCell, cmp::Ordering, ops::Range, sync::Arc}; +use sum_tree::{SeekTarget, SumTree}; +use 
text::{Anchor, BufferSnapshot, Point, Rope, ToOffset}; +use tree_sitter::{Parser, Tree}; +use util::post_inc; + +thread_local! { + static PARSER: RefCell = RefCell::new(Parser::new()); +} + +#[derive(Default)] +pub struct SyntaxMap { + next_layer_id: usize, + snapshot: SyntaxMapSnapshot, +} + +#[derive(Clone, Default)] +pub struct SyntaxMapSnapshot { + version: clock::Global, + layers: SumTree, +} + +#[derive(Clone)] +struct SyntaxLayer { + id: usize, + parent_id: Option, + range: SyntaxLayerRange, + tree: tree_sitter::Tree, + language: Arc, +} + +#[derive(Debug, Clone)] +struct SyntaxLayerSummary { + range: Range, + last_layer_range: Range, +} + +#[derive(Clone, Debug)] +struct SyntaxLayerRange(Range); + +impl SyntaxMap { + pub fn new( + executor: Arc, + registry: Arc, + language: Arc, + text: BufferSnapshot, + prev_set: Option, + ) -> Self { + let mut next_layer_id = 0; + let mut layers = Vec::new(); + let mut injections = VecDeque::<(Option, _, Vec)>::new(); + + injections.push_back((None, language, vec![])); + while let Some((parent_id, language, ranges)) = injections.pop_front() { + if let Some(grammar) = &language.grammar.as_deref() { + let id = post_inc(&mut next_layer_id); + let range = if let Some((first, last)) = ranges.first().zip(ranges.last()) { + text.anchor_before(first.start_byte)..text.anchor_after(last.end_byte) + } else { + Anchor::MIN..Anchor::MAX + }; + let tree = Self::parse_text(grammar, text.as_rope(), None, ranges); + Self::get_injections(grammar, &text, &tree, id, ®istry, &mut injections); + layers.push(SyntaxLayer { + id, + parent_id, + range: SyntaxLayerRange(range), + tree, + language, + }); + } + } + + layers.sort_unstable_by(|a, b| SeekTarget::cmp(&a.range, &b.range, &text)); + + Self { + next_layer_id, + snapshot: SyntaxMapSnapshot { + layers: SumTree::from_iter(layers, &text), + version: text.version, + }, + } + } + + pub fn snapshot(&self) -> SyntaxMapSnapshot { + self.snapshot.clone() + } + + fn interpolate(&mut self, text: &BufferSnapshot) { + let edits = text + .edits_since::<(Point, usize)>(&self.version) + .map(|edit| { + let (lines, bytes) = edit.flatten(); + tree_sitter::InputEdit { + start_byte: bytes.new.start, + old_end_byte: bytes.new.start + bytes.old.len(), + new_end_byte: bytes.new.end, + start_position: lines.new.start.to_ts_point(), + old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) + .to_ts_point(), + new_end_position: lines.new.end.to_ts_point(), + } + }) + .collect::>(); + if edits.is_empty() { + return; + } + } + + fn get_injections( + grammar: &Grammar, + text: &BufferSnapshot, + tree: &Tree, + id: usize, + registry: &Arc, + output: &mut VecDeque<(Option, Arc, Vec)>, + ) { + let config = if let Some(config) = &grammar.injection_config { + config + } else { + return; + }; + + let mut query_cursor = QueryCursorHandle::new(); + for mat in query_cursor.matches( + &config.query, + tree.root_node(), + TextProvider(text.as_rope()), + ) { + let content_ranges = mat + .nodes_for_capture_index(config.content_capture_ix) + .map(|node| node.range()) + .collect::>(); + if content_ranges.is_empty() { + continue; + } + let language_name = config.languages_by_pattern_ix[mat.pattern_index] + .as_ref() + .map(|s| Cow::Borrowed(s.as_ref())) + .or_else(|| { + let ix = config.language_capture_ix?; + let node = mat.nodes_for_capture_index(ix).next()?; + Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) + }); + if let Some(language_name) = language_name { + if let Some(language) = 
registry.get_language(language_name.as_ref()) { + output.push_back((Some(id), language, content_ranges)) + } + } + } + } + + fn parse_text( + grammar: &Grammar, + text: &Rope, + old_tree: Option, + ranges: Vec, + ) -> Tree { + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + let mut chunks = text.chunks_in_range(0..text.len()); + parser + .set_included_ranges(&ranges) + .expect("overlapping ranges"); + parser + .set_language(grammar.ts_language) + .expect("incompatible grammar"); + parser + .parse_with( + &mut move |offset, _| { + chunks.seek(offset); + chunks.next().unwrap_or("").as_bytes() + }, + old_tree.as_ref(), + ) + .expect("invalid language") + }) + } +} + +impl SyntaxMapSnapshot { + pub fn layers_for_range<'a, T: ToOffset>( + &self, + range: Range, + buffer: &BufferSnapshot, + ) -> Vec<(Tree, &Grammar)> { + let start = buffer.anchor_before(range.start.to_offset(buffer)); + let end = buffer.anchor_after(range.end.to_offset(buffer)); + + let mut cursor = self.layers.filter::<_, ()>(|summary| { + let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); + let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); + !is_before_start && !is_after_end + }); + + let mut result = Vec::new(); + cursor.next(buffer); + while let Some(item) = cursor.item() { + if let Some(grammar) = &item.language.grammar { + result.push((item.tree.clone(), grammar.as_ref())); + } + cursor.next(buffer) + } + + result + } +} + +impl std::ops::Deref for SyntaxMap { + type Target = SyntaxMapSnapshot; + + fn deref(&self) -> &Self::Target { + &self.snapshot + } +} + +impl Default for SyntaxLayerSummary { + fn default() -> Self { + Self { + range: Anchor::MAX..Anchor::MIN, + last_layer_range: Anchor::MIN..Anchor::MAX, + } + } +} + +impl sum_tree::Summary for SyntaxLayerSummary { + type Context = BufferSnapshot; + + fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { + if other.range.start.cmp(&self.range.start, buffer).is_lt() { + self.range.start = other.range.start; + } + if other.range.end.cmp(&self.range.end, buffer).is_gt() { + self.range.end = other.range.end; + } + self.last_layer_range = other.last_layer_range.clone(); + } +} + +impl Default for SyntaxLayerRange { + fn default() -> Self { + Self(Anchor::MIN..Anchor::MAX) + } +} + +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerRange> for SyntaxLayerRange { + fn cmp(&self, cursor_location: &Self, buffer: &BufferSnapshot) -> Ordering { + self.0 + .start + .cmp(&cursor_location.0.start, buffer) + .then_with(|| cursor_location.0.end.cmp(&self.0.end, buffer)) + } +} + +impl<'a> sum_tree::Dimension<'a, SyntaxLayerSummary> for SyntaxLayerRange { + fn add_summary( + &mut self, + summary: &'a SyntaxLayerSummary, + _: &::Context, + ) { + self.0 = summary.last_layer_range.clone(); + } +} + +impl sum_tree::Item for SyntaxLayer { + type Summary = SyntaxLayerSummary; + + fn summary(&self) -> Self::Summary { + SyntaxLayerSummary { + range: self.range.0.clone(), + last_layer_range: self.range.0.clone(), + } + } +} + +impl std::fmt::Debug for SyntaxLayer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SyntaxLayer") + .field("id", &self.id) + .field("parent_id", &self.parent_id) + .field("range", &self.range) + .field("tree", &self.tree) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::LanguageConfig; + use gpui::MutableAppContext; + use text::{Buffer, Point}; + use unindent::Unindent as _; + + #[gpui::test] + fn test_syntax_map(cx: &mut 
MutableAppContext) { + let buffer = Buffer::new( + 0, + 0, + r#" + fn a() { + assert_eq!( + b(vec![C {}]), + vec![d.e], + ); + println!("{}", f(|_| true)); + } + "# + .unindent(), + ); + + let executor = cx.background().clone(); + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + let snapshot = buffer.snapshot(); + registry.add(language.clone()); + + let syntax_map = SyntaxMap::new(executor, registry, language, snapshot.clone(), None); + + let layers = syntax_map.layers_for_range(Point::new(0, 0)..Point::new(0, 1), &snapshot); + assert_layers( + &layers, + &["(source_file (function_item name: (identifier)..."], + ); + + let layers = syntax_map.layers_for_range(Point::new(2, 0)..Point::new(2, 0), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ... (block (expression_statement (macro_invocation...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(2, 14)..Point::new(2, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (struct_expression ...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(3, 14)..Point::new(3, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (field_expression ...", + ], + ); + + let layers = syntax_map.layers_for_range(Point::new(5, 12)..Point::new(5, 16), &snapshot); + assert_layers( + &layers, + &[ + "...(function_item ...", + "...(call_expression ... (arguments (closure_expression ...", + ], + ); + } + + fn rust_lang() -> Language { + Language::new( + LanguageConfig { + name: "Rust".into(), + path_suffixes: vec!["rs".to_string()], + ..Default::default() + }, + Some(tree_sitter_rust::language()), + ) + .with_injection_query( + r#" + (macro_invocation + (token_tree) @content + (#set! 
"language" "rust")) + "#, + ) + .unwrap() + } + + fn assert_layers(layers: &[(Tree, &Grammar)], expected_layers: &[&str]) { + assert_eq!( + layers.len(), + expected_layers.len(), + "wrong number of layers" + ); + for (i, (layer, expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { + let actual_s_exp = layer.0.root_node().to_sexp(); + assert!( + string_contains_sequence( + &actual_s_exp, + &expected_s_exp.split("...").collect::>() + ), + "layer {i}:\n\nexpected: {expected_s_exp}\nactual: {actual_s_exp}", + ); + } + } + + pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { + let mut last_part_end = 0; + for part in parts { + if let Some(start_ix) = text[last_part_end..].find(part) { + last_part_end = start_ix + part.len(); + } else { + return false; + } + } + true + } +} diff --git a/crates/sum_tree/src/cursor.rs b/crates/sum_tree/src/cursor.rs index 09f253d432..52200d64cf 100644 --- a/crates/sum_tree/src/cursor.rs +++ b/crates/sum_tree/src/cursor.rs @@ -608,9 +608,9 @@ where impl<'a, F, T, S, U> Iterator for FilterCursor<'a, F, T, U> where - F: Fn(&T::Summary) -> bool, + F: FnMut(&T::Summary) -> bool, T: Item, - S: Summary, + S: Summary, //Context for the summary must be unit type, as .next() doesn't take arguments U: Dimension<'a, T::Summary>, { type Item = &'a T; @@ -621,7 +621,7 @@ where } if let Some(item) = self.item() { - self.cursor.next_internal(&self.filter_node, &()); + self.cursor.next_internal(&mut self.filter_node, &()); Some(item) } else { None diff --git a/crates/sum_tree/src/sum_tree.rs b/crates/sum_tree/src/sum_tree.rs index fdfd5d9de2..cb05dff967 100644 --- a/crates/sum_tree/src/sum_tree.rs +++ b/crates/sum_tree/src/sum_tree.rs @@ -168,6 +168,8 @@ impl SumTree { Cursor::new(self) } + /// Note: If the summary type requires a non `()` context, then the filter cursor + /// that is returned cannot be used with Rust's iterators. pub fn filter<'a, F, U>(&'a self, filter_node: F) -> FilterCursor where F: FnMut(&T::Summary) -> bool, diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index 8dc20bdbd1..b7057bdd13 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -128,6 +128,11 @@ pub(crate) fn language( .with_outline_query(query.as_ref()) .expect("failed to load outline query"); } + if let Some(query) = load_query(name, "/injections") { + language = language + .with_injection_query(query.as_ref()) + .expect("failed to load injection query"); + } if let Some(lsp_adapter) = lsp_adapter { language = language.with_lsp_adapter(lsp_adapter) } diff --git a/crates/zed/src/languages/rust/injections.scm b/crates/zed/src/languages/rust/injections.scm new file mode 100644 index 0000000000..9d8c03c889 --- /dev/null +++ b/crates/zed/src/languages/rust/injections.scm @@ -0,0 +1,3 @@ +(macro_invocation + (token_tree) @content) + (#set! 
"language" "rust")) \ No newline at end of file From 02f8705f2e5c8a1b9ca47dd4a61d03d7dd6f8b60 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 18 Aug 2022 18:01:04 -0700 Subject: [PATCH 02/22] Start work on handling edits in the SyntaxMap --- crates/language/src/syntax_map.rs | 825 +++++++++++++++++++++++------- 1 file changed, 633 insertions(+), 192 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 01ff0e50a1..41ea579047 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,11 +1,13 @@ use crate::{ - Grammar, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, + Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, + ToTreeSitterPoint, }; -use collections::VecDeque; -use gpui::executor::Background; -use std::{borrow::Cow, cell::RefCell, cmp::Ordering, ops::Range, sync::Arc}; -use sum_tree::{SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, Point, Rope, ToOffset}; +use collections::HashMap; +use std::{ + borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, +}; +use sum_tree::{Bias, SeekTarget, SumTree}; +use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset}; use tree_sitter::{Parser, Tree}; use util::post_inc; @@ -15,175 +17,399 @@ thread_local! { #[derive(Default)] pub struct SyntaxMap { - next_layer_id: usize, - snapshot: SyntaxMapSnapshot, + version: clock::Global, + snapshot: SyntaxSnapshot, + language_registry: Option>, } #[derive(Clone, Default)] -pub struct SyntaxMapSnapshot { - version: clock::Global, +pub struct SyntaxSnapshot { layers: SumTree, } #[derive(Clone)] struct SyntaxLayer { - id: usize, - parent_id: Option, - range: SyntaxLayerRange, + depth: usize, + range: Range, tree: tree_sitter::Tree, language: Arc, } #[derive(Debug, Clone)] struct SyntaxLayerSummary { + max_depth: usize, range: Range, last_layer_range: Range, } #[derive(Clone, Debug)] -struct SyntaxLayerRange(Range); +struct Depth(usize); + +#[derive(Clone, Debug)] +struct MaxPosition(Anchor); + +enum ReparseStep { + CreateLayer { + depth: usize, + language: Arc, + ranges: Vec, + }, + EnterChangedRange { + id: usize, + depth: usize, + range: Range, + }, + LeaveChangedRange { + id: usize, + depth: usize, + range: Range, + }, +} impl SyntaxMap { - pub fn new( - executor: Arc, - registry: Arc, - language: Arc, - text: BufferSnapshot, - prev_set: Option, - ) -> Self { - let mut next_layer_id = 0; - let mut layers = Vec::new(); - let mut injections = VecDeque::<(Option, _, Vec)>::new(); - - injections.push_back((None, language, vec![])); - while let Some((parent_id, language, ranges)) = injections.pop_front() { - if let Some(grammar) = &language.grammar.as_deref() { - let id = post_inc(&mut next_layer_id); - let range = if let Some((first, last)) = ranges.first().zip(ranges.last()) { - text.anchor_before(first.start_byte)..text.anchor_after(last.end_byte) - } else { - Anchor::MIN..Anchor::MAX - }; - let tree = Self::parse_text(grammar, text.as_rope(), None, ranges); - Self::get_injections(grammar, &text, &tree, id, ®istry, &mut injections); - layers.push(SyntaxLayer { - id, - parent_id, - range: SyntaxLayerRange(range), - tree, - language, - }); - } - } - - layers.sort_unstable_by(|a, b| SeekTarget::cmp(&a.range, &b.range, &text)); - - Self { - next_layer_id, - snapshot: SyntaxMapSnapshot { - layers: SumTree::from_iter(layers, &text), - version: text.version, - }, - } + pub fn new() -> Self { + 
Self::default() } - pub fn snapshot(&self) -> SyntaxMapSnapshot { + pub fn set_language_registry(&mut self, registry: Arc) { + self.language_registry = Some(registry); + } + + pub fn snapshot(&self) -> SyntaxSnapshot { self.snapshot.clone() } - fn interpolate(&mut self, text: &BufferSnapshot) { + pub fn interpolate(&mut self, text: &BufferSnapshot) { + self.snapshot.interpolate(&self.version, text); + self.version = text.version.clone(); + } + + pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { + self.version = text.version.clone(); + self.snapshot + .reparse(self.language_registry.clone(), language, text); + } +} + +// Assumptions: +// * The maximum depth is small (< 5) +// * For a given depth, the number of layers that touch a given range +// is small (usually only 1) + +// |change| +// 0 (............................................................) +// 1 (...............................................) +// 1 (................) +// 1 (.......) +// 2 (....) +// 2 (....) +// 2 (.......) +// 2 (...) +// 2 (.........) +// 2 (...) +// 3 (.) +// 3 (.) +// 3 (..) +// 3 (..) +// 3 (..) +// 3 (.) + +impl SyntaxSnapshot { + pub fn interpolate(&mut self, current_version: &clock::Global, text: &BufferSnapshot) { let edits = text - .edits_since::<(Point, usize)>(&self.version) - .map(|edit| { - let (lines, bytes) = edit.flatten(); - tree_sitter::InputEdit { - start_byte: bytes.new.start, - old_end_byte: bytes.new.start + bytes.old.len(), - new_end_byte: bytes.new.end, - start_position: lines.new.start.to_ts_point(), - old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) - .to_ts_point(), - new_end_position: lines.new.end.to_ts_point(), - } - }) + .edits_since::<(usize, Point)>(¤t_version) .collect::>(); if edits.is_empty() { return; } + + let mut layers = SumTree::new(); + let max_depth = self.layers.summary().max_depth; + let mut cursor = self.layers.cursor::(); + cursor.next(&text); + + for depth in 0..max_depth { + let mut edits = &edits[..]; + layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); + + while let Some(layer) = cursor.item() { + let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ + &layer.range.start, + &layer.range.end, + ]); + let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); + let start_byte = layer_range.start.0; + let start_point = layer_range.start.1; + + // Preserve any layers at this depth that precede the first edit. + let first_edit = if let Some(edit) = edits.first() { + edit + } else { + break; + }; + if first_edit.new.start.0 > layer_range.end.0 { + layers.push_tree( + cursor.slice( + &( + Depth(depth), + MaxPosition(text.anchor_before(first_edit.new.start.0)), + ), + Bias::Left, + text, + ), + text, + ); + continue; + } + + // Preserve any layers at this depth that follow the last edit. + let last_edit = edits.last().unwrap(); + if last_edit.new.end.0 < layer_range.start.0 { + break; + } + + let mut layer = layer.clone(); + for (i, edit) in edits.iter().enumerate().rev() { + // Ignore any edits that start after the end of this layer. + if edit.new.start.0 > layer_range.end.0 { + continue; + } + + // Ignore edits that end before the start of this layer, and don't consider them + // for any subsequent layers at this same depth. + if edit.new.end.0 <= start_byte { + edits = &edits[i + 1..]; + break; + } + + // Apply any edits that intersect this layer to the layer's syntax tree. 
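+ // The edit positions are in buffer coordinates, but each layer's tree was parsed
+ // relative to the layer's own start, so translate them into layer-local coordinates
+ // by subtracting `start_byte` / `start_point`. Edits that begin before the layer
+ // are clamped to the layer's start.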
+ if edit.new.start.0 >= start_byte { + layer.tree.edit(&tree_sitter::InputEdit { + start_byte: edit.new.start.0 - start_byte, + old_end_byte: edit.new.start.0 - start_byte + + (edit.old.end.0 - edit.old.start.0), + new_end_byte: edit.new.end.0 - start_byte, + start_position: (edit.new.start.1 - start_point).to_ts_point(), + old_end_position: (edit.new.start.1 - start_point + + (edit.old.end.1 - edit.old.start.1)) + .to_ts_point(), + new_end_position: (edit.new.end.1 - start_point).to_ts_point(), + }); + } else { + layer.tree.edit(&tree_sitter::InputEdit { + start_byte: 0, + old_end_byte: edit.new.end.0 - start_byte, + new_end_byte: 0, + start_position: Default::default(), + old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + new_end_position: Default::default(), + }); + break; + } + } + + layers.push(layer, text); + cursor.next(text); + } + } + + layers.push_tree(cursor.suffix(&text), &text); + drop(cursor); + self.layers = layers; } - fn get_injections( - grammar: &Grammar, + pub fn reparse( + &mut self, + registry: Option>, + language: Arc, text: &BufferSnapshot, - tree: &Tree, - id: usize, - registry: &Arc, - output: &mut VecDeque<(Option, Arc, Vec)>, ) { - let config = if let Some(config) = &grammar.injection_config { - config - } else { - return; - }; + let mut cursor = self.layers.cursor::(); + cursor.next(&text); + let mut layers = SumTree::new(); - let mut query_cursor = QueryCursorHandle::new(); - for mat in query_cursor.matches( - &config.query, - tree.root_node(), - TextProvider(text.as_rope()), - ) { - let content_ranges = mat - .nodes_for_capture_index(config.content_capture_ix) - .map(|node| node.range()) - .collect::>(); - if content_ranges.is_empty() { - continue; - } - let language_name = config.languages_by_pattern_ix[mat.pattern_index] - .as_ref() - .map(|s| Cow::Borrowed(s.as_ref())) - .or_else(|| { - let ix = config.language_capture_ix?; - let node = mat.nodes_for_capture_index(ix).next()?; - Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) - }); - if let Some(language_name) = language_name { - if let Some(language) = registry.get_language(language_name.as_ref()) { - output.push_back((Some(id), language, content_ranges)) + let mut next_change_id = 0; + let mut current_changes = HashMap::default(); + let mut queue = BinaryHeap::new(); + queue.push(ReparseStep::CreateLayer { + depth: 0, + language: language.clone(), + ranges: Vec::new(), + }); + + while let Some(step) = queue.pop() { + match step { + ReparseStep::CreateLayer { + depth, + language, + ranges, + } => { + let range; + let start_point; + let start_byte; + let end_byte; + if let Some((first, last)) = ranges.first().zip(ranges.last()) { + start_point = first.start_point; + start_byte = first.start_byte; + end_byte = last.end_byte; + range = text.anchor_before(start_byte)..text.anchor_after(end_byte); + } else { + start_point = Point::zero().to_ts_point(); + start_byte = 0; + end_byte = text.len(); + range = Anchor::MIN..Anchor::MAX; + }; + + let target = (Depth(depth), range.clone()); + if target.cmp(cursor.start(), &text).is_gt() { + if current_changes.is_empty() { + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, &text); + } else { + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.start.cmp(&range.end, text).is_ge() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + } + } + + let mut old_layer = 
cursor.item(); + if let Some(layer) = old_layer { + if layer.range.to_offset(text) == (start_byte..end_byte) { + cursor.next(&text); + } else { + old_layer = None; + } + } + + let grammar = if let Some(grammar) = language.grammar.as_deref() { + grammar + } else { + continue; + }; + + let tree; + let changed_ranges; + if let Some(old_layer) = old_layer { + tree = parse_text( + grammar, + text.as_rope(), + Some(old_layer.tree.clone()), + ranges, + ); + + changed_ranges = old_layer + .tree + .changed_ranges(&tree) + .map(|r| r.start_byte..r.end_byte) + .collect(); + } else { + tree = parse_text(grammar, text.as_rope(), None, ranges); + changed_ranges = vec![0..end_byte - start_byte]; + } + + layers.push( + SyntaxLayer { + depth, + range, + tree: tree.clone(), + language: language.clone(), + }, + &text, + ); + + if let (Some((config, registry)), false) = ( + grammar.injection_config.as_ref().zip(registry.as_ref()), + changed_ranges.is_empty(), + ) { + let depth = depth + 1; + queue.extend(changed_ranges.iter().flat_map(|range| { + let id = post_inc(&mut next_change_id); + let range = start_byte + range.start..start_byte + range.end; + [ + ReparseStep::EnterChangedRange { + id, + depth, + range: range.clone(), + }, + ReparseStep::LeaveChangedRange { + id, + depth, + range: range.clone(), + }, + ] + })); + + get_injections( + config, + text, + &tree, + registry, + depth, + start_byte, + Point::from_ts_point(start_point), + &changed_ranges, + &mut queue, + ); + } + } + ReparseStep::EnterChangedRange { id, depth, range } => { + let range = text.anchor_before(range.start)..text.anchor_after(range.end); + if current_changes.is_empty() { + let target = (Depth(depth), range.start..Anchor::MAX); + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, text); + } else { + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.end.cmp(&range.start, text).is_gt() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + } + + current_changes.insert(id, range); + } + ReparseStep::LeaveChangedRange { id, depth, range } => { + let range = text.anchor_before(range.start)..text.anchor_after(range.end); + while let Some(layer) = cursor.item() { + if layer.depth > depth + || layer.depth == depth + && layer.range.start.cmp(&range.end, text).is_ge() + { + break; + } + if !layer_is_changed(layer, text, ¤t_changes) { + layers.push(layer.clone(), text); + } + cursor.next(text); + } + + current_changes.remove(&id); } } } + + let slice = cursor.suffix(&text); + layers.push_tree(slice, &text); + drop(cursor); + self.layers = layers; } - fn parse_text( - grammar: &Grammar, - text: &Rope, - old_tree: Option, - ranges: Vec, - ) -> Tree { - PARSER.with(|parser| { - let mut parser = parser.borrow_mut(); - let mut chunks = text.chunks_in_range(0..text.len()); - parser - .set_included_ranges(&ranges) - .expect("overlapping ranges"); - parser - .set_language(grammar.ts_language) - .expect("incompatible grammar"); - parser - .parse_with( - &mut move |offset, _| { - chunks.seek(offset); - chunks.next().unwrap_or("").as_bytes() - }, - old_tree.as_ref(), - ) - .expect("invalid language") - }) - } -} - -impl SyntaxMapSnapshot { pub fn layers_for_range<'a, T: ToOffset>( &self, range: Range, @@ -211,17 +437,184 @@ impl SyntaxMapSnapshot { } } +fn parse_text( + grammar: &Grammar, + text: &Rope, + old_tree: Option, + mut ranges: Vec, +) -> Tree { + let (start_byte, start_point) = ranges 
+ .first() + .map(|range| (range.start_byte, Point::from_ts_point(range.start_point))) + .unwrap_or_default(); + + for range in &mut ranges { + range.start_byte -= start_byte; + range.end_byte -= start_byte; + range.start_point = (Point::from_ts_point(range.start_point) - start_point).to_ts_point(); + range.end_point = (Point::from_ts_point(range.end_point) - start_point).to_ts_point(); + } + + PARSER.with(|parser| { + let mut parser = parser.borrow_mut(); + let mut chunks = text.chunks_in_range(start_byte..text.len()); + parser + .set_included_ranges(&ranges) + .expect("overlapping ranges"); + parser + .set_language(grammar.ts_language) + .expect("incompatible grammar"); + parser + .parse_with( + &mut move |offset, _| { + chunks.seek(start_byte + offset); + chunks.next().unwrap_or("").as_bytes() + }, + old_tree.as_ref(), + ) + .expect("invalid language") + }) +} + +fn get_injections( + config: &InjectionConfig, + text: &BufferSnapshot, + tree: &Tree, + language_registry: &LanguageRegistry, + depth: usize, + start_byte: usize, + start_point: Point, + query_ranges: &[Range], + stack: &mut BinaryHeap, +) -> bool { + let mut result = false; + let mut query_cursor = QueryCursorHandle::new(); + let mut prev_match = None; + for query_range in query_ranges { + query_cursor.set_byte_range(query_range.start..query_range.end); + for mat in query_cursor.matches( + &config.query, + tree.root_node(), + TextProvider(text.as_rope()), + ) { + let content_ranges = mat + .nodes_for_capture_index(config.content_capture_ix) + .map(|node| tree_sitter::Range { + start_byte: start_byte + node.start_byte(), + end_byte: start_byte + node.end_byte(), + start_point: (start_point + Point::from_ts_point(node.start_position())) + .to_ts_point(), + end_point: (start_point + Point::from_ts_point(node.end_position())) + .to_ts_point(), + }) + .collect::>(); + if content_ranges.is_empty() { + continue; + } + + // Avoid duplicate matches if two changed ranges intersect the same injection. 
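+ // The same injection can be matched once per changed range that touches it, so
+ // compare each match's (pattern index, byte range) against the previous one and
+ // skip exact repeats.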
+ let content_range = + content_ranges.first().unwrap().start_byte..content_ranges.last().unwrap().end_byte; + if let Some((last_pattern_ix, last_range)) = &prev_match { + if mat.pattern_index == *last_pattern_ix && content_range == *last_range { + continue; + } + } + prev_match = Some((mat.pattern_index, content_range)); + + let language_name = config.languages_by_pattern_ix[mat.pattern_index] + .as_ref() + .map(|s| Cow::Borrowed(s.as_ref())) + .or_else(|| { + let ix = config.language_capture_ix?; + let node = mat.nodes_for_capture_index(ix).next()?; + Some(Cow::Owned( + text.text_for_range( + start_byte + node.start_byte()..start_byte + node.end_byte(), + ) + .collect(), + )) + }); + + if let Some(language_name) = language_name { + if let Some(language) = language_registry.get_language(language_name.as_ref()) { + result = true; + stack.push(ReparseStep::CreateLayer { + depth, + language, + ranges: content_ranges, + }) + } + } + } + } + result +} + +fn layer_is_changed( + layer: &SyntaxLayer, + text: &BufferSnapshot, + changed_ranges: &HashMap>, +) -> bool { + changed_ranges.values().any(|range| { + let is_before_layer = range.end.cmp(&layer.range.start, text).is_le(); + let is_after_layer = range.start.cmp(&layer.range.end, text).is_ge(); + !is_before_layer && !is_after_layer + }) +} + impl std::ops::Deref for SyntaxMap { - type Target = SyntaxMapSnapshot; + type Target = SyntaxSnapshot; fn deref(&self) -> &Self::Target { &self.snapshot } } +impl ReparseStep { + fn sort_key(&self) -> (usize, Range) { + match self { + ReparseStep::CreateLayer { depth, ranges, .. } => ( + *depth, + ranges.first().map_or(0, |r| r.start_byte) + ..ranges.last().map_or(usize::MAX, |r| r.end_byte), + ), + ReparseStep::EnterChangedRange { depth, range, .. } => { + (*depth, range.start..usize::MAX) + } + ReparseStep::LeaveChangedRange { depth, range, .. 
} => (*depth, range.end..usize::MAX), + } + } +} + +impl PartialEq for ReparseStep { + fn eq(&self, _: &Self) -> bool { + false + } +} + +impl Eq for ReparseStep {} + +impl PartialOrd for ReparseStep { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl Ord for ReparseStep { + fn cmp(&self, other: &Self) -> Ordering { + let (depth_a, range_a) = self.sort_key(); + let (depth_b, range_b) = other.sort_key(); + Ord::cmp(&depth_b, &depth_a) + .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) + .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { + max_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, } @@ -232,38 +625,49 @@ impl sum_tree::Summary for SyntaxLayerSummary { type Context = BufferSnapshot; fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { - if other.range.start.cmp(&self.range.start, buffer).is_lt() { - self.range.start = other.range.start; + if other.max_depth > self.max_depth { + *self = other.clone(); + } else { + if other.range.start.cmp(&self.range.start, buffer).is_lt() { + self.range.start = other.range.start; + } + if other.range.end.cmp(&self.range.end, buffer).is_gt() { + self.range.end = other.range.end; + } + self.last_layer_range = other.last_layer_range.clone(); } - if other.range.end.cmp(&self.range.end, buffer).is_gt() { - self.range.end = other.range.end; - } - self.last_layer_range = other.last_layer_range.clone(); } } -impl Default for SyntaxLayerRange { - fn default() -> Self { - Self(Anchor::MIN..Anchor::MAX) +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for Depth { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, _: &BufferSnapshot) -> Ordering { + Ord::cmp(&self.0, &cursor_location.max_depth) } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerRange> for SyntaxLayerRange { - fn cmp(&self, cursor_location: &Self, buffer: &BufferSnapshot) -> Ordering { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, MaxPosition) { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { self.0 - .start - .cmp(&cursor_location.0.start, buffer) - .then_with(|| cursor_location.0.end.cmp(&self.0.end, buffer)) + .cmp(&cursor_location, text) + .then_with(|| (self.1).0.cmp(&cursor_location.range.end, text)) } } -impl<'a> sum_tree::Dimension<'a, SyntaxLayerSummary> for SyntaxLayerRange { - fn add_summary( - &mut self, - summary: &'a SyntaxLayerSummary, - _: &::Context, - ) { - self.0 = summary.last_layer_range.clone(); +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Range) { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { + self.0 + .cmp(&cursor_location, buffer) + .then_with(|| { + self.1 + .start + .cmp(&cursor_location.last_layer_range.start, buffer) + }) + .then_with(|| { + cursor_location + .last_layer_range + .end + .cmp(&self.1.end, buffer) + }) } } @@ -272,8 +676,9 @@ impl sum_tree::Item for SyntaxLayer { fn summary(&self) -> Self::Summary { SyntaxLayerSummary { - range: self.range.0.clone(), - last_layer_range: self.range.0.clone(), + max_depth: self.depth, + range: self.range.clone(), + last_layer_range: self.range.clone(), } } } @@ -281,8 +686,7 @@ impl sum_tree::Item for SyntaxLayer { impl std::fmt::Debug for SyntaxLayer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SyntaxLayer") - .field("id", 
&self.id) - .field("parent_id", &self.parent_id) + .field("depth", &self.depth) .field("range", &self.range) .field("tree", &self.tree) .finish() @@ -293,13 +697,16 @@ impl std::fmt::Debug for SyntaxLayer { mod tests { use super::*; use crate::LanguageConfig; - use gpui::MutableAppContext; use text::{Buffer, Point}; use unindent::Unindent as _; #[gpui::test] - fn test_syntax_map(cx: &mut MutableAppContext) { - let buffer = Buffer::new( + fn test_syntax_map_layers_for_range() { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + + let mut buffer = Buffer::new( 0, 0, r#" @@ -314,57 +721,80 @@ mod tests { .unindent(), ); - let executor = cx.background().clone(); - let registry = Arc::new(LanguageRegistry::test()); - let language = Arc::new(rust_lang()); - let snapshot = buffer.snapshot(); - registry.add(language.clone()); + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(language.clone(), &buffer); - let syntax_map = SyntaxMap::new(executor, registry, language, snapshot.clone(), None); - - let layers = syntax_map.layers_for_range(Point::new(0, 0)..Point::new(0, 1), &snapshot); - assert_layers( - &layers, - &["(source_file (function_item name: (identifier)..."], - ); - - let layers = syntax_map.layers_for_range(Point::new(2, 0)..Point::new(2, 0), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 0)..Point::new(2, 0), &[ "...(function_item ... (block (expression_statement (macro_invocation...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(2, 14)..Point::new(2, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), &[ "...(function_item ...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", "...(array_expression (struct_expression ...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(3, 14)..Point::new(3, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 14)..Point::new(3, 16), &[ "...(function_item ...", "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", "...(array_expression (field_expression ...", ], ); - - let layers = syntax_map.layers_for_range(Point::new(5, 12)..Point::new(5, 16), &snapshot); - assert_layers( - &layers, + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(5, 12)..Point::new(5, 16), &[ "...(function_item ...", "...(call_expression ... (arguments (closure_expression ...", ], ); + + // Replace a vec! macro invocation with a plain slice, removing a syntactic layer. + let macro_name_range = range_for_text(&buffer, "vec!"); + buffer.edit([(macro_name_range, "&")]); + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (reference_expression value: (array_expression...", + ], + ); + + // Put the vec! macro back, adding back the syntactic layer. 
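+ // `undo` restores the original text; after interpolating the edit and reparsing,
+ // the injected layer for the macro's token tree should be rebuilt.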
+ buffer.undo(); + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(2, 14)..Point::new(2, 16), + &[ + "...(function_item ...", + "...(tuple_expression (call_expression ... arguments: (arguments (macro_invocation...", + "...(array_expression (struct_expression ...", + ], + ); } fn rust_lang() -> Language { @@ -386,7 +816,18 @@ mod tests { .unwrap() } - fn assert_layers(layers: &[(Tree, &Grammar)], expected_layers: &[&str]) { + fn range_for_text(buffer: &Buffer, text: &str) -> Range { + let start = buffer.as_rope().to_string().find(text).unwrap(); + start..start + text.len() + } + + fn assert_layers_for_range( + syntax_map: &SyntaxMap, + buffer: &BufferSnapshot, + range: Range, + expected_layers: &[&str], + ) { + let layers = syntax_map.layers_for_range(range, &buffer); assert_eq!( layers.len(), expected_layers.len(), From 5209e2d68cc1db6e9d39c43432cce6595102e69a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 19 Aug 2022 17:43:33 -0700 Subject: [PATCH 03/22] Fix off-by-one in handling edits, start on more edit unit tests --- crates/language/src/syntax_map.rs | 142 ++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 8 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 41ea579047..fe84265ec2 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -7,7 +7,7 @@ use std::{ borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset}; +use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{Parser, Tree}; use util::post_inc; @@ -128,7 +128,7 @@ impl SyntaxSnapshot { let mut cursor = self.layers.cursor::(); cursor.next(&text); - for depth in 0..max_depth { + for depth in 0..=max_depth { let mut edits = &edits[..]; layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); @@ -410,11 +410,31 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, &Tree, (usize, Point))> { + self.layers + .iter() + .filter_map(|layer| { + if let Some(grammar) = &layer.language.grammar { + Some(( + grammar.as_ref(), + &layer.tree, + ( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer), + ), + )) + } else { + None + } + }) + .collect() + } + pub fn layers_for_range<'a, T: ToOffset>( &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(Tree, &Grammar)> { + ) -> Vec<(&Grammar, &Tree, (usize, Point))> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -426,9 +446,16 @@ impl SyntaxSnapshot { let mut result = Vec::new(); cursor.next(buffer); - while let Some(item) = cursor.item() { - if let Some(grammar) = &item.language.grammar { - result.push((item.tree.clone(), grammar.as_ref())); + while let Some(layer) = cursor.item() { + if let Some(grammar) = &layer.language.grammar { + result.push(( + grammar.as_ref(), + &layer.tree, + ( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer), + ), + )); } cursor.next(buffer) } @@ -698,7 +725,9 @@ mod tests { use super::*; use crate::LanguageConfig; use text::{Buffer, Point}; + use tree_sitter::Query; use unindent::Unindent as _; + use util::test::marked_text_ranges; #[gpui::test] fn 
test_syntax_map_layers_for_range() { @@ -797,6 +826,47 @@ mod tests { ); } + #[gpui::test] + fn test_syntax_map_edits() { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + registry.add(language.clone()); + + let mut buffer = Buffer::new(0, 0, "".into()); + syntax_map.reparse(language.clone(), &buffer); + + edit_buffer_n( + &mut buffer, + &[ + "«fn a() { dbg }»", + "fn a() { dbg«!» }", + "fn a() { dbg!«()» }", + "fn a() { dbg!(«b») }", + "fn a() { dbg!(b«.») }", + "fn a() { dbg!(b.«c») }", + "fn a() { dbg!(b.c«()») }", + "fn a() { dbg!(b.c(«vec»)) }", + "fn a() { dbg!(b.c(vec«!»)) }", + "fn a() { dbg!(b.c(vec!«[]»)) }", + "fn a() { dbg!(b.c(vec![«d»])) }", + "fn a() { dbg!(b.c(vec![d«.»])) }", + "fn a() { dbg!(b.c(vec![d.«e»])) }", + ], + ); + + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_node_ranges( + &syntax_map, + &buffer, + "(field_identifier) @_", + "fn a() { dbg!(b.«c»(vec![d.«e»])) }", + ); + } + fn rust_lang() -> Language { Language::new( LanguageConfig { @@ -833,8 +903,10 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, (layer, expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { - let actual_s_exp = layer.0.root_node().to_sexp(); + for (i, ((_, tree, _), expected_s_exp)) in + layers.iter().zip(expected_layers.iter()).enumerate() + { + let actual_s_exp = tree.root_node().to_sexp(); assert!( string_contains_sequence( &actual_s_exp, @@ -845,6 +917,60 @@ mod tests { } } + fn assert_node_ranges( + syntax_map: &SyntaxMap, + buffer: &BufferSnapshot, + query: &str, + marked_string: &str, + ) { + let mut cursor = QueryCursorHandle::new(); + let mut actual_ranges = Vec::>::new(); + for (grammar, tree, (start_byte, _)) in syntax_map.layers(buffer) { + let query = Query::new(grammar.ts_language, query).unwrap(); + for (mat, ix) in + cursor.captures(&query, tree.root_node(), TextProvider(buffer.as_rope())) + { + let range = mat.captures[ix].node.byte_range(); + actual_ranges.push(start_byte + range.start..start_byte + range.end); + } + } + + let (text, expected_ranges) = marked_text_ranges(marked_string, false); + assert_eq!(text, buffer.text()); + assert_eq!(actual_ranges, expected_ranges); + } + + fn edit_buffer_n(buffer: &mut Buffer, marked_strings: &[&str]) { + for marked_string in marked_strings { + edit_buffer(buffer, marked_string); + } + } + + fn edit_buffer(buffer: &mut Buffer, marked_string: &str) { + let old_text = buffer.text(); + let (new_text, mut ranges) = marked_text_ranges(marked_string, false); + assert_eq!(ranges.len(), 1); + + let inserted_range = ranges.pop().unwrap(); + let inserted_text = new_text[inserted_range.clone()].to_string(); + let deleted_len = (inserted_range.len() as isize + old_text.len() as isize + - new_text.len() as isize) as usize; + let deleted_range = inserted_range.start..inserted_range.start + deleted_len; + + assert_eq!( + old_text[..deleted_range.start], + new_text[..inserted_range.start], + "invalid edit", + ); + assert_eq!( + old_text[deleted_range.end..], + new_text[inserted_range.end..], + "invalid edit", + ); + + buffer.edit([(deleted_range, inserted_text)]); + } + pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { let mut last_part_end = 0; for part in parts { From e8548e7732e4f48d9f5c5249d75fca0cb0ec2441 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 21 Aug 2022 11:36:17 -0700 
Subject: [PATCH 04/22] Restructure handling of changed regions when reparsing --- crates/language/src/syntax_map.rs | 474 +++++++++++++++--------------- 1 file changed, 237 insertions(+), 237 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index fe84265ec2..71ac4d2959 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -2,14 +2,12 @@ use crate::{ Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, ToTreeSitterPoint, }; -use collections::HashMap; use std::{ borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{Parser, Tree}; -use util::post_inc; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); @@ -42,28 +40,26 @@ struct SyntaxLayerSummary { last_layer_range: Range, } -#[derive(Clone, Debug)] -struct Depth(usize); +#[derive(Debug)] +struct DepthAndRange(usize, Range); -#[derive(Clone, Debug)] -struct MaxPosition(Anchor); +#[derive(Debug)] +struct DepthAndMaxPosition(usize, Anchor); -enum ReparseStep { - CreateLayer { - depth: usize, - language: Arc, - ranges: Vec, - }, - EnterChangedRange { - id: usize, - depth: usize, - range: Range, - }, - LeaveChangedRange { - id: usize, - depth: usize, - range: Range, - }, +#[derive(Debug)] +struct DepthAndRangeOrMaxPosition(usize, Range, Anchor); + +struct ReparseStep { + depth: usize, + language: Arc, + ranges: Vec, + range: Range, +} + +#[derive(Debug, PartialEq, Eq)] +struct ChangedRegion { + depth: usize, + range: Range, } impl SyntaxMap { @@ -130,7 +126,16 @@ impl SyntaxSnapshot { for depth in 0..=max_depth { let mut edits = &edits[..]; - layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); + if cursor.start().max_depth < depth { + layers.push_tree( + cursor.slice( + &DepthAndRange(depth, Anchor::MIN..Anchor::MAX), + Bias::Left, + text, + ), + text, + ); + } while let Some(layer) = cursor.item() { let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ @@ -150,10 +155,7 @@ impl SyntaxSnapshot { if first_edit.new.start.0 > layer_range.end.0 { layers.push_tree( cursor.slice( - &( - Depth(depth), - MaxPosition(text.anchor_before(first_edit.new.start.0)), - ), + &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)), Bias::Left, text, ), @@ -183,8 +185,8 @@ impl SyntaxSnapshot { } // Apply any edits that intersect this layer to the layer's syntax tree. 
- if edit.new.start.0 >= start_byte { - layer.tree.edit(&tree_sitter::InputEdit { + let tree_edit = if edit.new.start.0 >= start_byte { + tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte + (edit.old.end.0 - edit.old.start.0), @@ -194,16 +196,20 @@ impl SyntaxSnapshot { + (edit.old.end.1 - edit.old.start.1)) .to_ts_point(), new_end_position: (edit.new.end.1 - start_point).to_ts_point(), - }); + } } else { - layer.tree.edit(&tree_sitter::InputEdit { + tree_sitter::InputEdit { start_byte: 0, old_end_byte: edit.new.end.0 - start_byte, new_end_byte: 0, start_position: Default::default(), old_end_position: (edit.new.end.1 - start_point).to_ts_point(), new_end_position: Default::default(), - }); + } + }; + + layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { break; } } @@ -228,184 +234,157 @@ impl SyntaxSnapshot { cursor.next(&text); let mut layers = SumTree::new(); - let mut next_change_id = 0; - let mut current_changes = HashMap::default(); + let mut changed_regions = Vec::::new(); let mut queue = BinaryHeap::new(); - queue.push(ReparseStep::CreateLayer { + queue.push(ReparseStep { depth: 0, language: language.clone(), ranges: Vec::new(), + range: Anchor::MIN..Anchor::MAX, }); - while let Some(step) = queue.pop() { - match step { - ReparseStep::CreateLayer { - depth, - language, - ranges, - } => { - let range; - let start_point; - let start_byte; - let end_byte; - if let Some((first, last)) = ranges.first().zip(ranges.last()) { - start_point = first.start_point; - start_byte = first.start_byte; - end_byte = last.end_byte; - range = text.anchor_before(start_byte)..text.anchor_after(end_byte); - } else { - start_point = Point::zero().to_ts_point(); - start_byte = 0; - end_byte = text.len(); - range = Anchor::MIN..Anchor::MAX; - }; + loop { + let step = queue.pop(); + let (depth, range) = if let Some(step) = &step { + (step.depth, step.range.clone()) + } else { + (cursor.start().max_depth, Anchor::MAX..Anchor::MAX) + }; - let target = (Depth(depth), range.clone()); - if target.cmp(cursor.start(), &text).is_gt() { - if current_changes.is_empty() { - let slice = cursor.slice(&target, Bias::Left, text); - layers.push_tree(slice, &text); - } else { - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.start.cmp(&range.end, text).is_ge() - { - break; - } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); - } - } + let target = DepthAndRange(depth, range.clone()); + if target.cmp(cursor.start(), &text).is_gt() { + let change_start_anchor = changed_regions + .first() + .map_or(Anchor::MAX, |region| region.range.start); + let seek_target = + DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor); + let slice = cursor.slice(&seek_target, Bias::Left, text); + layers.push_tree(slice, &text); + + while let Some(layer) = cursor.item() { + if target.cmp(&cursor.end(text), text).is_le() { + break; } - - let mut old_layer = cursor.item(); - if let Some(layer) = old_layer { - if layer.range.to_offset(text) == (start_byte..end_byte) { - cursor.next(&text); - } else { - old_layer = None; - } - } - - let grammar = if let Some(grammar) = language.grammar.as_deref() { - grammar - } else { - continue; - }; - - let tree; - let changed_ranges; - if let Some(old_layer) = old_layer { - tree = parse_text( - grammar, - text.as_rope(), - Some(old_layer.tree.clone()), - ranges, - ); - - changed_ranges = old_layer - .tree - 
.changed_ranges(&tree) - .map(|r| r.start_byte..r.end_byte) - .collect(); - } else { - tree = parse_text(grammar, text.as_rope(), None, ranges); - changed_ranges = vec![0..end_byte - start_byte]; - } - - layers.push( - SyntaxLayer { - depth, - range, - tree: tree.clone(), - language: language.clone(), - }, - &text, - ); - - if let (Some((config, registry)), false) = ( - grammar.injection_config.as_ref().zip(registry.as_ref()), - changed_ranges.is_empty(), - ) { - let depth = depth + 1; - queue.extend(changed_ranges.iter().flat_map(|range| { - let id = post_inc(&mut next_change_id); - let range = start_byte + range.start..start_byte + range.end; - [ - ReparseStep::EnterChangedRange { - id, - depth, - range: range.clone(), - }, - ReparseStep::LeaveChangedRange { - id, - depth, - range: range.clone(), - }, - ] - })); - - get_injections( - config, - text, - &tree, - registry, - depth, - start_byte, - Point::from_ts_point(start_point), - &changed_ranges, - &mut queue, - ); - } - } - ReparseStep::EnterChangedRange { id, depth, range } => { - let range = text.anchor_before(range.start)..text.anchor_after(range.end); - if current_changes.is_empty() { - let target = (Depth(depth), range.start..Anchor::MAX); - let slice = cursor.slice(&target, Bias::Left, text); - layers.push_tree(slice, text); - } else { - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.end.cmp(&range.start, text).is_gt() - { - break; - } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); - } - } - - current_changes.insert(id, range); - } - ReparseStep::LeaveChangedRange { id, depth, range } => { - let range = text.anchor_before(range.start)..text.anchor_after(range.end); - while let Some(layer) = cursor.item() { - if layer.depth > depth - || layer.depth == depth - && layer.range.start.cmp(&range.end, text).is_ge() + if layer_is_changed(layer, text, &changed_regions) { + let region = ChangedRegion { + depth: depth + 1, + range: layer.range.clone(), + }; + if let Err(i) = + changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) { - break; + changed_regions.insert(i, region); } - if !layer_is_changed(layer, text, ¤t_changes) { - layers.push(layer.clone(), text); - } - cursor.next(text); + } else { + layers.push(layer.clone(), text); } - current_changes.remove(&id); + cursor.next(text); } + + changed_regions.retain(|region| { + region.depth > depth + || (region.depth == depth + && region.range.end.cmp(&range.start, text).is_gt()) + }); + } + + let (ranges, language) = if let Some(step) = step { + (step.ranges, step.language) + } else { + break; + }; + + let start_point; + let start_byte; + let end_byte; + if let Some((first, last)) = ranges.first().zip(ranges.last()) { + start_point = first.start_point; + start_byte = first.start_byte; + end_byte = last.end_byte; + } else { + start_point = Point::zero().to_ts_point(); + start_byte = 0; + end_byte = text.len(); + }; + + let mut old_layer = cursor.item(); + if let Some(layer) = old_layer { + if layer.range.to_offset(text) == (start_byte..end_byte) { + cursor.next(&text); + } else { + old_layer = None; + } + } + + let grammar = if let Some(grammar) = language.grammar.as_deref() { + grammar + } else { + continue; + }; + + let tree; + let changed_ranges; + if let Some(old_layer) = old_layer { + tree = parse_text( + grammar, + text.as_rope(), + Some(old_layer.tree.clone()), + ranges, + ); + + changed_ranges = old_layer + .tree + .changed_ranges(&tree) + .map(|r| 
r.start_byte..r.end_byte) + .collect(); + } else { + tree = parse_text(grammar, text.as_rope(), None, ranges); + changed_ranges = vec![0..end_byte - start_byte]; + } + + layers.push( + SyntaxLayer { + depth, + range, + tree: tree.clone(), + language: language.clone(), + }, + &text, + ); + + if let (Some((config, registry)), false) = ( + grammar.injection_config.as_ref().zip(registry.as_ref()), + changed_ranges.is_empty(), + ) { + let depth = depth + 1; + + for range in &changed_ranges { + let region = ChangedRegion { + depth, + range: text.anchor_before(range.start)..text.anchor_after(range.end), + }; + if let Err(i) = + changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) + { + changed_regions.insert(i, region); + } + } + + get_injections( + config, + text, + &tree, + registry, + depth, + start_byte, + Point::from_ts_point(start_point), + &changed_ranges, + &mut queue, + ); } } - let slice = cursor.suffix(&text); - layers.push_tree(slice, &text); drop(cursor); self.layers = layers; } @@ -512,7 +491,7 @@ fn get_injections( start_byte: usize, start_point: Point, query_ranges: &[Range], - stack: &mut BinaryHeap, + queue: &mut BinaryHeap, ) -> bool { let mut result = false; let mut query_cursor = QueryCursorHandle::new(); @@ -547,7 +526,7 @@ fn get_injections( continue; } } - prev_match = Some((mat.pattern_index, content_range)); + prev_match = Some((mat.pattern_index, content_range.clone())); let language_name = config.languages_by_pattern_ix[mat.pattern_index] .as_ref() @@ -566,10 +545,13 @@ fn get_injections( if let Some(language_name) = language_name { if let Some(language) = language_registry.get_language(language_name.as_ref()) { result = true; - stack.push(ReparseStep::CreateLayer { + let range = text.anchor_before(content_range.start) + ..text.anchor_after(content_range.end); + queue.push(ReparseStep { depth, language, ranges: content_ranges, + range, }) } } @@ -581,11 +563,11 @@ fn get_injections( fn layer_is_changed( layer: &SyntaxLayer, text: &BufferSnapshot, - changed_ranges: &HashMap>, + changed_regions: &[ChangedRegion], ) -> bool { - changed_ranges.values().any(|range| { - let is_before_layer = range.end.cmp(&layer.range.start, text).is_le(); - let is_after_layer = range.start.cmp(&layer.range.end, text).is_ge(); + changed_regions.iter().any(|region| { + let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); + let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); !is_before_layer && !is_after_layer }) } @@ -598,22 +580,6 @@ impl std::ops::Deref for SyntaxMap { } } -impl ReparseStep { - fn sort_key(&self) -> (usize, Range) { - match self { - ReparseStep::CreateLayer { depth, ranges, .. } => ( - *depth, - ranges.first().map_or(0, |r| r.start_byte) - ..ranges.last().map_or(usize::MAX, |r| r.end_byte), - ), - ReparseStep::EnterChangedRange { depth, range, .. } => { - (*depth, range.start..usize::MAX) - } - ReparseStep::LeaveChangedRange { depth, range, .. 
} => (*depth, range.end..usize::MAX), - } - } -} - impl PartialEq for ReparseStep { fn eq(&self, _: &Self) -> bool { false @@ -630,14 +596,32 @@ impl PartialOrd for ReparseStep { impl Ord for ReparseStep { fn cmp(&self, other: &Self) -> Ordering { - let (depth_a, range_a) = self.sort_key(); - let (depth_b, range_b) = other.sort_key(); - Ord::cmp(&depth_b, &depth_a) + let range_a = self.range(); + let range_b = other.range(); + Ord::cmp(&other.depth, &self.depth) .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) } } +impl ReparseStep { + fn range(&self) -> Range { + let start = self.ranges.first().map_or(0, |r| r.start_byte); + let end = self.ranges.last().map_or(0, |r| r.end_byte); + start..end + } +} + +impl ChangedRegion { + fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { + let range_a = &self.range; + let range_b = &other.range; + Ord::cmp(&self.depth, &other.depth) + .then_with(|| range_a.start.cmp(&range_b.start, buffer)) + .then_with(|| range_b.end.cmp(&range_a.end, buffer)) + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { @@ -666,24 +650,9 @@ impl sum_tree::Summary for SyntaxLayerSummary { } } -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for Depth { - fn cmp(&self, cursor_location: &SyntaxLayerSummary, _: &BufferSnapshot) -> Ordering { - Ord::cmp(&self.0, &cursor_location.max_depth) - } -} - -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, MaxPosition) { - fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { - self.0 - .cmp(&cursor_location, text) - .then_with(|| (self.1).0.cmp(&cursor_location.range.end, text)) - } -} - -impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Range) { +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRange { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - self.0 - .cmp(&cursor_location, buffer) + Ord::cmp(&self.0, &cursor_location.max_depth) .then_with(|| { self.1 .start @@ -698,6 +667,37 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Rang } } +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering { + Ord::cmp(&self.0, &cursor_location.max_depth) + .then_with(|| self.1.cmp(&cursor_location.range.end, text)) + } +} + +impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition { + fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { + let cmp = Ord::cmp(&self.0, &cursor_location.max_depth); + if cmp.is_ne() { + return cmp; + } + + let cmp = self.2.cmp(&cursor_location.range.end, buffer); + if cmp.is_gt() { + return Ordering::Greater; + } + + self.1 + .start + .cmp(&cursor_location.last_layer_range.start, buffer) + .then_with(|| { + cursor_location + .last_layer_range + .end + .cmp(&self.1.end, buffer) + }) + } +} + impl sum_tree::Item for SyntaxLayer { type Summary = SyntaxLayerSummary; From 58fda5ac1c0c05377f2a846adb88b8b335ef0732 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 22 Aug 2022 14:49:16 -0700 Subject: [PATCH 05/22] Test more editing patterns of SyntaxMap, fix bugs --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/language/src/syntax_map.rs | 534 +++++++++++++++++++----------- 3 files changed, 341 insertions(+), 197 deletions(-) 
diff --git a/Cargo.lock b/Cargo.lock index 680e40a7f9..2a6d594f66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5842,7 +5842,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.8" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0#1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=477b6677537e89c7bdff14ce84dad6d23a6415bb#477b6677537e89c7bdff14ce84dad6d23a6415bb" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index 74c36d7006..b4df3fd101 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ default-members = ["crates/zed"] resolver = "2" [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "477b6677537e89c7bdff14ce84dad6d23a6415bb" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 71ac4d2959..e6198ccb5b 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -7,7 +7,7 @@ use std::{ }; use sum_tree::{Bias, SeekTarget, SumTree}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; -use tree_sitter::{Parser, Tree}; +use tree_sitter::{Node, Parser, Tree}; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); @@ -15,7 +15,8 @@ thread_local! { #[derive(Default)] pub struct SyntaxMap { - version: clock::Global, + parsed_version: clock::Global, + interpolated_version: clock::Global, snapshot: SyntaxSnapshot, language_registry: Option>, } @@ -40,14 +41,14 @@ struct SyntaxLayerSummary { last_layer_range: Range, } -#[derive(Debug)] +#[derive(Clone, Debug)] struct DepthAndRange(usize, Range); -#[derive(Debug)] +#[derive(Clone, Debug)] struct DepthAndMaxPosition(usize, Anchor); -#[derive(Debug)] -struct DepthAndRangeOrMaxPosition(usize, Range, Anchor); +#[derive(Clone, Debug)] +struct DepthAndRangeOrMaxPosition(DepthAndRange, DepthAndMaxPosition); struct ReparseStep { depth: usize, @@ -76,44 +77,29 @@ impl SyntaxMap { } pub fn interpolate(&mut self, text: &BufferSnapshot) { - self.snapshot.interpolate(&self.version, text); - self.version = text.version.clone(); + self.snapshot.interpolate(&self.interpolated_version, text); + self.interpolated_version = text.version.clone(); } pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { - self.version = text.version.clone(); - self.snapshot - .reparse(self.language_registry.clone(), language, text); + if !self.interpolated_version.observed_all(&text.version) { + self.interpolate(text); + } + + self.snapshot.reparse( + &self.parsed_version, + text, + self.language_registry.clone(), + language, + ); + self.parsed_version = text.version.clone(); } } -// Assumptions: -// * The maximum depth is small (< 5) -// * For a given depth, the number of layers that touch a given range -// is small (usually only 1) - -// |change| -// 0 (............................................................) -// 1 (...............................................) -// 1 (................) -// 1 (.......) -// 2 (....) -// 2 (....) -// 2 (.......) -// 2 (...) -// 2 (.........) -// 2 (...) -// 3 (.) -// 3 (.) -// 3 (..) -// 3 (..) 
-// 3 (..) -// 3 (.) - impl SyntaxSnapshot { - pub fn interpolate(&mut self, current_version: &clock::Global, text: &BufferSnapshot) { + pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text - .edits_since::<(usize, Point)>(¤t_version) + .edits_since::<(usize, Point)>(&from_version) .collect::>(); if edits.is_empty() { return; @@ -152,16 +138,9 @@ impl SyntaxSnapshot { } else { break; }; - if first_edit.new.start.0 > layer_range.end.0 { - layers.push_tree( - cursor.slice( - &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)), - Bias::Left, - text, - ), - text, - ); - continue; + let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); + if target.cmp(&cursor.start(), text).is_gt() { + layers.push_tree(cursor.slice(&target, Bias::Left, text), text); } // Preserve any layers at this depth that follow the last edit. @@ -226,10 +205,17 @@ impl SyntaxSnapshot { pub fn reparse( &mut self, + from_version: &clock::Global, + text: &BufferSnapshot, registry: Option>, language: Arc, - text: &BufferSnapshot, ) { + let edits = text.edits_since::(from_version).collect::>(); + if edits.is_empty() { + return; + } + + let max_depth = self.layers.summary().max_depth; let mut cursor = self.layers.cursor::(); cursor.next(&text); let mut layers = SumTree::new(); @@ -248,44 +234,55 @@ impl SyntaxSnapshot { let (depth, range) = if let Some(step) = &step { (step.depth, step.range.clone()) } else { - (cursor.start().max_depth, Anchor::MAX..Anchor::MAX) + (max_depth + 1, Anchor::MAX..Anchor::MAX) }; let target = DepthAndRange(depth, range.clone()); - if target.cmp(cursor.start(), &text).is_gt() { - let change_start_anchor = changed_regions - .first() - .map_or(Anchor::MAX, |region| region.range.start); - let seek_target = - DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor); - let slice = cursor.slice(&seek_target, Bias::Left, text); - layers.push_tree(slice, &text); + let mut done = cursor.item().is_none(); + while !done && target.cmp(cursor.start(), &text).is_gt() { + let bounded_target = DepthAndRangeOrMaxPosition( + target.clone(), + changed_regions + .first() + .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { + DepthAndMaxPosition(region.depth, region.range.start) + }), + ); + if bounded_target.cmp(&cursor.start(), &text).is_gt() { + let slice = cursor.slice(&bounded_target, Bias::Left, text); + layers.push_tree(slice, &text); + } - while let Some(layer) = cursor.item() { - if target.cmp(&cursor.end(text), text).is_le() { + while target.cmp(&cursor.end(text), text).is_gt() { + let layer = if let Some(layer) = cursor.item() { + layer + } else { break; - } + }; + if layer_is_changed(layer, text, &changed_regions) { - let region = ChangedRegion { + ChangedRegion { depth: depth + 1, range: layer.range.clone(), - }; - if let Err(i) = - changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) - { - changed_regions.insert(i, region); } + .insert(text, &mut changed_regions); } else { layers.push(layer.clone(), text); } - cursor.next(text); } + done = true; changed_regions.retain(|region| { - region.depth > depth + if region.depth > depth || (region.depth == depth && region.range.end.cmp(&range.start, text).is_gt()) + { + true + } else { + done = false; + false + } }); } @@ -332,15 +329,19 @@ impl SyntaxSnapshot { Some(old_layer.tree.clone()), ranges, ); - - changed_ranges = old_layer - .tree - .changed_ranges(&tree) - .map(|r| r.start_byte..r.end_byte) - .collect(); + changed_ranges 
= join_ranges( + edits + .iter() + .map(|e| e.new.clone()) + .filter(|range| range.start < end_byte && range.end > start_byte), + old_layer + .tree + .changed_ranges(&tree) + .map(|r| start_byte + r.start_byte..start_byte + r.end_byte), + ); } else { tree = parse_text(grammar, text.as_rope(), None, ranges); - changed_ranges = vec![0..end_byte - start_byte]; + changed_ranges = vec![start_byte..end_byte]; } layers.push( @@ -358,27 +359,19 @@ impl SyntaxSnapshot { changed_ranges.is_empty(), ) { let depth = depth + 1; - for range in &changed_ranges { - let region = ChangedRegion { + ChangedRegion { depth, range: text.anchor_before(range.start)..text.anchor_after(range.end), - }; - if let Err(i) = - changed_regions.binary_search_by(|probe| probe.cmp(®ion, text)) - { - changed_regions.insert(i, region); } + .insert(text, &mut changed_regions); } - get_injections( config, text, - &tree, + tree.root_node_with_offset(start_byte, start_point), registry, depth, - start_byte, - Point::from_ts_point(start_point), &changed_ranges, &mut queue, ); @@ -389,17 +382,16 @@ impl SyntaxSnapshot { self.layers = layers; } - pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, &Tree, (usize, Point))> { + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { self.layers .iter() .filter_map(|layer| { if let Some(grammar) = &layer.language.grammar { Some(( grammar.as_ref(), - &layer.tree, - ( + layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer), + layer.range.start.to_point(buffer).to_ts_point(), ), )) } else { @@ -413,7 +405,7 @@ impl SyntaxSnapshot { &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(&Grammar, &Tree, (usize, Point))> { + ) -> Vec<(&Grammar, Node)> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -429,10 +421,9 @@ impl SyntaxSnapshot { if let Some(grammar) = &layer.language.grammar { result.push(( grammar.as_ref(), - &layer.tree, - ( + layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer), + layer.range.start.to_point(buffer).to_ts_point(), ), )); } @@ -443,6 +434,38 @@ impl SyntaxSnapshot { } } +fn join_ranges( + a: impl Iterator>, + b: impl Iterator>, +) -> Vec> { + let mut result = Vec::>::new(); + let mut a = a.peekable(); + let mut b = b.peekable(); + loop { + let range = match (a.peek(), b.peek()) { + (Some(range_a), Some(range_b)) => { + if range_a.start < range_b.start { + a.next().unwrap() + } else { + b.next().unwrap() + } + } + (None, Some(_)) => b.next().unwrap(), + (Some(_), None) => a.next().unwrap(), + (None, None) => break, + }; + + if let Some(last) = result.last_mut() { + if range.start <= last.end { + last.end = last.end.max(range.end); + continue; + } + } + result.push(range); + } + result +} + fn parse_text( grammar: &Grammar, text: &Rope, @@ -485,11 +508,9 @@ fn parse_text( fn get_injections( config: &InjectionConfig, text: &BufferSnapshot, - tree: &Tree, + node: Node, language_registry: &LanguageRegistry, depth: usize, - start_byte: usize, - start_point: Point, query_ranges: &[Range], queue: &mut BinaryHeap, ) -> bool { @@ -498,21 +519,10 @@ fn get_injections( let mut prev_match = None; for query_range in query_ranges { query_cursor.set_byte_range(query_range.start..query_range.end); - for mat in query_cursor.matches( - &config.query, - tree.root_node(), - TextProvider(text.as_rope()), - ) { + for mat in query_cursor.matches(&config.query, node, 
TextProvider(text.as_rope())) { let content_ranges = mat .nodes_for_capture_index(config.content_capture_ix) - .map(|node| tree_sitter::Range { - start_byte: start_byte + node.start_byte(), - end_byte: start_byte + node.end_byte(), - start_point: (start_point + Point::from_ts_point(node.start_position())) - .to_ts_point(), - end_point: (start_point + Point::from_ts_point(node.end_position())) - .to_ts_point(), - }) + .map(|node| node.range()) .collect::>(); if content_ranges.is_empty() { continue; @@ -534,12 +544,7 @@ fn get_injections( .or_else(|| { let ix = config.language_capture_ix?; let node = mat.nodes_for_capture_index(ix).next()?; - Some(Cow::Owned( - text.text_for_range( - start_byte + node.start_byte()..start_byte + node.end_byte(), - ) - .collect(), - )) + Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) }); if let Some(language_name) = language_name { @@ -566,9 +571,10 @@ fn layer_is_changed( changed_regions: &[ChangedRegion], ) -> bool { changed_regions.iter().any(|region| { + let same_depth = region.depth == layer.depth; let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); - !is_before_layer && !is_after_layer + same_depth && !is_before_layer && !is_after_layer }) } @@ -613,6 +619,12 @@ impl ReparseStep { } impl ChangedRegion { + fn insert(self, text: &BufferSnapshot, set: &mut Vec) { + if let Err(ix) = set.binary_search_by(|probe| probe.cmp(&self, text)) { + set.insert(ix, self); + } + } + fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { let range_a = &self.range; let range_b = &other.range; @@ -676,25 +688,11 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxP impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { - let cmp = Ord::cmp(&self.0, &cursor_location.max_depth); - if cmp.is_ne() { - return cmp; + if self.1.cmp(cursor_location, buffer).is_le() { + return Ordering::Less; + } else { + self.0.cmp(cursor_location, buffer) } - - let cmp = self.2.cmp(&cursor_location.range.end, buffer); - if cmp.is_gt() { - return Ordering::Greater; - } - - self.1 - .start - .cmp(&cursor_location.last_layer_range.start, buffer) - .then_with(|| { - cursor_location - .last_layer_range - .end - .cmp(&self.1.end, buffer) - }) } } @@ -827,37 +825,22 @@ mod tests { } #[gpui::test] - fn test_syntax_map_edits() { - let registry = Arc::new(LanguageRegistry::test()); - let language = Arc::new(rust_lang()); - let mut syntax_map = SyntaxMap::new(); - syntax_map.set_language_registry(registry.clone()); - registry.add(language.clone()); - - let mut buffer = Buffer::new(0, 0, "".into()); - syntax_map.reparse(language.clone(), &buffer); - - edit_buffer_n( - &mut buffer, - &[ - "«fn a() { dbg }»", - "fn a() { dbg«!» }", - "fn a() { dbg!«()» }", - "fn a() { dbg!(«b») }", - "fn a() { dbg!(b«.») }", - "fn a() { dbg!(b.«c») }", - "fn a() { dbg!(b.c«()») }", - "fn a() { dbg!(b.c(«vec»)) }", - "fn a() { dbg!(b.c(vec«!»)) }", - "fn a() { dbg!(b.c(vec!«[]»)) }", - "fn a() { dbg!(b.c(vec![«d»])) }", - "fn a() { dbg!(b.c(vec![d«.»])) }", - "fn a() { dbg!(b.c(vec![d.«e»])) }", - ], - ); - - syntax_map.interpolate(&buffer); - syntax_map.reparse(language.clone(), &buffer); + fn test_typing_multiple_new_injections() { + let (buffer, syntax_map) = test_edit_sequence(&[ + "fn a() { dbg }", + "fn a() { dbg«!» }", + "fn a() 
{ dbg!«()» }", + "fn a() { dbg!(«b») }", + "fn a() { dbg!(b«.») }", + "fn a() { dbg!(b.«c») }", + "fn a() { dbg!(b.c«()») }", + "fn a() { dbg!(b.c(«vec»)) }", + "fn a() { dbg!(b.c(vec«!»)) }", + "fn a() { dbg!(b.c(vec!«[]»)) }", + "fn a() { dbg!(b.c(vec![«d»])) }", + "fn a() { dbg!(b.c(vec![d«.»])) }", + "fn a() { dbg!(b.c(vec![d.«e»])) }", + ]); assert_node_ranges( &syntax_map, @@ -867,6 +850,163 @@ mod tests { ); } + #[gpui::test] + fn test_pasting_new_injection_line_between_others() { + let (buffer, syntax_map) = test_edit_sequence(&[ + " + fn a() { + b!(B {}); + c!(C {}); + d!(D {}); + e!(E {}); + f!(F {}); + } + ", + " + fn a() { + b!(B {}); + c!(C {}); + «g!(G {}); + »d!(D {}); + e!(E {}); + f!(F {}); + } + ", + ]); + + assert_node_ranges( + &syntax_map, + &buffer, + "(struct_expression) @_", + " + fn a() { + b!(«B {}»); + c!(«C {}»); + g!(«G {}»); + d!(«D {}»); + e!(«E {}»); + f!(«F {}»); + } + ", + ); + } + + #[gpui::test] + fn test_joining_injections_with_child_injections() { + let (buffer, syntax_map) = test_edit_sequence(&[ + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ); + e!( + f![seven.eight], + ); + } + ", + " + fn a() { + b!( + c![one.two.three], + d![four.five.six], + ˇ f![seven.eight], + ); + } + ", + ]); + + assert_node_ranges( + &syntax_map, + &buffer, + "(field_identifier) @_", + " + fn a() { + b!( + c![one.«two».«three»], + d![four.«five».«six»], + f![seven.«eight»], + ); + } + ", + ); + } + + #[gpui::test] + fn test_editing_edges_of_injection() { + test_edit_sequence(&[ + " + fn a() { + b!(c!()) + } + ", + " + fn a() { + «d»!(c!()) + } + ", + " + fn a() { + «e»d!(c!()) + } + ", + " + fn a() { + ed!«[»c!()«]» + } + ", + ]); + } + + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + let mut buffer = Buffer::new(0, 0, Default::default()); + + let mut mutated_syntax_map = SyntaxMap::new(); + mutated_syntax_map.set_language_registry(registry.clone()); + mutated_syntax_map.reparse(language.clone(), &buffer); + + for (i, marked_string) in steps.into_iter().enumerate() { + edit_buffer(&mut buffer, &marked_string.unindent()); + + // Reparse the syntax map + mutated_syntax_map.interpolate(&buffer); + mutated_syntax_map.reparse(language.clone(), &buffer); + + // Create a second syntax map from scratch + let mut reference_syntax_map = SyntaxMap::new(); + reference_syntax_map.set_language_registry(registry.clone()); + reference_syntax_map.reparse(language.clone(), &buffer); + + // Compare the mutated syntax map to the new syntax map + let mutated_layers = mutated_syntax_map.layers(&buffer); + let reference_layers = reference_syntax_map.layers(&buffer); + assert_eq!( + mutated_layers.len(), + reference_layers.len(), + "wrong number of layers at step {i}" + ); + for (edited_layer, reference_layer) in + mutated_layers.into_iter().zip(reference_layers.into_iter()) + { + assert_eq!( + edited_layer.1.to_sexp(), + reference_layer.1.to_sexp(), + "different layer at step {i}" + ); + assert_eq!( + edited_layer.1.range(), + reference_layer.1.range(), + "different layer at step {i}" + ); + } + } + + (buffer, mutated_syntax_map) + } + fn rust_lang() -> Language { Language::new( LanguageConfig { @@ -903,10 +1043,10 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, ((_, tree, _), expected_s_exp)) in + for (i, ((_, node), expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { - let 
actual_s_exp = tree.root_node().to_sexp(); + let actual_s_exp = node.to_sexp(); assert!( string_contains_sequence( &actual_s_exp, @@ -925,50 +1065,54 @@ mod tests { ) { let mut cursor = QueryCursorHandle::new(); let mut actual_ranges = Vec::>::new(); - for (grammar, tree, (start_byte, _)) in syntax_map.layers(buffer) { + for (grammar, node) in syntax_map.layers(buffer) { let query = Query::new(grammar.ts_language, query).unwrap(); - for (mat, ix) in - cursor.captures(&query, tree.root_node(), TextProvider(buffer.as_rope())) - { - let range = mat.captures[ix].node.byte_range(); - actual_ranges.push(start_byte + range.start..start_byte + range.end); + for (mat, ix) in cursor.captures(&query, node, TextProvider(buffer.as_rope())) { + actual_ranges.push(mat.captures[ix].node.byte_range()); } } - let (text, expected_ranges) = marked_text_ranges(marked_string, false); + let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false); assert_eq!(text, buffer.text()); assert_eq!(actual_ranges, expected_ranges); } - fn edit_buffer_n(buffer: &mut Buffer, marked_strings: &[&str]) { - for marked_string in marked_strings { - edit_buffer(buffer, marked_string); - } - } - fn edit_buffer(buffer: &mut Buffer, marked_string: &str) { let old_text = buffer.text(); let (new_text, mut ranges) = marked_text_ranges(marked_string, false); - assert_eq!(ranges.len(), 1); + if ranges.is_empty() { + ranges.push(0..new_text.len()); + } - let inserted_range = ranges.pop().unwrap(); - let inserted_text = new_text[inserted_range.clone()].to_string(); - let deleted_len = (inserted_range.len() as isize + old_text.len() as isize - - new_text.len() as isize) as usize; - let deleted_range = inserted_range.start..inserted_range.start + deleted_len; + let mut delta = 0; + let mut edits = Vec::new(); + let mut ranges = ranges.into_iter().peekable(); + + while let Some(inserted_range) = ranges.next() { + let old_start = (inserted_range.start as isize - delta) as usize; + let following_text = if let Some(next_range) = ranges.peek() { + &new_text[inserted_range.end..next_range.start] + } else { + &new_text[inserted_range.end..] + }; + + let inserted_len = inserted_range.len(); + let deleted_len = old_text[old_start..] 
+ .find(following_text) + .expect("invalid edit"); + + let old_range = old_start..old_start + deleted_len; + edits.push((old_range, new_text[inserted_range].to_string())); + delta += inserted_len as isize - deleted_len as isize; + } assert_eq!( - old_text[..deleted_range.start], - new_text[..inserted_range.start], - "invalid edit", - ); - assert_eq!( - old_text[deleted_range.end..], - new_text[inserted_range.end..], - "invalid edit", + old_text.len() as isize + delta, + new_text.len() as isize, + "invalid edit" ); - buffer.edit([(deleted_range, inserted_text)]); + buffer.edit(edits); } pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool { From ae9e1338f6c8431ff4fd20c22009a4f46bfe5a30 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 22 Aug 2022 17:52:14 -0700 Subject: [PATCH 06/22] Fix more bugs in syntax map interpolation --- crates/language/src/syntax_map.rs | 185 ++++++++++++++++++------------ 1 file changed, 112 insertions(+), 73 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index e6198ccb5b..a5ffa9964a 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -106,96 +106,102 @@ impl SyntaxSnapshot { } let mut layers = SumTree::new(); - let max_depth = self.layers.summary().max_depth; + let mut edits_for_depth = &edits[..]; let mut cursor = self.layers.cursor::(); - cursor.next(&text); + cursor.next(text); - for depth in 0..=max_depth { - let mut edits = &edits[..]; - if cursor.start().max_depth < depth { + 'outer: loop { + let depth = cursor.end(text).max_depth; + + // Preserve any layers at this depth that precede the first edit. + if let Some(first_edit) = edits_for_depth.first() { + let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); + if target.cmp(&cursor.start(), text).is_gt() { + let slice = cursor.slice(&target, Bias::Left, text); + layers.push_tree(slice, text); + } + } + // If this layer follows all of the edits, then preserve it and any + // subsequent layers at this same depth. + else { layers.push_tree( cursor.slice( - &DepthAndRange(depth, Anchor::MIN..Anchor::MAX), + &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), Bias::Left, text, ), text, ); - } + edits_for_depth = &edits[..]; + continue; + }; - while let Some(layer) = cursor.item() { - let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ - &layer.range.start, - &layer.range.end, - ]); - let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); - let start_byte = layer_range.start.0; - let start_point = layer_range.start.1; + let layer = if let Some(layer) = cursor.item() { + layer + } else { + break; + }; - // Preserve any layers at this depth that precede the first edit. - let first_edit = if let Some(edit) = edits.first() { - edit - } else { - break; - }; - let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); - if target.cmp(&cursor.start(), text).is_gt() { - layers.push_tree(cursor.slice(&target, Bias::Left, text), text); - } + let mut endpoints = text + .summaries_for_anchors::<(usize, Point), _>([&layer.range.start, &layer.range.end]); + let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); + let start_byte = layer_range.start.0; + let start_point = layer_range.start.1; + let end_byte = layer_range.end.0; - // Preserve any layers at this depth that follow the last edit. 
- let last_edit = edits.last().unwrap(); - if last_edit.new.end.0 < layer_range.start.0 { - break; - } - - let mut layer = layer.clone(); - for (i, edit) in edits.iter().enumerate().rev() { - // Ignore any edits that start after the end of this layer. - if edit.new.start.0 > layer_range.end.0 { - continue; - } - - // Ignore edits that end before the start of this layer, and don't consider them - // for any subsequent layers at this same depth. - if edit.new.end.0 <= start_byte { - edits = &edits[i + 1..]; - break; - } - - // Apply any edits that intersect this layer to the layer's syntax tree. - let tree_edit = if edit.new.start.0 >= start_byte { - tree_sitter::InputEdit { - start_byte: edit.new.start.0 - start_byte, - old_end_byte: edit.new.start.0 - start_byte - + (edit.old.end.0 - edit.old.start.0), - new_end_byte: edit.new.end.0 - start_byte, - start_position: (edit.new.start.1 - start_point).to_ts_point(), - old_end_position: (edit.new.start.1 - start_point - + (edit.old.end.1 - edit.old.start.1)) - .to_ts_point(), - new_end_position: (edit.new.end.1 - start_point).to_ts_point(), - } + // Ignore edits that end before the start of this layer, and don't consider them + // for any subsequent layers at this same depth. + loop { + if let Some(edit) = edits_for_depth.first() { + if edit.new.end.0 < start_byte { + edits_for_depth = &edits_for_depth[1..]; } else { - tree_sitter::InputEdit { - start_byte: 0, - old_end_byte: edit.new.end.0 - start_byte, - new_end_byte: 0, - start_position: Default::default(), - old_end_position: (edit.new.end.1 - start_point).to_ts_point(), - new_end_position: Default::default(), - } - }; - - layer.tree.edit(&tree_edit); - if edit.new.start.0 < start_byte { break; } + } else { + continue 'outer; + } + } + + let mut layer = layer.clone(); + for edit in edits_for_depth { + // Ignore any edits that follow this layer. + if edit.new.start.0 > end_byte { + break; } - layers.push(layer, text); - cursor.next(text); + // Apply any edits that intersect this layer to the layer's syntax tree. + let tree_edit = if edit.new.start.0 >= start_byte { + tree_sitter::InputEdit { + start_byte: edit.new.start.0 - start_byte, + old_end_byte: edit.new.start.0 - start_byte + + (edit.old.end.0 - edit.old.start.0), + new_end_byte: edit.new.end.0 - start_byte, + start_position: (edit.new.start.1 - start_point).to_ts_point(), + old_end_position: (edit.new.start.1 - start_point + + (edit.old.end.1 - edit.old.start.1)) + .to_ts_point(), + new_end_position: (edit.new.end.1 - start_point).to_ts_point(), + } + } else { + tree_sitter::InputEdit { + start_byte: 0, + old_end_byte: edit.new.end.0 - start_byte, + new_end_byte: 0, + start_position: Default::default(), + old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + new_end_position: Default::default(), + } + }; + + layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { + break; + } } + + layers.push(layer, text); + cursor.next(text); } layers.push_tree(cursor.suffix(&text), &text); @@ -958,6 +964,31 @@ mod tests { ]); } + #[gpui::test] + fn test_edits_preceding_and_intersecting_injection() { + test_edit_sequence(&[ + // + "const aaaaaaaaaaaa: B = c!(d(e.f));", + "const aˇa: B = c!(d(eˇ));", + ]); + } + + #[gpui::test] + fn test_non_local_changes_create_injections() { + test_edit_sequence(&[ + " + // a! { + static B: C = d; + // } + ", + " + ˇa! 
{ + static B: C = d; + ˇ} + ", + ]); + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); @@ -1084,12 +1115,20 @@ mod tests { ranges.push(0..new_text.len()); } + assert_eq!( + old_text[..ranges[0].start], + new_text[..ranges[0].start], + "invalid edit" + ); + let mut delta = 0; let mut edits = Vec::new(); let mut ranges = ranges.into_iter().peekable(); while let Some(inserted_range) = ranges.next() { - let old_start = (inserted_range.start as isize - delta) as usize; + let new_start = inserted_range.start; + let old_start = (new_start as isize - delta) as usize; + let following_text = if let Some(next_range) = ranges.peek() { &new_text[inserted_range.end..next_range.start] } else { From 71e17a54ae9d3bd43de2f54733f6e505a138458e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 09:38:03 -0700 Subject: [PATCH 07/22] Fix case where layers were processed linearly when reparsing --- crates/language/src/syntax_map.rs | 208 +++++++++++++++++++++--------- 1 file changed, 145 insertions(+), 63 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a5ffa9964a..8800bb5cd8 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -63,6 +63,9 @@ struct ChangedRegion { range: Range, } +#[derive(Default)] +struct ChangeRegionSet(Vec); + impl SyntaxMap { pub fn new() -> Self { Self::default() @@ -124,14 +127,12 @@ impl SyntaxSnapshot { // If this layer follows all of the edits, then preserve it and any // subsequent layers at this same depth. else { - layers.push_tree( - cursor.slice( - &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), - Bias::Left, - text, - ), + let slice = cursor.slice( + &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), + Bias::Left, text, ); + layers.push_tree(slice, text); edits_for_depth = &edits[..]; continue; }; @@ -226,7 +227,7 @@ impl SyntaxSnapshot { cursor.next(&text); let mut layers = SumTree::new(); - let mut changed_regions = Vec::::new(); + let mut changed_regions = ChangeRegionSet::default(); let mut queue = BinaryHeap::new(); queue.push(ReparseStep { depth: 0, @@ -245,18 +246,19 @@ impl SyntaxSnapshot { let target = DepthAndRange(depth, range.clone()); let mut done = cursor.item().is_none(); - while !done && target.cmp(cursor.start(), &text).is_gt() { - let bounded_target = DepthAndRangeOrMaxPosition( - target.clone(), - changed_regions - .first() - .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { - DepthAndMaxPosition(region.depth, region.range.start) - }), - ); + while !done && target.cmp(&cursor.end(text), &text).is_gt() { + done = true; + + let bounded_target = + DepthAndRangeOrMaxPosition(target.clone(), changed_regions.start_position()); if bounded_target.cmp(&cursor.start(), &text).is_gt() { let slice = cursor.slice(&bounded_target, Bias::Left, text); - layers.push_tree(slice, &text); + if !slice.is_empty() { + layers.push_tree(slice, &text); + if changed_regions.prune(cursor.end(text), text) { + done = false; + } + } } while target.cmp(&cursor.end(text), text).is_gt() { @@ -266,30 +268,23 @@ impl SyntaxSnapshot { break; }; - if layer_is_changed(layer, text, &changed_regions) { - ChangedRegion { - depth: depth + 1, - range: layer.range.clone(), - } - .insert(text, &mut changed_regions); + if changed_regions.intersects(&layer, text) { + changed_regions.insert( + ChangedRegion { + depth: depth + 1, + range: layer.range.clone(), + }, + text, + ); 
} else { layers.push(layer.clone(), text); } - cursor.next(text); - } - done = true; - changed_regions.retain(|region| { - if region.depth > depth - || (region.depth == depth - && region.range.end.cmp(&range.start, text).is_gt()) - { - true - } else { + cursor.next(text); + if changed_regions.prune(cursor.end(text), text) { done = false; - false } - }); + } } let (ranges, language) = if let Some(step) = step { @@ -366,11 +361,13 @@ impl SyntaxSnapshot { ) { let depth = depth + 1; for range in &changed_ranges { - ChangedRegion { - depth, - range: text.anchor_before(range.start)..text.anchor_after(range.end), - } - .insert(text, &mut changed_regions); + changed_regions.insert( + ChangedRegion { + depth, + range: text.anchor_before(range.start)..text.anchor_after(range.end), + }, + text, + ); } get_injections( config, @@ -571,19 +568,6 @@ fn get_injections( result } -fn layer_is_changed( - layer: &SyntaxLayer, - text: &BufferSnapshot, - changed_regions: &[ChangedRegion], -) -> bool { - changed_regions.iter().any(|region| { - let same_depth = region.depth == layer.depth; - let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le(); - let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge(); - same_depth && !is_before_layer && !is_after_layer - }) -} - impl std::ops::Deref for SyntaxMap { type Target = SyntaxSnapshot; @@ -625,12 +609,6 @@ impl ReparseStep { } impl ChangedRegion { - fn insert(self, text: &BufferSnapshot, set: &mut Vec) { - if let Err(ix) = set.binary_search_by(|probe| probe.cmp(&self, text)) { - set.insert(ix, self); - } - } - fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering { let range_a = &self.range; let range_b = &other.range; @@ -640,6 +618,55 @@ impl ChangedRegion { } } +impl ChangeRegionSet { + fn start_position(&self) -> DepthAndMaxPosition { + self.0 + .first() + .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| { + DepthAndMaxPosition(region.depth, region.range.start) + }) + } + + fn intersects(&self, layer: &SyntaxLayer, text: &BufferSnapshot) -> bool { + for region in &self.0 { + if region.depth < layer.depth { + continue; + } + if region.depth > layer.depth { + break; + } + if region.range.end.cmp(&layer.range.start, text).is_le() { + continue; + } + if region.range.start.cmp(&layer.range.end, text).is_ge() { + break; + } + return true; + } + false + } + + fn insert(&mut self, region: ChangedRegion, text: &BufferSnapshot) { + if let Err(ix) = self.0.binary_search_by(|probe| probe.cmp(®ion, text)) { + self.0.insert(ix, region); + } + } + + fn prune(&mut self, summary: SyntaxLayerSummary, text: &BufferSnapshot) -> bool { + let prev_len = self.0.len(); + self.0.retain(|region| { + region.depth > summary.max_depth + || (region.depth == summary.max_depth + && region + .range + .end + .cmp(&summary.last_layer_range.start, text) + .is_gt()) + }); + self.0.len() < prev_len + } +} + impl Default for SyntaxLayerSummary { fn default() -> Self { Self { @@ -866,16 +893,18 @@ mod tests { d!(D {}); e!(E {}); f!(F {}); + g!(G {}); } ", " fn a() { b!(B {}); c!(C {}); - «g!(G {}); - »d!(D {}); - e!(E {}); + d!(D {}); + « h!(H {}); + » e!(E {}); f!(F {}); + g!(G {}); } ", ]); @@ -888,10 +917,11 @@ mod tests { fn a() { b!(«B {}»); c!(«C {}»); - g!(«G {}»); d!(«D {}»); + h!(«H {}»); e!(«E {}»); f!(«F {}»); + g!(«G {}»); } ", ); @@ -989,6 +1019,58 @@ mod tests { ]); } + #[gpui::test] + fn test_creating_many_injections_in_one_edit() { + test_edit_sequence(&[ + " + fn a() { + one(Two::three(3)); + four(Five::six(6)); + 
seven(Eight::nine(9)); + } + ", + " + fn a() { + one«!»(Two::three(3)); + four«!»(Five::six(6)); + seven«!»(Eight::nine(9)); + } + ", + " + fn a() { + one!(Two::three«!»(3)); + four!(Five::six«!»(6)); + seven!(Eight::nine«!»(9)); + } + ", + ]); + } + + #[gpui::test] + fn test_editing_across_injection_boundary() { + test_edit_sequence(&[ + " + fn one() { + two(); + three!( + three.four, + five.six, + ); + } + ", + " + fn one() { + two(); + th«irty_five![» + three.four, + five.six, + « seven.eight, + ];» + } + ", + ]); + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 9113c94371430ef07fb412aa766ae77db7e164a9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 14:26:09 -0700 Subject: [PATCH 08/22] Add SyntaxMap methods for running queries and combining their results --- crates/language/src/syntax_map.rs | 241 +++++++++++++++++++++++++++--- 1 file changed, 224 insertions(+), 17 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 8800bb5cd8..a578d36a38 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -3,11 +3,19 @@ use crate::{ ToTreeSitterPoint, }; use std::{ - borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, + borrow::Cow, + cell::RefCell, + cmp::{Ordering, Reverse}, + collections::BinaryHeap, + iter::Peekable, + ops::{DerefMut, Range}, + sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; -use tree_sitter::{Node, Parser, Tree}; +use tree_sitter::{ + Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree, +}; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); @@ -26,6 +34,42 @@ pub struct SyntaxSnapshot { layers: SumTree, } +pub struct SyntaxMapCaptures<'a> { + layers: Vec>, +} + +pub struct SyntaxMapMatches<'a> { + layers: Vec>, +} + +pub struct SyntaxMapCapture<'a> { + pub grammar: &'a Grammar, + pub depth: usize, + pub node: Node<'a>, + pub index: u32, +} + +pub struct SyntaxMapMatch<'a> { + pub grammar: &'a Grammar, + pub depth: usize, + pub pattern_index: usize, + pub captures: &'a [QueryCapture<'a>], +} + +struct SyntaxMapCapturesLayer<'a> { + depth: usize, + captures: Peekable>>, + grammar: &'a Grammar, + _query_cursor: QueryCursorHandle, +} + +struct SyntaxMapMatchesLayer<'a> { + depth: usize, + matches: Peekable>>, + grammar: &'a Grammar, + _query_cursor: QueryCursorHandle, +} + #[derive(Clone)] struct SyntaxLayer { depth: usize, @@ -385,6 +429,100 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn captures<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + query: impl Fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures { + let mut result = SyntaxMapCaptures { layers: Vec::new() }; + for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. 
+ let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let captures = cursor.captures(query, node, TextProvider(buffer.as_rope())); + let mut layer = SyntaxMapCapturesLayer { + depth, + grammar, + captures: captures.peekable(), + _query_cursor: query_cursor, + }; + + if let Some(key) = layer.sort_key() { + let mut ix = 0; + while let Some(next_layer) = result.layers.get_mut(ix) { + if let Some(next_key) = next_layer.sort_key() { + if key > next_key { + ix += 1; + continue; + } + } + break; + } + result.layers.insert(ix, layer); + } + } + result + } + + pub fn matches<'a>( + &'a self, + range: Range, + buffer: &'a BufferSnapshot, + query: impl Fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapMatches { + let mut result = SyntaxMapMatches { layers: Vec::new() }; + for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let matches = cursor.matches(query, node, TextProvider(buffer.as_rope())); + let mut layer = SyntaxMapMatchesLayer { + depth, + grammar, + matches: matches.peekable(), + _query_cursor: query_cursor, + }; + + if let Some(key) = layer.sort_key() { + let mut ix = 0; + while let Some(next_layer) = result.layers.get_mut(ix) { + if let Some(next_key) = next_layer.sort_key() { + if key > next_key { + ix += 1; + continue; + } + } + break; + } + result.layers.insert(ix, layer); + } + } + result + } + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { self.layers .iter() @@ -408,7 +546,7 @@ impl SyntaxSnapshot { &self, range: Range, buffer: &BufferSnapshot, - ) -> Vec<(&Grammar, Node)> { + ) -> Vec<(&Grammar, usize, Node)> { let start = buffer.anchor_before(range.start.to_offset(buffer)); let end = buffer.anchor_after(range.end.to_offset(buffer)); @@ -424,6 +562,7 @@ impl SyntaxSnapshot { if let Some(grammar) = &layer.language.grammar { result.push(( grammar.as_ref(), + layer.depth, layer.tree.root_node_with_offset( layer.range.start.to_offset(buffer), layer.range.start.to_point(buffer).to_ts_point(), @@ -437,6 +576,60 @@ impl SyntaxSnapshot { } } +impl<'a> Iterator for SyntaxMapCaptures<'a> { + type Item = SyntaxMapCapture<'a>; + + fn next(&mut self) -> Option { + let layer = self.layers.first_mut()?; + let (mat, ix) = layer.captures.next()?; + + let capture = mat.captures[ix as usize]; + let grammar = layer.grammar; + let depth = layer.depth; + + if let Some(key) = layer.sort_key() { + let mut i = 1; + while let Some(later_layer) = self.layers.get_mut(i) { + if let Some(later_key) = later_layer.sort_key() { + if key > later_key { + i += 1; + continue; + } + } + break; + } + if i > 1 { + self.layers[0..i].rotate_left(1); + } + } else { + self.layers.remove(0); + } + + Some(SyntaxMapCapture { + grammar, + depth, + node: capture.node, + index: capture.index, + }) + } +} + +impl<'a> SyntaxMapCapturesLayer<'a> { + fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { + let (mat, ix) = self.captures.peek()?; + let range = &mat.captures[*ix].node.byte_range(); + Some((range.start, Reverse(range.end), self.depth)) + } +} + +impl<'a> SyntaxMapMatchesLayer<'a> { + fn sort_key(&mut self) -> 
Option<(usize, Reverse, usize)> { + let mat = self.matches.peek()?; + let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte(); + Some((range.start, Reverse(range.end), self.depth)) + } +} + fn join_ranges( a: impl Iterator>, b: impl Iterator>, @@ -875,10 +1068,10 @@ mod tests { "fn a() { dbg!(b.c(vec![d.«e»])) }", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(field_identifier) @_", + &["field"], "fn a() { dbg!(b.«c»(vec![d.«e»])) }", ); } @@ -909,10 +1102,10 @@ mod tests { ", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(struct_expression) @_", + &["struct"], " fn a() { b!(«B {}»); @@ -952,10 +1145,10 @@ mod tests { ", ]); - assert_node_ranges( + assert_capture_ranges( &syntax_map, &buffer, - "(field_identifier) @_", + &["field"], " fn a() { b!( @@ -1129,6 +1322,13 @@ mod tests { }, Some(tree_sitter_rust::language()), ) + .with_highlights_query( + r#" + (field_identifier) @field + (struct_expression) @struct + "#, + ) + .unwrap() .with_injection_query( r#" (macro_invocation @@ -1156,7 +1356,7 @@ mod tests { expected_layers.len(), "wrong number of layers" ); - for (i, ((_, node), expected_s_exp)) in + for (i, ((_, _, node), expected_s_exp)) in layers.iter().zip(expected_layers.iter()).enumerate() { let actual_s_exp = node.to_sexp(); @@ -1170,18 +1370,25 @@ mod tests { } } - fn assert_node_ranges( + fn assert_capture_ranges( syntax_map: &SyntaxMap, buffer: &BufferSnapshot, - query: &str, + highlight_query_capture_names: &[&str], marked_string: &str, ) { - let mut cursor = QueryCursorHandle::new(); let mut actual_ranges = Vec::>::new(); - for (grammar, node) in syntax_map.layers(buffer) { - let query = Query::new(grammar.ts_language, query).unwrap(); - for (mat, ix) in cursor.captures(&query, node, TextProvider(buffer.as_rope())) { - actual_ranges.push(mat.captures[ix].node.byte_range()); + for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| { + grammar.highlights_query.as_ref() + }) { + let name = &capture + .grammar + .highlights_query + .as_ref() + .unwrap() + .capture_names()[capture.index as usize]; + dbg!(capture.node, capture.index, name); + if highlight_query_capture_names.contains(&name.as_str()) { + actual_ranges.push(capture.node.byte_range()); } } From ced45cbb0a16e0fa45a999d64dc7bc9dbc1e57fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 17:09:13 -0700 Subject: [PATCH 09/22] Use SyntaxMap in Buffer --- crates/language/src/buffer.rs | 616 ++++++++----------- crates/language/src/language.rs | 144 ++++- crates/language/src/syntax_map.rs | 591 +++++++++++++----- crates/language/src/tests.rs | 4 +- crates/project/src/project.rs | 1 + crates/zed/src/languages/rust/injections.scm | 6 +- 6 files changed, 830 insertions(+), 532 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index b7a1bd30fc..7b298b7420 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -6,13 +6,15 @@ pub use crate::{ use crate::{ diagnostic_set::{DiagnosticEntry, DiagnosticGroup}, outline::OutlineItem, + syntax_map::{ + SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxSnapshot, ToTreeSitterPoint, + }, CodeLabel, Outline, }; use anyhow::{anyhow, Result}; use clock::ReplicaId; use futures::FutureExt as _; use gpui::{fonts::HighlightStyle, AppContext, Entity, ModelContext, MutableAppContext, Task}; -use lazy_static::lazy_static; use parking_lot::Mutex; use settings::Settings; use similar::{ChangeTag, TextDiff}; @@ 
-25,7 +27,7 @@ use std::{ future::Future, iter::{self, Iterator, Peekable}, mem, - ops::{Deref, DerefMut, Range}, + ops::{Deref, Range}, path::{Path, PathBuf}, str, sync::Arc, @@ -36,7 +38,6 @@ use sum_tree::TreeMap; use text::operation_queue::OperationQueue; pub use text::{Buffer as TextBuffer, BufferSnapshot as TextBufferSnapshot, Operation as _, *}; use theme::SyntaxTheme; -use tree_sitter::{InputEdit, QueryCursor, Tree}; use util::TryFutureExt as _; #[cfg(any(test, feature = "test-support"))] @@ -44,10 +45,6 @@ pub use {tree_sitter_rust, tree_sitter_typescript}; pub use lsp::DiagnosticSeverity; -lazy_static! { - static ref QUERY_CURSORS: Mutex> = Default::default(); -} - pub struct Buffer { text: TextBuffer, file: Option>, @@ -60,7 +57,7 @@ pub struct Buffer { autoindent_requests: Vec>, pending_autoindent: Option>, sync_parse_timeout: Duration, - syntax_tree: Mutex>, + syntax_map: Mutex, parsing_in_background: bool, parse_count: usize, diagnostics: DiagnosticSet, @@ -75,7 +72,7 @@ pub struct Buffer { pub struct BufferSnapshot { text: text::BufferSnapshot, - tree: Option, + syntax: SyntaxSnapshot, file: Option>, diagnostics: DiagnosticSet, diagnostics_update_count: usize, @@ -221,14 +218,6 @@ pub trait LocalFile: File { ); } -pub(crate) struct QueryCursorHandle(Option); - -#[derive(Clone)] -struct SyntaxTree { - tree: Tree, - version: clock::Global, -} - #[derive(Clone, Debug)] pub enum AutoindentMode { /// Indent each line of inserted text. @@ -268,14 +257,11 @@ struct IndentSuggestion { delta: Ordering, } -pub(crate) struct TextProvider<'a>(pub(crate) &'a Rope); - struct BufferChunkHighlights<'a> { - captures: tree_sitter::QueryCaptures<'a, 'a, TextProvider<'a>>, - next_capture: Option<(tree_sitter::QueryMatch<'a, 'a>, usize)>, + captures: SyntaxMapCaptures<'a>, + next_capture: Option>, stack: Vec<(usize, HighlightId)>, - highlight_map: HighlightMap, - _query_cursor: QueryCursorHandle, + highlight_maps: Vec, } pub struct BufferChunks<'a> { @@ -456,7 +442,7 @@ impl Buffer { was_dirty_before_starting_transaction: None, text: buffer, file, - syntax_tree: Mutex::new(None), + syntax_map: Mutex::new(SyntaxMap::new()), parsing_in_background: false, parse_count: 0, sync_parse_timeout: Duration::from_millis(1), @@ -477,7 +463,7 @@ impl Buffer { pub fn snapshot(&self) -> BufferSnapshot { BufferSnapshot { text: self.text.snapshot(), - tree: self.syntax_tree(), + syntax: self.syntax_map(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -533,11 +519,17 @@ impl Buffer { } pub fn set_language(&mut self, language: Option>, cx: &mut ModelContext) { - *self.syntax_tree.lock() = None; + self.syntax_map.lock().clear(); self.language = language; self.reparse(cx); } + pub fn set_language_registry(&mut self, language_registry: Arc) { + self.syntax_map + .lock() + .set_language_registry(language_registry); + } + pub fn did_save( &mut self, version: clock::Global, @@ -682,13 +674,10 @@ impl Buffer { self.file_update_count } - pub(crate) fn syntax_tree(&self) -> Option { - if let Some(syntax_tree) = self.syntax_tree.lock().as_mut() { - self.interpolate_tree(syntax_tree); - Some(syntax_tree.tree.clone()) - } else { - None - } + pub(crate) fn syntax_map(&self) -> SyntaxSnapshot { + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&self.text_snapshot()); + syntax_map.snapshot() } #[cfg(any(test, feature = "test-support"))] @@ -706,35 +695,49 @@ impl Buffer { return false; } - if let Some(grammar) = self.grammar().cloned() 
{ - let old_tree = self.syntax_tree(); - let text = self.as_rope().clone(); + if let Some(language) = self.language.clone() { + let text = self.text_snapshot(); let parsed_version = self.version(); + + let mut syntax_map; + let language_registry; + let syntax_map_version; + { + let mut map = self.syntax_map.lock(); + map.interpolate(&text); + language_registry = map.language_registry(); + syntax_map = map.snapshot(); + syntax_map_version = map.parsed_version(); + } let parse_task = cx.background().spawn({ - let grammar = grammar.clone(); - async move { grammar.parse_text(&text, old_tree) } + let language = language.clone(); + async move { + syntax_map.reparse(&syntax_map_version, &text, language_registry, language); + syntax_map + } }); match cx .background() .block_with_timeout(self.sync_parse_timeout, parse_task) { - Ok(new_tree) => { - self.did_finish_parsing(new_tree, parsed_version, cx); + Ok(new_syntax_map) => { + self.did_finish_parsing(new_syntax_map, parsed_version, cx); return true; } Err(parse_task) => { self.parsing_in_background = true; cx.spawn(move |this, mut cx| async move { - let new_tree = parse_task.await; + let new_syntax_map = parse_task.await; this.update(&mut cx, move |this, cx| { - let grammar_changed = this - .grammar() - .map_or(true, |curr_grammar| !Arc::ptr_eq(&grammar, curr_grammar)); + let grammar_changed = + this.language.as_ref().map_or(true, |current_language| { + !Arc::ptr_eq(&language, current_language) + }); let parse_again = this.version.changed_since(&parsed_version) || grammar_changed; this.parsing_in_background = false; - this.did_finish_parsing(new_tree, parsed_version, cx); + this.did_finish_parsing(new_syntax_map, parsed_version, cx); if parse_again && this.reparse(cx) {} }); @@ -746,30 +749,14 @@ impl Buffer { false } - fn interpolate_tree(&self, tree: &mut SyntaxTree) { - for edit in self.edits_since::<(usize, Point)>(&tree.version) { - let (bytes, lines) = edit.flatten(); - tree.tree.edit(&InputEdit { - start_byte: bytes.new.start, - old_end_byte: bytes.new.start + bytes.old.len(), - new_end_byte: bytes.new.end, - start_position: lines.new.start.to_ts_point(), - old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) - .to_ts_point(), - new_end_position: lines.new.end.to_ts_point(), - }); - } - tree.version = self.version(); - } - fn did_finish_parsing( &mut self, - tree: Tree, + syntax_map: SyntaxSnapshot, version: clock::Global, cx: &mut ModelContext, ) { self.parse_count += 1; - *self.syntax_tree.lock() = Some(SyntaxTree { tree, version }); + self.syntax_map.lock().did_parse(syntax_map, version); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); @@ -808,10 +795,7 @@ impl Buffer { fn compute_autoindents(&self) -> Option>> { let max_rows_between_yields = 100; let snapshot = self.snapshot(); - if snapshot.language.is_none() - || snapshot.tree.is_none() - || self.autoindent_requests.is_empty() - { + if snapshot.syntax.is_empty() || self.autoindent_requests.is_empty() { return None; } @@ -1310,10 +1294,6 @@ impl Buffer { cx.notify(); } - fn grammar(&self) -> Option<&Arc> { - self.language.as_ref().and_then(|l| l.grammar.as_ref()) - } - pub fn apply_ops>( &mut self, ops: I, @@ -1654,32 +1634,30 @@ impl BufferSnapshot { let prev_non_blank_row = self.prev_non_blank_row(row_range.start); // Find the suggested indentation ranges based on the syntax tree. 
- let indents_query = grammar.indents_query.as_ref()?; - let mut query_cursor = QueryCursorHandle::new(); - let indent_capture_ix = indents_query.capture_index_for_name("indent"); - let end_capture_ix = indents_query.capture_index_for_name("end"); - query_cursor.set_point_range( - Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0).to_ts_point() - ..Point::new(row_range.end, 0).to_ts_point(), - ); + let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0); + let end = Point::new(row_range.end, 0); + let range = (start..end).to_offset(&self.text); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + Some(&grammar.indents_config.as_ref()?.query) + }); let mut indent_ranges = Vec::>::new(); - for mat in query_cursor.matches( - indents_query, - self.tree.as_ref()?.root_node(), - TextProvider(self.as_rope()), - ) { + while let Some(mat) = matches.peek() { let mut start: Option = None; let mut end: Option = None; - for capture in mat.captures { - if Some(capture.index) == indent_capture_ix { - start.get_or_insert(Point::from_ts_point(capture.node.start_position())); - end.get_or_insert(Point::from_ts_point(capture.node.end_position())); - } else if Some(capture.index) == end_capture_ix { - end = Some(Point::from_ts_point(capture.node.start_position())); + + if let Some(config) = &grammar.indents_config { + for capture in mat.captures { + if capture.index == config.indent_capture_ix { + start.get_or_insert(Point::from_ts_point(capture.node.start_position())); + end.get_or_insert(Point::from_ts_point(capture.node.end_position())); + } else if Some(capture.index) == config.end_capture_ix { + end = Some(Point::from_ts_point(capture.node.start_position())); + } } } + matches.advance(); if let Some((start, end)) = start.zip(end) { if start.row == end.row { continue; @@ -1811,10 +1789,18 @@ impl BufferSnapshot { pub fn chunks(&self, range: Range, language_aware: bool) -> BufferChunks { let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut tree = None; + let mut syntax = None; let mut diagnostic_endpoints = Vec::new(); if language_aware { - tree = self.tree.as_ref(); + let captures = self.syntax.captures(range.clone(), &self.text, |grammar| { + grammar.highlights_query.as_ref() + }); + let highlight_maps = captures + .grammars() + .into_iter() + .map(|grammar| grammar.highlight_map()) + .collect(); + syntax = Some((captures, highlight_maps)); for entry in self.diagnostics_in_range::<_, usize>(range.clone(), false) { diagnostic_endpoints.push(DiagnosticEndpoint { offset: entry.range.start, @@ -1833,13 +1819,7 @@ impl BufferSnapshot { .sort_unstable_by_key(|endpoint| (endpoint.offset, !endpoint.is_start)); } - BufferChunks::new( - self.text.as_rope(), - range, - tree, - self.grammar(), - diagnostic_endpoints, - ) + BufferChunks::new(self.text.as_rope(), range, syntax, diagnostic_endpoints) } pub fn for_each_line(&self, range: Range, mut callback: impl FnMut(u32, &str)) { @@ -1865,12 +1845,6 @@ impl BufferSnapshot { self.language.as_ref() } - fn grammar(&self) -> Option<&Arc> { - self.language - .as_ref() - .and_then(|language| language.grammar.as_ref()) - } - pub fn surrounding_word(&self, start: T) -> (Range, Option) { let mut start = start.to_offset(self); let mut end = start; @@ -1901,61 +1875,71 @@ impl BufferSnapshot { } pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { - let tree = self.tree.as_ref()?; let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut cursor = tree.root_node().walk(); + 
let mut result: Option> = None; + 'outer: for (_, _, node) in self.syntax.layers_for_range(range.clone(), &self.text) { + let mut cursor = node.walk(); - // Descend to the first leaf that touches the start of the range, - // and if the range is non-empty, extends beyond the start. - while cursor.goto_first_child_for_byte(range.start).is_some() { - if !range.is_empty() && cursor.node().end_byte() == range.start { - cursor.goto_next_sibling(); + // Descend to the first leaf that touches the start of the range, + // and if the range is non-empty, extends beyond the start. + while cursor.goto_first_child_for_byte(range.start).is_some() { + if !range.is_empty() && cursor.node().end_byte() == range.start { + cursor.goto_next_sibling(); + } } - } - // Ascend to the smallest ancestor that strictly contains the range. - loop { - let node_range = cursor.node().byte_range(); - if node_range.start <= range.start - && node_range.end >= range.end - && node_range.len() > range.len() - { - break; - } - if !cursor.goto_parent() { - break; - } - } - - let left_node = cursor.node(); - - // For an empty range, try to find another node immediately to the right of the range. - if left_node.end_byte() == range.start { - let mut right_node = None; - while !cursor.goto_next_sibling() { + // Ascend to the smallest ancestor that strictly contains the range. + loop { + let node_range = cursor.node().byte_range(); + if node_range.start <= range.start + && node_range.end >= range.end + && node_range.len() > range.len() + { + break; + } if !cursor.goto_parent() { - break; + continue 'outer; } } - while cursor.node().start_byte() == range.start { - right_node = Some(cursor.node()); - if !cursor.goto_first_child() { - break; + let left_node = cursor.node(); + let mut layer_result = left_node.byte_range(); + + // For an empty range, try to find another node immediately to the right of the range. + if left_node.end_byte() == range.start { + let mut right_node = None; + while !cursor.goto_next_sibling() { + if !cursor.goto_parent() { + break; + } + } + + while cursor.node().start_byte() == range.start { + right_node = Some(cursor.node()); + if !cursor.goto_first_child() { + break; + } + } + + // If there is a candidate node on both sides of the (empty) range, then + // decide between the two by favoring a named node over an anonymous token. + // If both nodes are the same in that regard, favor the right one. + if let Some(right_node) = right_node { + if right_node.is_named() || !left_node.is_named() { + layer_result = right_node.byte_range(); + } } } - // If there is a candidate node on both sides of the (empty) range, then - // decide between the two by favoring a named node over an anonymous token. - // If both nodes are the same in that regard, favor the right one. 
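Each syntax layer contributes at most one candidate range to `result`, and the comparison just below (`previous_result.len() < layer_result.len()`) keeps the smallest one, so a tighter ancestor found in an injected layer wins over the host layer's larger one. The selection rule in isolation, with hypothetical ranges purely for illustration:

    // Candidate ancestor ranges reported by successive layers for the same request.
    let candidates = [10..90, 30..45, 25..60];
    let mut result: Option<std::ops::Range<usize>> = None;
    for layer_result in candidates {
        // Keep the smallest range seen so far, mirroring the loop in this hunk.
        if let Some(previous_result) = &result {
            if previous_result.len() < layer_result.len() {
                continue;
            }
        }
        result = Some(layer_result);
    }
    assert_eq!(result, Some(30..45));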
- if let Some(right_node) = right_node { - if right_node.is_named() || !left_node.is_named() { - return Some(right_node.byte_range()); + if let Some(previous_result) = &result { + if previous_result.len() < layer_result.len() { + continue; } } + result = Some(layer_result); } - Some(left_node.byte_range()) + result } pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option> { @@ -1985,109 +1969,107 @@ impl BufferSnapshot { range: Range, theme: Option<&SyntaxTheme>, ) -> Option>> { - let tree = self.tree.as_ref()?; - let grammar = self - .language - .as_ref() - .and_then(|language| language.grammar.as_ref())?; - - let outline_query = grammar.outline_query.as_ref()?; - let mut cursor = QueryCursorHandle::new(); - cursor.set_byte_range(range.clone()); - let matches = cursor.matches( - outline_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| { + grammar.outline_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|g| g.outline_config.as_ref().unwrap()) + .collect::>(); let mut chunks = self.chunks(0..self.len(), true); - - let item_capture_ix = outline_query.capture_index_for_name("item")?; - let name_capture_ix = outline_query.capture_index_for_name("name")?; - let context_capture_ix = outline_query - .capture_index_for_name("context") - .unwrap_or(u32::MAX); - let mut stack = Vec::>::new(); - let items = matches - .filter_map(|mat| { - let item_node = mat.nodes_for_capture_index(item_capture_ix).next()?; - let item_range = item_node.start_byte()..item_node.end_byte(); - if item_range.end < range.start || item_range.start > range.end { - return None; + let mut items = Vec::new(); + while let Some(mat) = matches.peek() { + let config = &configs[mat.grammar_index]; + let item_node = mat.captures.iter().find_map(|cap| { + if cap.index == config.item_capture_ix { + Some(cap.node) + } else { + None } - let mut text = String::new(); - let mut name_ranges = Vec::new(); - let mut highlight_ranges = Vec::new(); + })?; - for capture in mat.captures { - let node_is_name; - if capture.index == name_capture_ix { - node_is_name = true; - } else if capture.index == context_capture_ix { - node_is_name = false; + let item_range = item_node.byte_range(); + if item_range.end < range.start || item_range.start > range.end { + matches.advance(); + continue; + } + + // TODO - move later, after processing captures + + let mut text = String::new(); + let mut name_ranges = Vec::new(); + let mut highlight_ranges = Vec::new(); + for capture in mat.captures { + let node_is_name; + if capture.index == config.name_capture_ix { + node_is_name = true; + } else if Some(capture.index) == config.context_capture_ix { + node_is_name = false; + } else { + continue; + } + + let range = capture.node.start_byte()..capture.node.end_byte(); + if !text.is_empty() { + text.push(' '); + } + if node_is_name { + let mut start = text.len(); + let end = start + range.len(); + + // When multiple names are captured, then the matcheable text + // includes the whitespace in between the names. 
+ if !name_ranges.is_empty() { + start -= 1; + } + + name_ranges.push(start..end); + } + + let mut offset = range.start; + chunks.seek(offset); + for mut chunk in chunks.by_ref() { + if chunk.text.len() > range.end - offset { + chunk.text = &chunk.text[0..(range.end - offset)]; + offset = range.end; } else { - continue; + offset += chunk.text.len(); } - - let range = capture.node.start_byte()..capture.node.end_byte(); - if !text.is_empty() { - text.push(' '); + let style = chunk + .syntax_highlight_id + .zip(theme) + .and_then(|(highlight, theme)| highlight.style(theme)); + if let Some(style) = style { + let start = text.len(); + let end = start + chunk.text.len(); + highlight_ranges.push((start..end, style)); } - if node_is_name { - let mut start = text.len(); - let end = start + range.len(); - - // When multiple names are captured, then the matcheable text - // includes the whitespace in between the names. - if !name_ranges.is_empty() { - start -= 1; - } - - name_ranges.push(start..end); - } - - let mut offset = range.start; - chunks.seek(offset); - for mut chunk in chunks.by_ref() { - if chunk.text.len() > range.end - offset { - chunk.text = &chunk.text[0..(range.end - offset)]; - offset = range.end; - } else { - offset += chunk.text.len(); - } - let style = chunk - .syntax_highlight_id - .zip(theme) - .and_then(|(highlight, theme)| highlight.style(theme)); - if let Some(style) = style { - let start = text.len(); - let end = start + chunk.text.len(); - highlight_ranges.push((start..end, style)); - } - text.push_str(chunk.text); - if offset >= range.end { - break; - } + text.push_str(chunk.text); + if offset >= range.end { + break; } } + } - while stack.last().map_or(false, |prev_range| { - prev_range.start > item_range.start || prev_range.end < item_range.end - }) { - stack.pop(); - } - stack.push(item_range.clone()); + matches.advance(); + while stack.last().map_or(false, |prev_range| { + prev_range.start > item_range.start || prev_range.end < item_range.end + }) { + stack.pop(); + } + stack.push(item_range.clone()); - Some(OutlineItem { - depth: stack.len() - 1, - range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), - text, - highlight_ranges, - name_ranges, - }) + items.push(OutlineItem { + depth: stack.len() - 1, + range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), + text, + highlight_ranges, + name_ranges, }) - .collect::>(); + } Some(items) } @@ -2095,28 +2077,48 @@ impl BufferSnapshot { &self, range: Range, ) -> Option<(Range, Range)> { - let (grammar, tree) = self.grammar().zip(self.tree.as_ref())?; - let brackets_query = grammar.brackets_query.as_ref()?; - let open_capture_ix = brackets_query.capture_index_for_name("open")?; - let close_capture_ix = brackets_query.capture_index_for_name("close")?; - // Find bracket pairs that *inclusively* contain the given range. let range = range.start.to_offset(self).saturating_sub(1)..range.end.to_offset(self) + 1; - let mut cursor = QueryCursorHandle::new(); - let matches = cursor.set_byte_range(range).matches( - brackets_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + grammar.brackets_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|grammar| grammar.brackets_config.as_ref().unwrap()) + .collect::>(); // Get the ranges of the innermost pair of brackets. 
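As in the indent and outline changes, `matches.grammars()` is parallel to each match's `grammar_index`, which is how the selection loop below reaches the right `BracketConfig`. The net behaviour matches the old `min_by_key`: among pairs whose open and close captures both resolve, the one with the smallest `close.end - open.start` span wins. A small usage sketch, assuming `snapshot` is a `BufferSnapshot` of the hypothetical text `fn main() { foo(bar); }`:

    let cursor_offset = 17; // an assumed offset inside `bar`
    let pair = snapshot.innermost_enclosing_bracket_ranges(cursor_offset..cursor_offset);
    // Expected: the parentheses of `foo(...)` rather than the outer braces,
    // because that pair spans fewer bytes.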
- matches - .filter_map(|mat| { - let open = mat.nodes_for_capture_index(open_capture_ix).next()?; - let close = mat.nodes_for_capture_index(close_capture_ix).next()?; - Some((open.byte_range(), close.byte_range())) - }) - .min_by_key(|(open_range, close_range)| close_range.end - open_range.start) + let mut result: Option<(Range, Range)> = None; + while let Some(mat) = matches.peek() { + let mut open = None; + let mut close = None; + let config = &configs[mat.grammar_index]; + for capture in mat.captures { + if capture.index == config.open_capture_ix { + open = Some(capture.node.byte_range()); + } else if capture.index == config.close_capture_ix { + close = Some(capture.node.byte_range()); + } + } + + matches.advance(); + + if let Some((open, close)) = open.zip(close) { + let len = close.end - open.start; + + if let Some((existing_open, existing_close)) = &result { + let existing_len = existing_close.end - existing_open.start; + if len > existing_len { + continue; + } + } + + result = Some((open, close)); + } + } + + result } #[allow(clippy::type_complexity)] @@ -2228,7 +2230,7 @@ impl Clone for BufferSnapshot { fn clone(&self) -> Self { Self { text: self.text.clone(), - tree: self.tree.clone(), + syntax: self.syntax.clone(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -2249,56 +2251,23 @@ impl Deref for BufferSnapshot { } } -impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { - type I = ByteChunks<'a>; - - fn text(&mut self, node: tree_sitter::Node) -> Self::I { - ByteChunks(self.0.chunks_in_range(node.byte_range())) - } -} - -pub(crate) struct ByteChunks<'a>(rope::Chunks<'a>); - -impl<'a> Iterator for ByteChunks<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - self.0.next().map(str::as_bytes) - } -} - unsafe impl<'a> Send for BufferChunks<'a> {} impl<'a> BufferChunks<'a> { pub(crate) fn new( text: &'a Rope, range: Range, - tree: Option<&'a Tree>, - grammar: Option<&'a Arc>, + syntax: Option<(SyntaxMapCaptures<'a>, Vec)>, diagnostic_endpoints: Vec, ) -> Self { let mut highlights = None; - if let Some((grammar, tree)) = grammar.zip(tree) { - if let Some(highlights_query) = grammar.highlights_query.as_ref() { - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. 
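The new `syntax` parameter in the signature just above pairs a `SyntaxMapCaptures` iterator with one `HighlightMap` per grammar, ordered the same way as `captures.grammars()`, so each capture can later be resolved through `highlight_maps[capture.grammar_index]`. A sketch of how a caller assembles that pair, mirroring the `chunks` method; `snapshot` and `range` are assumed bindings:

    let captures = snapshot.syntax.captures(range.clone(), &snapshot.text, |grammar| {
        grammar.highlights_query.as_ref()
    });
    // One highlight map per grammar, in the same order as `captures.grammars()`.
    let highlight_maps: Vec<_> = captures
        .grammars()
        .iter()
        .map(|grammar| grammar.highlight_map())
        .collect();
    let chunks = BufferChunks::new(
        snapshot.text.as_rope(),
        range,
        Some((captures, highlight_maps)),
        Vec::new(),
    );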
- let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - let captures = cursor.set_byte_range(range.clone()).captures( - highlights_query, - tree.root_node(), - TextProvider(text), - ); - highlights = Some(BufferChunkHighlights { - captures, - next_capture: None, - stack: Default::default(), - highlight_map: grammar.highlight_map(), - _query_cursor: query_cursor, - }) - } + if let Some((captures, highlight_maps)) = syntax { + highlights = Some(BufferChunkHighlights { + captures, + next_capture: None, + stack: Default::default(), + highlight_maps, + }) } let diagnostic_endpoints = diagnostic_endpoints.into_iter().peekable(); @@ -2324,14 +2293,13 @@ impl<'a> BufferChunks<'a> { highlights .stack .retain(|(end_offset, _)| *end_offset > offset); - if let Some((mat, capture_ix)) = &highlights.next_capture { - let capture = mat.captures[*capture_ix as usize]; + if let Some(capture) = &highlights.next_capture { if offset >= capture.node.start_byte() { let next_capture_end = capture.node.end_byte(); if offset < next_capture_end { highlights.stack.push(( next_capture_end, - highlights.highlight_map.get(capture.index), + highlights.highlight_maps[capture.grammar_index].get(capture.index), )); } highlights.next_capture.take(); @@ -2407,13 +2375,13 @@ impl<'a> Iterator for BufferChunks<'a> { highlights.next_capture = highlights.captures.next(); } - while let Some((mat, capture_ix)) = highlights.next_capture.as_ref() { - let capture = mat.captures[*capture_ix as usize]; + while let Some(capture) = highlights.next_capture.as_ref() { if self.range.start < capture.node.start_byte() { next_capture_start = capture.node.start_byte(); break; } else { - let highlight_id = highlights.highlight_map.get(capture.index); + let highlight_id = + highlights.highlight_maps[capture.grammar_index].get(capture.index); highlights .stack .push((capture.node.end_byte(), highlight_id)); @@ -2465,52 +2433,6 @@ impl<'a> Iterator for BufferChunks<'a> { } } -impl QueryCursorHandle { - pub(crate) fn new() -> Self { - let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); - cursor.set_match_limit(64); - QueryCursorHandle(Some(cursor)) - } -} - -impl Deref for QueryCursorHandle { - type Target = QueryCursor; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().unwrap() - } -} - -impl DerefMut for QueryCursorHandle { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().unwrap() - } -} - -impl Drop for QueryCursorHandle { - fn drop(&mut self) { - let mut cursor = self.0.take().unwrap(); - cursor.set_byte_range(0..usize::MAX); - cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); - QUERY_CURSORS.lock().push(cursor) - } -} - -pub(crate) trait ToTreeSitterPoint { - fn to_ts_point(self) -> tree_sitter::Point; - fn from_ts_point(point: tree_sitter::Point) -> Self; -} - -impl ToTreeSitterPoint for Point { - fn to_ts_point(self) -> tree_sitter::Point { - tree_sitter::Point::new(self.row as usize, self.column as usize) - } - - fn from_ts_point(point: tree_sitter::Point) -> Self { - Point::new(point.row as u32, point.column as u32) - } -} - impl operation_queue::Operation for Operation { fn lamport_timestamp(&self) -> clock::Lamport { match self { diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 8dcfc8fffd..780f6e75b5 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -30,8 +30,12 @@ use std::{ ops::Range, path::{Path, PathBuf}, str, - sync::Arc, + sync::{ 
+ atomic::{AtomicUsize, Ordering::SeqCst}, + Arc, + }, }; +use syntax_map::SyntaxSnapshot; use theme::{SyntaxTheme, Theme}; use tree_sitter::{self, Query}; use util::ResultExt; @@ -50,6 +54,7 @@ thread_local! { } lazy_static! { + pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default(); pub static ref PLAIN_TEXT: Arc = Arc::new(Language::new( LanguageConfig { name: "Plain Text".into(), @@ -286,15 +291,29 @@ pub struct Language { } pub struct Grammar { + id: usize, pub(crate) ts_language: tree_sitter::Language, pub(crate) highlights_query: Option, - pub(crate) brackets_query: Option, - pub(crate) indents_query: Option, - pub(crate) outline_query: Option, + pub(crate) brackets_config: Option, + pub(crate) indents_config: Option, + pub(crate) outline_config: Option, pub(crate) injection_config: Option, pub(crate) highlight_map: Mutex, } +struct IndentConfig { + query: Query, + indent_capture_ix: u32, + end_capture_ix: Option, +} + +struct OutlineConfig { + query: Query, + item_capture_ix: u32, + name_capture_ix: u32, + context_capture_ix: Option, +} + struct InjectionConfig { query: Query, content_capture_ix: u32, @@ -302,6 +321,12 @@ struct InjectionConfig { languages_by_pattern_ix: Vec>>, } +struct BracketConfig { + query: Query, + open_capture_ix: u32, + close_capture_ix: u32, +} + #[derive(Clone)] pub enum LanguageServerBinaryStatus { CheckingForUpdate, @@ -499,6 +524,13 @@ impl LanguageRegistry { } } +#[cfg(any(test, feature = "test-support"))] +impl Default for LanguageRegistry { + fn default() -> Self { + Self::test() + } +} + async fn get_server_binary_path( adapter: Arc, language: Arc, @@ -576,10 +608,11 @@ impl Language { config, grammar: ts_language.map(|ts_language| { Arc::new(Grammar { + id: NEXT_GRAMMAR_ID.fetch_add(1, SeqCst), highlights_query: None, - brackets_query: None, - indents_query: None, - outline_query: None, + brackets_config: None, + outline_config: None, + indents_config: None, injection_config: None, ts_language, highlight_map: Default::default(), @@ -604,19 +637,70 @@ impl Language { pub fn with_brackets_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.brackets_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut open_capture_ix = None; + let mut close_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("open", &mut open_capture_ix), + ("close", &mut close_capture_ix), + ], + ); + if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) { + grammar.brackets_config = Some(BracketConfig { + query, + open_capture_ix, + close_capture_ix, + }); + } Ok(self) } pub fn with_indents_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.indents_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut indent_capture_ix = None; + let mut end_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("indent", &mut indent_capture_ix), + ("end", &mut end_capture_ix), + ], + ); + if let Some(indent_capture_ix) = indent_capture_ix { + grammar.indents_config = Some(IndentConfig { + query, + indent_capture_ix, + end_capture_ix, + }); + } Ok(self) } pub fn with_outline_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.outline_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut item_capture_ix = None; + let mut 
name_capture_ix = None; + let mut context_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("item", &mut item_capture_ix), + ("name", &mut name_capture_ix), + ("context", &mut context_capture_ix), + ], + ); + if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) { + grammar.outline_config = Some(OutlineConfig { + query, + item_capture_ix, + name_capture_ix, + context_capture_ix, + }); + } Ok(self) } @@ -625,13 +709,13 @@ impl Language { let query = Query::new(grammar.ts_language, source)?; let mut language_capture_ix = None; let mut content_capture_ix = None; - for (ix, name) in query.capture_names().iter().enumerate() { - *match name.as_str() { - "language" => &mut language_capture_ix, - "content" => &mut content_capture_ix, - _ => continue, - } = Some(ix as u32); - } + get_capture_indices( + &query, + &mut [ + ("language", &mut language_capture_ix), + ("content", &mut content_capture_ix), + ], + ); let languages_by_pattern_ix = (0..query.pattern_count()) .map(|ix| { query.property_settings(ix).iter().find_map(|setting| { @@ -729,9 +813,16 @@ impl Language { let mut result = Vec::new(); if let Some(grammar) = &self.grammar { let tree = grammar.parse_text(text, None); + let captures = SyntaxSnapshot::single_tree_captures( + range.clone(), + text, + &tree, + grammar, + |grammar| grammar.highlights_query.as_ref(), + ); + let highlight_maps = vec![grammar.highlight_map()]; let mut offset = 0; - for chunk in BufferChunks::new(text, range, Some(&tree), self.grammar.as_ref(), vec![]) - { + for chunk in BufferChunks::new(text, range, Some((captures, highlight_maps)), vec![]) { let end_offset = offset + chunk.text.len(); if let Some(highlight_id) = chunk.syntax_highlight_id { if !highlight_id.is_default() { @@ -771,6 +862,10 @@ impl Language { } impl Grammar { + pub fn id(&self) -> usize { + self.id + } + fn parse_text(&self, text: &Rope, old_tree: Option) -> Tree { PARSER.with(|parser| { let mut parser = parser.borrow_mut(); @@ -870,6 +965,17 @@ impl LspAdapter for Arc { } } +fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option)]) { + for (ix, name) in query.capture_names().iter().enumerate() { + for (capture_name, index) in captures.iter_mut() { + if capture_name == name { + **index = Some(ix as u32); + break; + } + } + } +} + pub fn point_to_lsp(point: PointUtf16) -> lsp::Position { lsp::Position::new(point.row, point.column) } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a578d36a38..ca0c28202c 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,26 +1,28 @@ -use crate::{ - Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, - ToTreeSitterPoint, -}; +use crate::{Grammar, InjectionConfig, Language, LanguageRegistry}; +use lazy_static::lazy_static; +use parking_lot::Mutex; use std::{ borrow::Cow, cell::RefCell, cmp::{Ordering, Reverse}, collections::BinaryHeap, - iter::Peekable, - ops::{DerefMut, Range}, + ops::{Deref, DerefMut, Range}, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; +use text::{rope, Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{ - Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree, + Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree, }; thread_local! 
{ static PARSER: RefCell = RefCell::new(Parser::new()); } +lazy_static! { + static ref QUERY_CURSORS: Mutex> = Default::default(); +} + #[derive(Default)] pub struct SyntaxMap { parsed_version: clock::Global, @@ -34,39 +36,51 @@ pub struct SyntaxSnapshot { layers: SumTree, } +#[derive(Default)] pub struct SyntaxMapCaptures<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Default)] pub struct SyntaxMapMatches<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Debug)] pub struct SyntaxMapCapture<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub node: Node<'a>, pub index: u32, + pub grammar_index: usize, } +#[derive(Debug)] pub struct SyntaxMapMatch<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub pattern_index: usize, pub captures: &'a [QueryCapture<'a>], + pub grammar_index: usize, } struct SyntaxMapCapturesLayer<'a> { depth: usize, - captures: Peekable>>, - grammar: &'a Grammar, + captures: QueryCaptures<'a, 'a, TextProvider<'a>>, + next_capture: Option>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } struct SyntaxMapMatchesLayer<'a> { depth: usize, - matches: Peekable>>, - grammar: &'a Grammar, + next_pattern_index: usize, + next_captures: Vec>, + has_next: bool, + matches: QueryMatches<'a, 'a, TextProvider<'a>>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } @@ -80,6 +94,7 @@ struct SyntaxLayer { #[derive(Debug, Clone)] struct SyntaxLayerSummary { + min_depth: usize, max_depth: usize, range: Range, last_layer_range: Range, @@ -110,6 +125,12 @@ struct ChangedRegion { #[derive(Default)] struct ChangeRegionSet(Vec); +struct TextProvider<'a>(&'a Rope); + +struct ByteChunks<'a>(rope::Chunks<'a>); + +struct QueryCursorHandle(Option); + impl SyntaxMap { pub fn new() -> Self { Self::default() @@ -123,11 +144,20 @@ impl SyntaxMap { self.snapshot.clone() } + pub fn language_registry(&self) -> Option> { + self.language_registry.clone() + } + + pub fn parsed_version(&self) -> clock::Global { + self.parsed_version.clone() + } + pub fn interpolate(&mut self, text: &BufferSnapshot) { self.snapshot.interpolate(&self.interpolated_version, text); self.interpolated_version = text.version.clone(); } + #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { if !self.interpolated_version.observed_all(&text.version) { self.interpolate(text); @@ -141,9 +171,22 @@ impl SyntaxMap { ); self.parsed_version = text.version.clone(); } + + pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { + self.parsed_version = version; + self.snapshot = snapshot; + } + + pub fn clear(&mut self) { + self.snapshot = SyntaxSnapshot::default(); + } } impl SyntaxSnapshot { + pub fn is_empty(&self) -> bool { + self.layers.is_empty() + } + pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text .edits_since::<(usize, Point)>(&from_version) @@ -429,117 +472,52 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn single_tree_captures<'a>( + range: Range, + text: &'a Rope, + tree: &'a Tree, + grammar: &'a Grammar, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures<'a> { + SyntaxMapCaptures::new( + range.clone(), + text, + [(grammar, 0, tree.root_node())].into_iter(), + query, + ) + } + pub fn captures<'a>( &'a self, range: Range, buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, + query: fn(&Grammar) -> Option<&Query>, ) -> SyntaxMapCaptures { - let mut result = SyntaxMapCaptures { 
layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { - let query = if let Some(query) = query(grammar) { - query - } else { - continue; - }; - - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. - let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - - cursor.set_byte_range(range.clone()); - let captures = cursor.captures(query, node, TextProvider(buffer.as_rope())); - let mut layer = SyntaxMapCapturesLayer { - depth, - grammar, - captures: captures.peekable(), - _query_cursor: query_cursor, - }; - - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } - result.layers.insert(ix, layer); - } - } - result + SyntaxMapCaptures::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) } pub fn matches<'a>( &'a self, range: Range, buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, + query: fn(&Grammar) -> Option<&Query>, ) -> SyntaxMapMatches { - let mut result = SyntaxMapMatches { layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { - let query = if let Some(query) = query(grammar) { - query - } else { - continue; - }; - - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. - let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - - cursor.set_byte_range(range.clone()); - let matches = cursor.matches(query, node, TextProvider(buffer.as_rope())); - let mut layer = SyntaxMapMatchesLayer { - depth, - grammar, - matches: matches.peekable(), - _query_cursor: query_cursor, - }; - - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } - result.layers.insert(ix, layer); - } - } - result + SyntaxMapMatches::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) } - pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { - self.layers - .iter() - .filter_map(|layer| { - if let Some(grammar) = &layer.language.grammar { - Some(( - grammar.as_ref(), - layer.tree.root_node_with_offset( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - )) - } else { - None - } - }) - .collect() + #[cfg(test)] + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, usize, Node)> { + self.layers_for_range(0..buffer.len(), buffer) } pub fn layers_for_range<'a, T: ToOffset>( @@ -551,9 +529,13 @@ impl SyntaxSnapshot { let end = buffer.anchor_after(range.end.to_offset(buffer)); let mut cursor = self.layers.filter::<_, ()>(|summary| { - let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); - let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); - !is_before_start && !is_after_end + if summary.max_depth > summary.min_depth { + true + } else { + let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); + let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); + !is_before_start && !is_after_end + } }); let mut result = 
Vec::new(); @@ -576,57 +558,274 @@ impl SyntaxSnapshot { } } -impl<'a> Iterator for SyntaxMapCaptures<'a> { - type Item = SyntaxMapCapture<'a>; +impl<'a> SyntaxMapCaptures<'a> { + fn new( + range: Range, + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self { + layers: Vec::new(), + grammars: Vec::new(), + active_layer_count: 0, + }; + for (grammar, depth, node) in layers { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; - fn next(&mut self) -> Option { - let layer = self.layers.first_mut()?; - let (mat, ix) = layer.captures.next()?; + let mut query_cursor = QueryCursorHandle::new(); - let capture = mat.captures[ix as usize]; - let grammar = layer.grammar; - let depth = layer.depth; + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; - if let Some(key) = layer.sort_key() { - let mut i = 1; - while let Some(later_layer) = self.layers.get_mut(i) { - if let Some(later_key) = later_layer.sort_key() { - if key > later_key { - i += 1; - continue; - } - } - break; + cursor.set_byte_range(range.clone()); + let captures = cursor.captures(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); + let mut layer = SyntaxMapCapturesLayer { + depth, + grammar_index, + next_capture: None, + captures, + _query_cursor: query_cursor, + }; + + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; + result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); } - if i > 1 { - self.layers[0..i].rotate_left(1); - } - } else { - self.layers.remove(0); } + result + } + + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars + } + + pub fn peek(&self) -> Option> { + let layer = self.layers[..self.active_layer_count].first()?; + let capture = layer.next_capture?; Some(SyntaxMapCapture { - grammar, - depth, - node: capture.node, + depth: layer.depth, + grammar_index: layer.grammar_index, index: capture.index, + node: capture.node, }) } + + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() { + layer + } else { + return false; + }; + + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; + } + + true + } + + pub fn set_byte_range(&mut self, range: Range) { + for layer in &mut self.layers { + layer.captures.set_byte_range(range.clone()); + if let Some(capture) = &layer.next_capture { + if capture.node.end_byte() > range.start { + continue; + } + } + layer.advance(); + } + self.layers.sort_unstable_by_key(|layer| layer.sort_key()); + self.active_layer_count = self + .layers + .iter() + .position(|layer| layer.next_capture.is_none()) + .unwrap_or(self.layers.len()); + } +} + +impl<'a> SyntaxMapMatches<'a> { + fn new( + 
range: Range, + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self::default(); + for (grammar, depth, node) in layers { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let matches = cursor.matches(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); + let mut layer = SyntaxMapMatchesLayer { + depth, + grammar_index, + matches, + next_pattern_index: 0, + next_captures: Vec::new(), + has_next: false, + _query_cursor: query_cursor, + }; + + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; + result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); + } + } + result + } + + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars + } + + pub fn peek(&self) -> Option { + let layer = self.layers.first()?; + if !layer.has_next { + return None; + } + Some(SyntaxMapMatch { + depth: layer.depth, + grammar_index: layer.grammar_index, + pattern_index: layer.next_pattern_index, + captures: &layer.next_captures, + }) + } + + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers.first_mut() { + layer + } else { + return false; + }; + + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; + } + + true + } } impl<'a> SyntaxMapCapturesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let (mat, ix) = self.captures.peek()?; - let range = &mat.captures[*ix].node.byte_range(); - Some((range.start, Reverse(range.end), self.depth)) + fn advance(&mut self) { + self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]); + } + + fn sort_key(&self) -> (usize, Reverse, usize) { + if let Some(capture) = &self.next_capture { + let range = capture.node.byte_range(); + (range.start, Reverse(range.end), self.depth) + } else { + (usize::MAX, Reverse(0), usize::MAX) + } } } impl<'a> SyntaxMapMatchesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let mat = self.matches.peek()?; - let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte(); - Some((range.start, Reverse(range.end), self.depth)) + fn advance(&mut self) { + if let Some(mat) = self.matches.next() { + self.next_captures.clear(); + self.next_captures.extend_from_slice(&mat.captures); + self.next_pattern_index = mat.pattern_index; + self.has_next = true; + } else { + self.has_next = false; + } + } + + fn sort_key(&self) -> (usize, Reverse, usize) { + if self.has_next { + let captures = &self.next_captures; + if let Some((first, last)) = captures.first().zip(captures.last()) { + 
return ( + first.node.start_byte(), + Reverse(last.node.end_byte()), + self.depth, + ); + } + } + (usize::MAX, Reverse(0), usize::MAX) + } +} + +impl<'a> Iterator for SyntaxMapCaptures<'a> { + type Item = SyntaxMapCapture<'a>; + + fn next(&mut self) -> Option { + let result = self.peek(); + self.advance(); + result } } @@ -864,6 +1063,7 @@ impl Default for SyntaxLayerSummary { fn default() -> Self { Self { max_depth: 0, + min_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, } @@ -875,7 +1075,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { if other.max_depth > self.max_depth { - *self = other.clone(); + self.max_depth = other.max_depth; + self.range = other.range.clone(); } else { if other.range.start.cmp(&self.range.start, buffer).is_lt() { self.range.start = other.range.start; @@ -883,8 +1084,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { if other.range.end.cmp(&self.range.end, buffer).is_gt() { self.range.end = other.range.end; } - self.last_layer_range = other.last_layer_range.clone(); } + self.last_layer_range = other.last_layer_range.clone(); } } @@ -927,6 +1128,7 @@ impl sum_tree::Item for SyntaxLayer { fn summary(&self) -> Self::Summary { SyntaxLayerSummary { + min_depth: self.depth, max_depth: self.depth, range: self.range.clone(), last_layer_range: self.range.clone(), @@ -944,12 +1146,73 @@ impl std::fmt::Debug for SyntaxLayer { } } +impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { + type I = ByteChunks<'a>; + + fn text(&mut self, node: tree_sitter::Node) -> Self::I { + ByteChunks(self.0.chunks_in_range(node.byte_range())) + } +} + +impl<'a> Iterator for ByteChunks<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + self.0.next().map(str::as_bytes) + } +} + +impl QueryCursorHandle { + pub(crate) fn new() -> Self { + let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); + cursor.set_match_limit(64); + QueryCursorHandle(Some(cursor)) + } +} + +impl Deref for QueryCursorHandle { + type Target = QueryCursor; + + fn deref(&self) -> &Self::Target { + self.0.as_ref().unwrap() + } +} + +impl DerefMut for QueryCursorHandle { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.as_mut().unwrap() + } +} + +impl Drop for QueryCursorHandle { + fn drop(&mut self) { + let mut cursor = self.0.take().unwrap(); + cursor.set_byte_range(0..usize::MAX); + cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); + QUERY_CURSORS.lock().push(cursor) + } +} + +pub(crate) trait ToTreeSitterPoint { + fn to_ts_point(self) -> tree_sitter::Point; + fn from_ts_point(point: tree_sitter::Point) -> Self; +} + +impl ToTreeSitterPoint for Point { + fn to_ts_point(self) -> tree_sitter::Point { + tree_sitter::Point::new(self.row as usize, self.column as usize) + } + + fn from_ts_point(point: tree_sitter::Point) -> Self { + Point::new(point.row as u32, point.column as u32) + } +} + #[cfg(test)] mod tests { use super::*; use crate::LanguageConfig; use text::{Buffer, Point}; - use tree_sitter::Query; use unindent::Unindent as _; use util::test::marked_text_ranges; @@ -1298,13 +1561,13 @@ mod tests { mutated_layers.into_iter().zip(reference_layers.into_iter()) { assert_eq!( - edited_layer.1.to_sexp(), - reference_layer.1.to_sexp(), + edited_layer.2.to_sexp(), + reference_layer.2.to_sexp(), "different layer at step {i}" ); assert_eq!( - edited_layer.1.range(), - reference_layer.1.range(), + edited_layer.2.range(), + 
reference_layer.2.range(), "different layer at step {i}" ); } @@ -1377,16 +1640,16 @@ mod tests { marked_string: &str, ) { let mut actual_ranges = Vec::>::new(); - for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| { + let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| { grammar.highlights_query.as_ref() - }) { - let name = &capture - .grammar - .highlights_query - .as_ref() - .unwrap() - .capture_names()[capture.index as usize]; - dbg!(capture.node, capture.index, name); + }); + let queries = captures + .grammars() + .iter() + .map(|grammar| grammar.highlights_query.as_ref().unwrap()) + .collect::>(); + for capture in captures { + let name = &queries[capture.grammar_index].capture_names()[capture.index as usize]; if highlight_query_capture_names.contains(&name.as_str()) { actual_ranges.push(capture.node.byte_range()); } diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index 44c15d1a3b..ad997753cd 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -1407,7 +1407,9 @@ fn json_lang() -> Language { fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { buffer.read_with(cx, |buffer, _| { - buffer.syntax_tree().unwrap().root_node().to_sexp() + let syntax_map = buffer.syntax_map(); + let layers = syntax_map.layers(buffer.as_text_snapshot()); + layers[0].2.to_sexp() }) } diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 0f762f822f..531fdcbe15 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -2056,6 +2056,7 @@ impl Project { let full_path = buffer.read(cx).file()?.full_path(cx); let language = self.languages.select_language(&full_path)?; buffer.update(cx, |buffer, cx| { + buffer.set_language_registry(self.languages.clone()); buffer.set_language(Some(language.clone()), cx); }); diff --git a/crates/zed/src/languages/rust/injections.scm b/crates/zed/src/languages/rust/injections.scm index 9d8c03c889..57ebea8539 100644 --- a/crates/zed/src/languages/rust/injections.scm +++ b/crates/zed/src/languages/rust/injections.scm @@ -1,3 +1,7 @@ (macro_invocation - (token_tree) @content) + (token_tree) @content + (#set! "language" "rust")) + +(macro_rule + (token_tree) @content (#set! 
"language" "rust")) \ No newline at end of file From 587175d0ea007875704813fe5beb274d9072c315 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:09:10 -0700 Subject: [PATCH 10/22] Update syntax map's interpolated version when a parse completes --- crates/language/src/syntax_map.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index ca0c28202c..14055991e6 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -173,6 +173,7 @@ impl SyntaxMap { } pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { + self.interpolated_version = version.clone(); self.parsed_version = version; self.snapshot = snapshot; } From 3245e4f8d70b88edb37d9758539990207be4bc06 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:10:53 -0700 Subject: [PATCH 11/22] Fix out-of-range panic when requesting outline items at EOF --- crates/language/src/buffer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 7b298b7420..80b9cdbaa3 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -1953,8 +1953,10 @@ impl BufferSnapshot { theme: Option<&SyntaxTheme>, ) -> Option>> { let position = position.to_offset(self); - let mut items = - self.outline_items_containing(position.saturating_sub(1)..position + 1, theme)?; + let mut items = self.outline_items_containing( + position.saturating_sub(1)..self.len().min(position + 1), + theme, + )?; let mut prev_depth = None; items.retain(|item| { let result = prev_depth.map_or(true, |prev_depth| item.depth > prev_depth); From b3f4c732648d8677162468ec922e051ef9ad57fd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:11:26 -0700 Subject: [PATCH 12/22] Clean up some of buffer's syntax-related methods --- crates/language/src/buffer.rs | 119 +++++++++++++++++----------------- crates/language/src/tests.rs | 4 +- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 80b9cdbaa3..ca32c8b1fb 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -72,7 +72,7 @@ pub struct Buffer { pub struct BufferSnapshot { text: text::BufferSnapshot, - syntax: SyntaxSnapshot, + pub(crate) syntax: SyntaxSnapshot, file: Option>, diagnostics: DiagnosticSet, diagnostics_update_count: usize, @@ -461,9 +461,14 @@ impl Buffer { } pub fn snapshot(&self) -> BufferSnapshot { + let text = self.text.snapshot(); + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&text); + let syntax = syntax_map.snapshot(); + BufferSnapshot { - text: self.text.snapshot(), - syntax: self.syntax_map(), + text, + syntax, file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -674,12 +679,6 @@ impl Buffer { self.file_update_count } - pub(crate) fn syntax_map(&self) -> SyntaxSnapshot { - let mut syntax_map = self.syntax_map.lock(); - syntax_map.interpolate(&self.text_snapshot()); - syntax_map.snapshot() - } - #[cfg(any(test, feature = "test-support"))] pub fn is_parsing(&self) -> bool { self.parsing_in_background @@ -690,73 +689,73 @@ impl Buffer { self.sync_parse_timeout = timeout; } - fn reparse(&mut self, cx: &mut ModelContext) -> bool { + fn reparse(&mut self, cx: &mut ModelContext) { if self.parsing_in_background { - return false; + return; } + let language 
= if let Some(language) = self.language.clone() { + language + } else { + return; + }; - if let Some(language) = self.language.clone() { - let text = self.text_snapshot(); - let parsed_version = self.version(); + let text = self.text_snapshot(); + let parsed_version = self.version(); - let mut syntax_map; - let language_registry; - let syntax_map_version; - { - let mut map = self.syntax_map.lock(); - map.interpolate(&text); - language_registry = map.language_registry(); - syntax_map = map.snapshot(); - syntax_map_version = map.parsed_version(); + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&text); + let language_registry = syntax_map.language_registry(); + let mut syntax_snapshot = syntax_map.snapshot(); + let syntax_map_version = syntax_map.parsed_version(); + drop(syntax_map); + + let parse_task = cx.background().spawn({ + let language = language.clone(); + async move { + syntax_snapshot.reparse(&syntax_map_version, &text, language_registry, language); + syntax_snapshot } - let parse_task = cx.background().spawn({ - let language = language.clone(); - async move { - syntax_map.reparse(&syntax_map_version, &text, language_registry, language); - syntax_map - } - }); + }); - match cx - .background() - .block_with_timeout(self.sync_parse_timeout, parse_task) - { - Ok(new_syntax_map) => { - self.did_finish_parsing(new_syntax_map, parsed_version, cx); - return true; - } - Err(parse_task) => { - self.parsing_in_background = true; - cx.spawn(move |this, mut cx| async move { - let new_syntax_map = parse_task.await; - this.update(&mut cx, move |this, cx| { - let grammar_changed = - this.language.as_ref().map_or(true, |current_language| { - !Arc::ptr_eq(&language, current_language) - }); - let parse_again = - this.version.changed_since(&parsed_version) || grammar_changed; - this.parsing_in_background = false; - this.did_finish_parsing(new_syntax_map, parsed_version, cx); - - if parse_again && this.reparse(cx) {} - }); - }) - .detach(); - } + match cx + .background() + .block_with_timeout(self.sync_parse_timeout, parse_task) + { + Ok(new_syntax_snapshot) => { + self.did_finish_parsing(new_syntax_snapshot, parsed_version, cx); + return; + } + Err(parse_task) => { + self.parsing_in_background = true; + cx.spawn(move |this, mut cx| async move { + let new_syntax_map = parse_task.await; + this.update(&mut cx, move |this, cx| { + let grammar_changed = + this.language.as_ref().map_or(true, |current_language| { + !Arc::ptr_eq(&language, current_language) + }); + let parse_again = + this.version.changed_since(&parsed_version) || grammar_changed; + this.did_finish_parsing(new_syntax_map, parsed_version, cx); + this.parsing_in_background = false; + if parse_again { + this.reparse(cx); + } + }); + }) + .detach(); } } - false } fn did_finish_parsing( &mut self, - syntax_map: SyntaxSnapshot, + syntax_snapshot: SyntaxSnapshot, version: clock::Global, cx: &mut ModelContext, ) { self.parse_count += 1; - self.syntax_map.lock().did_parse(syntax_map, version); + self.syntax_map.lock().did_parse(syntax_snapshot, version); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index ad997753cd..8a912b9a9b 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -1407,8 +1407,8 @@ fn json_lang() -> Language { fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { buffer.read_with(cx, |buffer, _| { - let syntax_map = buffer.syntax_map(); - let layers = 
syntax_map.layers(buffer.as_text_snapshot()); + let snapshot = buffer.snapshot(); + let layers = snapshot.syntax.layers(buffer.as_text_snapshot()); layers[0].2.to_sexp() }) } From 015b6c4a31cfc6e0d34d4989daab8de1c4b04ccf Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:29:07 -0700 Subject: [PATCH 13/22] Fix test about auto-indent with no indent query --- crates/language/src/buffer.rs | 24 +++++++++++++----------- crates/language/src/tests.rs | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index ca32c8b1fb..7fe62d7cd8 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -1627,9 +1627,7 @@ impl BufferSnapshot { &self, row_range: Range, ) -> Option> + '_> { - let language = self.language.as_ref()?; - let grammar = language.grammar.as_ref()?; - let config = &language.config; + let config = &self.language.as_ref()?.config; let prev_non_blank_row = self.prev_non_blank_row(row_range.start); // Find the suggested indentation ranges based on the syntax tree. @@ -1639,20 +1637,24 @@ impl BufferSnapshot { let mut matches = self.syntax.matches(range, &self.text, |grammar| { Some(&grammar.indents_config.as_ref()?.query) }); + let indent_configs = matches + .grammars() + .iter() + .map(|grammar| grammar.indents_config.as_ref().unwrap()) + .collect::>(); let mut indent_ranges = Vec::>::new(); while let Some(mat) = matches.peek() { let mut start: Option = None; let mut end: Option = None; - if let Some(config) = &grammar.indents_config { - for capture in mat.captures { - if capture.index == config.indent_capture_ix { - start.get_or_insert(Point::from_ts_point(capture.node.start_position())); - end.get_or_insert(Point::from_ts_point(capture.node.end_position())); - } else if Some(capture.index) == config.end_capture_ix { - end = Some(Point::from_ts_point(capture.node.start_position())); - } + let config = &indent_configs[mat.grammar_index]; + for capture in mat.captures { + if capture.index == config.indent_capture_ix { + start.get_or_insert(Point::from_ts_point(capture.node.start_position())); + end.get_or_insert(Point::from_ts_point(capture.node.end_position())); + } else if Some(capture.index) == config.end_capture_ix { + end = Some(Point::from_ts_point(capture.node.start_position())); } } diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index 8a912b9a9b..f2a33ccbd3 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -998,6 +998,7 @@ fn test_autoindent_language_without_indents_query(cx: &mut MutableAppContext) { Arc::new(Language::new( LanguageConfig { name: "Markdown".into(), + auto_indent_using_last_non_empty_line: false, ..Default::default() }, Some(tree_sitter_json::language()), From 61b70b7e2d6c528c89b7df377c088d9e4fc6f8c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 15:29:25 -0700 Subject: [PATCH 14/22] Fix re-parsing when buffer's language has changed --- crates/language/src/syntax_map.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 14055991e6..117699a351 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -306,10 +306,6 @@ impl SyntaxSnapshot { language: Arc, ) { let edits = text.edits_since::(from_version).collect::>(); - if edits.is_empty() { - return; - } - let max_depth = self.layers.summary().max_depth; let mut cursor = self.layers.cursor::(); cursor.next(&text); 
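With the early return gone, `SyntaxSnapshot::reparse` proceeds even when `edits_since(from_version)` yields nothing, which is what lets a change of language alone rebuild the layers. A rough sketch of the scenario via the test-only `SyntaxMap::reparse` helper added earlier in this series; `text`, `rust`, and `markdown` are assumed bindings:

    // Parse the same, unedited text twice with two different languages.
    let mut map = SyntaxMap::new();
    map.reparse(rust, &text);
    // Same version vector, different language: previously the empty edit list
    // caused an early return that kept the old layers; now they are rebuilt.
    map.reparse(markdown, &text);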
From f96c19b81a730f5bbf2cb586da616fa9d6c2e59a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:37:14 -0700 Subject: [PATCH 15/22] Add injection queries for macro bodies in C and C++ --- crates/zed/src/languages/c/injections.scm | 7 +++++++ crates/zed/src/languages/cpp/injections.scm | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 crates/zed/src/languages/c/injections.scm create mode 100644 crates/zed/src/languages/cpp/injections.scm diff --git a/crates/zed/src/languages/c/injections.scm b/crates/zed/src/languages/c/injections.scm new file mode 100644 index 0000000000..845a63bd1b --- /dev/null +++ b/crates/zed/src/languages/c/injections.scm @@ -0,0 +1,7 @@ +(preproc_def + value: (preproc_arg) @content + (#set! "language" "c")) + +(preproc_function_def + value: (preproc_arg) @content + (#set! "language" "c")) \ No newline at end of file diff --git a/crates/zed/src/languages/cpp/injections.scm b/crates/zed/src/languages/cpp/injections.scm new file mode 100644 index 0000000000..eca372d577 --- /dev/null +++ b/crates/zed/src/languages/cpp/injections.scm @@ -0,0 +1,7 @@ +(preproc_def + value: (preproc_arg) @content + (#set! "language" "c++")) + +(preproc_function_def + value: (preproc_arg) @content + (#set! "language" "c++")) \ No newline at end of file From d48380bc48890021b51c601277538261ae514920 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:37:25 -0700 Subject: [PATCH 16/22] Fix pattern order in C++ highlight query Later patterns take precedence in Zed, so function names were previously being highlighted as variables due to the plain `identifier` pattern. --- crates/zed/src/languages/cpp/highlights.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/zed/src/languages/cpp/highlights.scm b/crates/zed/src/languages/cpp/highlights.scm index d579d70187..2dd9188308 100644 --- a/crates/zed/src/languages/cpp/highlights.scm +++ b/crates/zed/src/languages/cpp/highlights.scm @@ -1,3 +1,5 @@ +(identifier) @variable + (call_expression function: (qualified_identifier name: (identifier) @function)) @@ -34,8 +36,6 @@ (auto) @type (type_identifier) @type -(identifier) @variable - ((identifier) @constant (#match? 
@constant "^[A-Z][A-Z\\d_]*$")) From 345b266deefc6c26bc527537d73912a0bac92eeb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Aug 2022 16:49:03 -0700 Subject: [PATCH 17/22] Add missing rust highlights --- crates/zed/src/languages/rust/highlights.scm | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/crates/zed/src/languages/rust/highlights.scm b/crates/zed/src/languages/rust/highlights.scm index d4f571dd52..72482b4073 100644 --- a/crates/zed/src/languages/rust/highlights.scm +++ b/crates/zed/src/languages/rust/highlights.scm @@ -1,6 +1,6 @@ (type_identifier) @type (primitive_type) @type.builtin - +(self) @variable.builtin (field_identifier) @property (call_expression @@ -15,6 +15,16 @@ (function_item name: (identifier) @function.definition) (function_signature_item name: (identifier) @function.definition) +(macro_invocation + macro: [ + (identifier) @function.special + (scoped_identifier + name: (identifier) @function.special) + ]) + +(macro_definition + name: (identifier) @function.special.definition) + ; Identifier conventions ; Assume uppercase names are enum constructors @@ -71,6 +81,7 @@ "mod" "move" "pub" + "ref" "return" "static" "struct" @@ -91,6 +102,13 @@ (char_literal) ] @string +[ + (integer_literal) + (float_literal) +] @number + +(boolean_literal) @constant + [ (line_comment) (block_comment) From 1f12186e3ce53c0981a2a84e1cf562413d849782 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 10:58:46 -0700 Subject: [PATCH 18/22] Update to latest tree-sitter commit This is needed for https://github.com/tree-sitter/tree-sitter/pull/1845 --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/language/src/syntax_map.rs | 8 ++++++++ crates/text/src/text.rs | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a6d594f66..0f187a54e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5842,7 +5842,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.8" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=477b6677537e89c7bdff14ce84dad6d23a6415bb#477b6677537e89c7bdff14ce84dad6d23a6415bb" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=366210ae925d7ea0891bc7a0c738f60c77c04d7b#366210ae925d7ea0891bc7a0c738f60c77c04d7b" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index b4df3fd101..31a9118a1a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ default-members = ["crates/zed"] resolver = "2" [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "477b6677537e89c7bdff14ce84dad6d23a6415bb" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "366210ae925d7ea0891bc7a0c738f60c77c04d7b" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 117699a351..e537e5e793 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -284,11 +284,19 @@ impl SyntaxSnapshot { }; layer.tree.edit(&tree_edit); + if edit.new.start.0 < start_byte { break; } } + debug_assert!( + layer.tree.root_node().end_byte() <= text.len(), + "tree's size {}, is larger than text size {}", + layer.tree.root_node().end_byte(), + text.len(), + ); + layers.push(layer, text); cursor.next(text); } diff --git 
a/crates/text/src/text.rs b/crates/text/src/text.rs index 1f2e4e7c7a..39812740fb 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -2435,7 +2435,7 @@ impl ToOffset for PointUtf16 { impl ToOffset for usize { fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize { - assert!(*self <= snapshot.len(), "offset is out of range"); + assert!(*self <= snapshot.len(), "offset {self} is out of range"); *self } } From 74a2b093ab2ba88e7465fe3c69818020c6da496d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Aug 2022 16:40:18 -0700 Subject: [PATCH 19/22] Start work on randomized test for SyntaxMap, fix discovered bugs --- crates/language/src/syntax_map.rs | 133 ++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index e537e5e793..f7d135da1c 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -197,7 +197,7 @@ impl SyntaxSnapshot { } let mut layers = SumTree::new(); - let mut edits_for_depth = &edits[..]; + let mut first_edit_ix_for_depth = 0; let mut cursor = self.layers.cursor::(); cursor.next(text); @@ -205,7 +205,7 @@ impl SyntaxSnapshot { let depth = cursor.end(text).max_depth; // Preserve any layers at this depth that precede the first edit. - if let Some(first_edit) = edits_for_depth.first() { + if let Some(first_edit) = edits.get(first_edit_ix_for_depth) { let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); if target.cmp(&cursor.start(), text).is_gt() { let slice = cursor.slice(&target, Bias::Left, text); @@ -221,7 +221,7 @@ impl SyntaxSnapshot { text, ); layers.push_tree(slice, text); - edits_for_depth = &edits[..]; + first_edit_ix_for_depth = 0; continue; }; @@ -241,9 +241,9 @@ impl SyntaxSnapshot { // Ignore edits that end before the start of this layer, and don't consider them // for any subsequent layers at this same depth. loop { - if let Some(edit) = edits_for_depth.first() { + if let Some(edit) = edits.get(first_edit_ix_for_depth) { if edit.new.end.0 < start_byte { - edits_for_depth = &edits_for_depth[1..]; + first_edit_ix_for_depth += 1; } else { break; } @@ -252,15 +252,21 @@ impl SyntaxSnapshot { } } + let mut old_start_byte = start_byte; + if first_edit_ix_for_depth > 0 { + let edit = &edits[first_edit_ix_for_depth - 1]; + old_start_byte = edit.old.end.0 + (start_byte - edit.new.end.0); + } + let mut layer = layer.clone(); - for edit in edits_for_depth { + for edit in &edits[first_edit_ix_for_depth..] { // Ignore any edits that follow this layer. if edit.new.start.0 > end_byte { break; } // Apply any edits that intersect this layer to the layer's syntax tree. 
- let tree_edit = if edit.new.start.0 >= start_byte { + let tree_edit = if edit.old.start.0 >= old_start_byte { tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte @@ -273,21 +279,18 @@ impl SyntaxSnapshot { new_end_position: (edit.new.end.1 - start_point).to_ts_point(), } } else { + let node = layer.tree.root_node(); tree_sitter::InputEdit { start_byte: 0, - old_end_byte: edit.new.end.0 - start_byte, + old_end_byte: node.end_byte(), new_end_byte: 0, start_position: Default::default(), - old_end_position: (edit.new.end.1 - start_point).to_ts_point(), + old_end_position: node.end_position(), new_end_position: Default::default(), } }; layer.tree.edit(&tree_edit); - - if edit.new.start.0 < start_byte { - break; - } } debug_assert!( @@ -363,7 +366,7 @@ impl SyntaxSnapshot { if changed_regions.intersects(&layer, text) { changed_regions.insert( ChangedRegion { - depth: depth + 1, + depth: layer.depth + 1, range: layer.range.clone(), }, text, @@ -918,7 +921,7 @@ fn get_injections( let mut query_cursor = QueryCursorHandle::new(); let mut prev_match = None; for query_range in query_ranges { - query_cursor.set_byte_range(query_range.start..query_range.end); + query_cursor.set_byte_range(query_range.start.saturating_sub(1)..query_range.end); for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) { let content_ranges = mat .nodes_for_capture_index(config.content_capture_ix) @@ -1217,6 +1220,8 @@ impl ToTreeSitterPoint for Point { mod tests { use super::*; use crate::LanguageConfig; + use rand::rngs::StdRng; + use std::env; use text::{Buffer, Point}; use unindent::Unindent as _; use util::test::marked_text_ranges; @@ -1532,6 +1537,104 @@ mod tests { ]); } + #[gpui::test] + fn test_removing_injection_by_replacing_across_boundary() { + test_edit_sequence(&[ + " + fn one() { + two!( + three.four, + ); + } + ", + " + fn one() { + t«en + .eleven( + twelve, + » + three.four, + ); + } + ", + ]); + } + + #[gpui::test(iterations = 100)] + fn test_random_syntax_map_edits(mut rng: StdRng) { + let operations = env::var("OPERATIONS") + .map(|i| i.parse().expect("invalid `OPERATIONS` variable")) + .unwrap_or(10); + + let text = r#" + fn test_something() { + let vec = vec![5, 1, 3, 8]; + assert_eq!( + vec + .into_iter() + .map(|i| i * 2) + .collect::>(), + vec![ + 5 * 2, 1 * 2, 3 * 2, 8 * 2 + ], + ); + } + "# + .unindent(); + + let registry = Arc::new(LanguageRegistry::test()); + let language = Arc::new(rust_lang()); + registry.add(language.clone()); + let mut buffer = Buffer::new(0, 0, text); + + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(language.clone(), &buffer); + + let mut reference_syntax_map = SyntaxMap::new(); + reference_syntax_map.set_language_registry(registry.clone()); + + for i in 0..operations { + buffer.randomly_edit(&mut rng, 2); + log::info!("text:\n{}", buffer.text()); + + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + reference_syntax_map.reparse(language.clone(), &buffer); + assert_eq!( + syntax_map.layers(&buffer).len(), + reference_syntax_map.layers(&buffer).len(), + "wrong number of layers after performing edit {i}" + ); + } + + for i in 0..operations { + let i = operations - i - 1; + buffer.undo(); + log::info!("undoing operation {}", i); + log::info!("text:\n{}", buffer.text()); + + syntax_map.reparse(language.clone(), &buffer); + + reference_syntax_map.clear(); + 
reference_syntax_map.reparse(language.clone(), &buffer); + assert_eq!( + syntax_map.layers(&buffer).len(), + reference_syntax_map.layers(&buffer).len(), + "wrong number of layers after undoing edit {i}" + ); + } + + let layers = syntax_map.layers(&buffer); + let reference_layers = reference_syntax_map.layers(&buffer); + for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter()) + { + assert_eq!(edited_layer.2.to_sexp(), reference_layer.2.to_sexp()); + assert_eq!(edited_layer.2.range(), reference_layer.2.range()); + } + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 1746ec573a9c51b63ead5674437adcba70039937 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:15:53 -0700 Subject: [PATCH 20/22] Check invariants after interpolating tree in random syntax map test --- crates/language/src/syntax_map.rs | 124 ++++++++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 5 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index f7d135da1c..414516f824 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -159,10 +159,6 @@ impl SyntaxMap { #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { - if !self.interpolated_version.observed_all(&text.version) { - self.interpolate(text); - } - self.snapshot.reparse( &self.parsed_version, text, @@ -170,6 +166,7 @@ impl SyntaxMap { language, ); self.parsed_version = text.version.clone(); + self.interpolated_version = text.version.clone(); } pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { @@ -1580,7 +1577,8 @@ mod tests { ); } "# - .unindent(); + .unindent() + .repeat(2); let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); @@ -1594,10 +1592,18 @@ mod tests { let mut reference_syntax_map = SyntaxMap::new(); reference_syntax_map.set_language_registry(registry.clone()); + log::info!("initial text:\n{}", buffer.text()); + for i in 0..operations { + let prev_buffer = buffer.snapshot(); + let prev_syntax_map = syntax_map.snapshot(); + buffer.randomly_edit(&mut rng, 2); log::info!("text:\n{}", buffer.text()); + syntax_map.interpolate(&buffer); + check_interpolation(&prev_syntax_map, &syntax_map, &prev_buffer, &buffer); + syntax_map.reparse(language.clone(), &buffer); reference_syntax_map.clear(); @@ -1615,6 +1621,7 @@ mod tests { log::info!("undoing operation {}", i); log::info!("text:\n{}", buffer.text()); + syntax_map.interpolate(&buffer); syntax_map.reparse(language.clone(), &buffer); reference_syntax_map.clear(); @@ -1635,6 +1642,113 @@ mod tests { } } + fn check_interpolation( + old_syntax_map: &SyntaxSnapshot, + new_syntax_map: &SyntaxSnapshot, + old_buffer: &BufferSnapshot, + new_buffer: &BufferSnapshot, + ) { + let edits = new_buffer + .edits_since::(&old_buffer.version()) + .collect::>(); + + for (old_layer, new_layer) in old_syntax_map + .layers + .iter() + .zip(new_syntax_map.layers.iter()) + { + assert_eq!(old_layer.range, new_layer.range); + let old_start_byte = old_layer.range.start.to_offset(old_buffer); + let new_start_byte = new_layer.range.start.to_offset(new_buffer); + let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point(); + let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point(); + let old_node = old_layer + .tree + .root_node_with_offset(old_start_byte, 
old_start_point); + let new_node = new_layer + .tree + .root_node_with_offset(new_start_byte, new_start_point); + check_node_edits( + old_layer.depth, + &old_layer.range, + old_node, + new_node, + old_buffer, + new_buffer, + &edits, + ); + } + + fn check_node_edits( + depth: usize, + range: &Range, + old_node: Node, + new_node: Node, + old_buffer: &BufferSnapshot, + new_buffer: &BufferSnapshot, + edits: &[text::Edit], + ) { + assert_eq!(old_node.kind(), new_node.kind()); + + let old_range = old_node.byte_range(); + let new_range = new_node.byte_range(); + + let is_edited = edits + .iter() + .any(|edit| edit.new.start < new_range.end && edit.new.end > new_range.start); + if is_edited { + assert!( + new_node.has_changes(), + concat!( + "failed to mark node as edited.\n", + "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n", + "node kind: {}, old node range: {:?}, new node range: {:?}", + ), + depth, + range.to_offset(old_buffer), + range.to_offset(new_buffer), + new_node.kind(), + old_range, + new_range, + ); + } + + if !new_node.has_changes() { + assert_eq!( + old_buffer + .text_for_range(old_range.clone()) + .collect::(), + new_buffer + .text_for_range(new_range.clone()) + .collect::(), + concat!( + "mismatched text for node\n", + "layer depth: {}, old layer range: {:?}, new layer range: {:?},\n", + "node kind: {}, old node range:{:?}, new node range:{:?}", + ), + depth, + range.to_offset(old_buffer), + range.to_offset(new_buffer), + new_node.kind(), + old_range, + new_range, + ); + } + + for i in 0..new_node.child_count() { + check_node_edits( + depth, + range, + old_node.child(i).unwrap(), + new_node.child(i).unwrap(), + old_buffer, + new_buffer, + edits, + ) + } + } + } + fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); let language = Arc::new(rust_lang()); From 74fd348d22c83ae649e8ae5b6d8e1a931fa3739d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:51:31 -0700 Subject: [PATCH 21/22] Add Buffer::anchored_edits_since method This method returns the anchor range associated with each edit. The anchor ranges allow you to determine how each edit interacts with an existing anchor range that the edit has touched. 
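A rough, self-contained illustration of the idea, using plain byte offsets in place of the real Anchor type and a hypothetical AnchoredEdit struct: pairing each edit with the range it touched makes it cheap to decide whether some pre-existing range (for example a syntax layer's range) was affected.

    // Hypothetical stand-in for the edit/anchor-range pairs this method yields.
    struct AnchoredEdit {
        new: std::ops::Range<usize>,     // byte range of the newly inserted text
        touched: std::ops::Range<usize>, // range of pre-existing text the edit touched
    }

    // True when an existing range overlaps the range touched by the edit and
    // therefore needs to be reconciled with the new text.
    fn touches(edit: &AnchoredEdit, existing: &std::ops::Range<usize>) -> bool {
        edit.touched.start < existing.end && edit.touched.end > existing.start
    }

    fn main() {
        let edit = AnchoredEdit { new: 14..18, touched: 12..20 };
        println!("edit produced {:?}, touching {:?}", edit.new, edit.touched);
        assert!(touches(&edit, &(15..30)));  // overlaps: this range must be updated
        assert!(!touches(&edit, &(40..50))); // disjoint: this range can be kept as-is
    }

In the actual method the returned ranges are anchor ranges, which is what lets the next patch compare them directly against each layer's anchor range inside SyntaxSnapshot::interpolate.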
--- crates/text/src/text.rs | 76 +++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 39812740fb..a7736cc8cc 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -382,6 +382,7 @@ struct Edits<'a, D: TextDimension, F: FnMut(&FragmentSummary) -> bool> { old_end: D, new_end: D, range: Range<(&'a Locator, usize)>, + buffer_id: u64, } #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -1917,11 +1918,33 @@ impl BufferSnapshot { self.edits_since_in_range(since, Anchor::MIN..Anchor::MAX) } + pub fn anchored_edits_since<'a, D>( + &'a self, + since: &'a clock::Global, + ) -> impl 'a + Iterator, Range)> + where + D: TextDimension + Ord, + { + self.anchored_edits_since_in_range(since, Anchor::MIN..Anchor::MAX) + } + pub fn edits_since_in_range<'a, D>( &'a self, since: &'a clock::Global, range: Range, ) -> impl 'a + Iterator> + where + D: TextDimension + Ord, + { + self.anchored_edits_since_in_range(since, range) + .map(|item| item.0) + } + + pub fn anchored_edits_since_in_range<'a, D>( + &'a self, + since: &'a clock::Global, + range: Range, + ) -> impl 'a + Iterator, Range)> where D: TextDimension + Ord, { @@ -1961,6 +1984,7 @@ impl BufferSnapshot { old_end: Default::default(), new_end: Default::default(), range: (start_fragment_id, range.start.offset)..(end_fragment_id, range.end.offset), + buffer_id: self.remote_id, } } } @@ -2019,10 +2043,10 @@ impl<'a> RopeBuilder<'a> { } impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator for Edits<'a, D, F> { - type Item = Edit; + type Item = (Edit, Range); fn next(&mut self) -> Option { - let mut pending_edit: Option> = None; + let mut pending_edit: Option = None; let cursor = self.fragments_cursor.as_mut()?; while let Some(fragment) = cursor.item() { @@ -2041,11 +2065,25 @@ impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator fo if pending_edit .as_ref() - .map_or(false, |change| change.new.end < self.new_end) + .map_or(false, |(change, _)| change.new.end < self.new_end) { break; } + let timestamp = fragment.insertion_timestamp.local(); + let start_anchor = Anchor { + timestamp, + offset: fragment.insertion_offset, + bias: Bias::Right, + buffer_id: Some(self.buffer_id), + }; + let end_anchor = Anchor { + timestamp, + offset: fragment.insertion_offset + fragment.len, + bias: Bias::Left, + buffer_id: Some(self.buffer_id), + }; + if !fragment.was_visible(self.since, self.undos) && fragment.visible { let mut visible_end = cursor.end(&None).visible; if fragment.id == *self.range.end.0 { @@ -2058,13 +2096,17 @@ impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator fo let fragment_summary = self.visible_cursor.summary(visible_end); let mut new_end = self.new_end.clone(); new_end.add_assign(&fragment_summary); - if let Some(pending_edit) = pending_edit.as_mut() { - pending_edit.new.end = new_end.clone(); + if let Some((edit, range)) = pending_edit.as_mut() { + edit.new.end = new_end.clone(); + range.end = end_anchor; } else { - pending_edit = Some(Edit { - old: self.old_end.clone()..self.old_end.clone(), - new: self.new_end.clone()..new_end.clone(), - }); + pending_edit = Some(( + Edit { + old: self.old_end.clone()..self.old_end.clone(), + new: self.new_end.clone()..new_end.clone(), + }, + start_anchor..end_anchor, + )); } self.new_end = new_end; @@ -2083,13 +2125,17 @@ impl<'a, D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator fo let fragment_summary = 
self.deleted_cursor.summary(deleted_end); let mut old_end = self.old_end.clone(); old_end.add_assign(&fragment_summary); - if let Some(pending_edit) = pending_edit.as_mut() { - pending_edit.old.end = old_end.clone(); + if let Some((edit, range)) = pending_edit.as_mut() { + edit.old.end = old_end.clone(); + range.end = end_anchor; } else { - pending_edit = Some(Edit { - old: self.old_end.clone()..old_end.clone(), - new: self.new_end.clone()..self.new_end.clone(), - }); + pending_edit = Some(( + Edit { + old: self.old_end.clone()..old_end.clone(), + new: self.new_end.clone()..self.new_end.clone(), + }, + start_anchor..end_anchor, + )); } self.old_end = old_end; From a38c6015db40a32361c15ae2d6faa9d916fe187f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 29 Aug 2022 16:52:58 -0700 Subject: [PATCH 22/22] Fix bugs in SyntaxMap::interpolate found by the randomized test --- crates/language/src/syntax_map.rs | 46 ++++++++++++------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 414516f824..d1bf698e52 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -187,7 +187,7 @@ impl SyntaxSnapshot { pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text - .edits_since::<(usize, Point)>(&from_version) + .anchored_edits_since::<(usize, Point)>(&from_version) .collect::>(); if edits.is_empty() { return; @@ -195,15 +195,20 @@ impl SyntaxSnapshot { let mut layers = SumTree::new(); let mut first_edit_ix_for_depth = 0; + let mut prev_depth = 0; let mut cursor = self.layers.cursor::(); cursor.next(text); 'outer: loop { let depth = cursor.end(text).max_depth; + if depth > prev_depth { + first_edit_ix_for_depth = 0; + prev_depth = depth; + } // Preserve any layers at this depth that precede the first edit. - if let Some(first_edit) = edits.get(first_edit_ix_for_depth) { - let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)); + if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { + let target = DepthAndMaxPosition(depth, edit_range.start); if target.cmp(&cursor.start(), text).is_gt() { let slice = cursor.slice(&target, Bias::Left, text); layers.push_tree(slice, text); @@ -211,14 +216,13 @@ impl SyntaxSnapshot { } // If this layer follows all of the edits, then preserve it and any // subsequent layers at this same depth. - else { + else if cursor.item().is_some() { let slice = cursor.slice( &DepthAndRange(depth + 1, Anchor::MIN..Anchor::MAX), Bias::Left, text, ); layers.push_tree(slice, text); - first_edit_ix_for_depth = 0; continue; }; @@ -227,19 +231,14 @@ impl SyntaxSnapshot { } else { break; }; + let (start_byte, start_point) = layer.range.start.summary::<(usize, Point)>(text); - let mut endpoints = text - .summaries_for_anchors::<(usize, Point), _>([&layer.range.start, &layer.range.end]); - let layer_range = endpoints.next().unwrap()..endpoints.next().unwrap(); - let start_byte = layer_range.start.0; - let start_point = layer_range.start.1; - let end_byte = layer_range.end.0; // Ignore edits that end before the start of this layer, and don't consider them // for any subsequent layers at this same depth. 
loop { - if let Some(edit) = edits.get(first_edit_ix_for_depth) { - if edit.new.end.0 < start_byte { + if let Some((_, edit_range)) = edits.get(first_edit_ix_for_depth) { + if edit_range.end.cmp(&layer.range.start, text).is_le() { first_edit_ix_for_depth += 1; } else { break; @@ -249,21 +248,15 @@ impl SyntaxSnapshot { } } - let mut old_start_byte = start_byte; - if first_edit_ix_for_depth > 0 { - let edit = &edits[first_edit_ix_for_depth - 1]; - old_start_byte = edit.old.end.0 + (start_byte - edit.new.end.0); - } - let mut layer = layer.clone(); - for edit in &edits[first_edit_ix_for_depth..] { + for (edit, edit_range) in &edits[first_edit_ix_for_depth..] { // Ignore any edits that follow this layer. - if edit.new.start.0 > end_byte { + if edit_range.start.cmp(&layer.range.end, text).is_ge() { break; } // Apply any edits that intersect this layer to the layer's syntax tree. - let tree_edit = if edit.old.start.0 >= old_start_byte { + let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() { tree_sitter::InputEdit { start_byte: edit.new.start.0 - start_byte, old_end_byte: edit.new.start.0 - start_byte @@ -1594,11 +1587,11 @@ mod tests { log::info!("initial text:\n{}", buffer.text()); - for i in 0..operations { + for _ in 0..operations { let prev_buffer = buffer.snapshot(); let prev_syntax_map = syntax_map.snapshot(); - buffer.randomly_edit(&mut rng, 2); + buffer.randomly_edit(&mut rng, 3); log::info!("text:\n{}", buffer.text()); syntax_map.interpolate(&buffer); @@ -1608,11 +1601,6 @@ mod tests { reference_syntax_map.clear(); reference_syntax_map.reparse(language.clone(), &buffer); - assert_eq!( - syntax_map.layers(&buffer).len(), - reference_syntax_map.layers(&buffer).len(), - "wrong number of layers after performing edit {i}" - ); } for i in 0..operations {
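The randomized test above checks the incrementally maintained map against a reference map that is cleared and reparsed from scratch after every edit. A tiny sketch of that reference-comparison pattern, with a running sum standing in for the syntax map (assumes the rand crate, which the test itself already imports):

    // Incrementally maintained value (like interpolate + reparse) checked against
    // a value rebuilt from scratch (like clear + reparse) after every operation.
    use rand::{rngs::StdRng, Rng, SeedableRng};

    fn main() {
        let mut rng = StdRng::seed_from_u64(0);
        let mut values: Vec<i64> = Vec::new();
        let mut running_sum: i64 = 0;

        for i in 0..100 {
            let v = rng.gen_range(-100..100);
            values.push(v);
            running_sum += v; // incremental update

            let reference: i64 = values.iter().sum(); // from-scratch rebuild
            assert_eq!(running_sum, reference, "diverged after operation {i}");
        }
    }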