From 58fda5ac1c0c05377f2a846adb88b8b335ef0732 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Mon, 22 Aug 2022 14:49:16 -0700
Subject: [PATCH] Test more editing patterns of SyntaxMap, fix bugs

---
 Cargo.lock                        |   2 +-
 Cargo.toml                        |   2 +-
 crates/language/src/syntax_map.rs | 534 +++++++++++++++++++-----------
 3 files changed, 341 insertions(+), 197 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 680e40a7f9..2a6d594f66 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5842,7 +5842,7 @@ dependencies = [
 [[package]]
 name = "tree-sitter"
 version = "0.20.8"
-source = "git+https://github.com/tree-sitter/tree-sitter?rev=1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0#1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0"
+source = "git+https://github.com/tree-sitter/tree-sitter?rev=477b6677537e89c7bdff14ce84dad6d23a6415bb#477b6677537e89c7bdff14ce84dad6d23a6415bb"
 dependencies = [
  "cc",
  "regex",
diff --git a/Cargo.toml b/Cargo.toml
index 74c36d7006..b4df3fd101 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ default-members = ["crates/zed"]
 resolver = "2"
 
 [patch.crates-io]
-tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1f1b1eb4501ed0a2d195d37f7de15f72aa10acd0" }
+tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "477b6677537e89c7bdff14ce84dad6d23a6415bb" }
 async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
 
 # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457
diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs
index 71ac4d2959..e6198ccb5b 100644
--- a/crates/language/src/syntax_map.rs
+++ b/crates/language/src/syntax_map.rs
@@ -7,7 +7,7 @@ use std::{
 };
 use sum_tree::{Bias, SeekTarget, SumTree};
 use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
-use tree_sitter::{Parser, Tree};
+use tree_sitter::{Node, Parser, Tree};
 
 thread_local! {
     static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
@@ -15,7 +15,8 @@ thread_local! {
 
 #[derive(Default)]
 pub struct SyntaxMap {
-    version: clock::Global,
+    parsed_version: clock::Global,
+    interpolated_version: clock::Global,
     snapshot: SyntaxSnapshot,
     language_registry: Option<Arc<LanguageRegistry>>,
 }
@@ -40,14 +41,14 @@ struct SyntaxLayerSummary {
     last_layer_range: Range<Anchor>,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 struct DepthAndRange(usize, Range<Anchor>);
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 struct DepthAndMaxPosition(usize, Anchor);
 
-#[derive(Debug)]
-struct DepthAndRangeOrMaxPosition(usize, Range<Anchor>, Anchor);
+#[derive(Clone, Debug)]
+struct DepthAndRangeOrMaxPosition(DepthAndRange, DepthAndMaxPosition);
 
 struct ReparseStep {
     depth: usize,
@@ -76,44 +77,29 @@ impl SyntaxMap {
     }
 
     pub fn interpolate(&mut self, text: &BufferSnapshot) {
-        self.snapshot.interpolate(&self.version, text);
-        self.version = text.version.clone();
+        self.snapshot.interpolate(&self.interpolated_version, text);
+        self.interpolated_version = text.version.clone();
     }
 
     pub fn reparse(&mut self, language: Arc<Language>, text: &BufferSnapshot) {
-        self.version = text.version.clone();
-        self.snapshot
-            .reparse(self.language_registry.clone(), language, text);
+        if !self.interpolated_version.observed_all(&text.version) {
+            self.interpolate(text);
+        }
+
+        self.snapshot.reparse(
+            &self.parsed_version,
+            text,
+            self.language_registry.clone(),
+            language,
+        );
+        self.parsed_version = text.version.clone();
     }
 }
-// Assumptions:
-// * The maximum depth is small (< 5)
-// * For a given depth, the number of layers that touch a given range
-//   is small (usually only 1)
-
-// |change|
-// 0 (............................................................)
-// 1 (...............................................)
-// 1 (................)
-// 1 (.......)
-// 2 (....)
-// 2 (....)
-// 2 (.......)
-// 2 (...)
-// 2 (.........)
-// 2 (...)
-// 3 (.)
-// 3 (.)
-// 3 (..)
-// 3 (..)
-// 3 (..)
-// 3 (.)
-
 impl SyntaxSnapshot {
-    pub fn interpolate(&mut self, current_version: &clock::Global, text: &BufferSnapshot) {
+    pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) {
         let edits = text
-            .edits_since::<(usize, Point)>(&current_version)
+            .edits_since::<(usize, Point)>(&from_version)
             .collect::<Vec<_>>();
         if edits.is_empty() {
             return;
         }
@@ -152,16 +138,9 @@ impl SyntaxSnapshot {
             } else {
                 break;
             };
-            if first_edit.new.start.0 > layer_range.end.0 {
-                layers.push_tree(
-                    cursor.slice(
-                        &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)),
-                        Bias::Left,
-                        text,
-                    ),
-                    text,
-                );
-                continue;
+            let target = DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0));
+            if target.cmp(&cursor.start(), text).is_gt() {
+                layers.push_tree(cursor.slice(&target, Bias::Left, text), text);
             }
 
             // Preserve any layers at this depth that follow the last edit.
@@ -226,10 +205,17 @@ impl SyntaxSnapshot {
 
     pub fn reparse(
         &mut self,
+        from_version: &clock::Global,
+        text: &BufferSnapshot,
         registry: Option<Arc<LanguageRegistry>>,
         language: Arc<Language>,
-        text: &BufferSnapshot,
     ) {
+        let edits = text.edits_since::<usize>(from_version).collect::<Vec<_>>();
+        if edits.is_empty() {
+            return;
+        }
+
+        let max_depth = self.layers.summary().max_depth;
         let mut cursor = self.layers.cursor::<SyntaxLayerSummary>();
         cursor.next(&text);
         let mut layers = SumTree::new();
@@ -248,44 +234,55 @@ impl SyntaxSnapshot {
             let (depth, range) = if let Some(step) = &step {
                 (step.depth, step.range.clone())
             } else {
-                (cursor.start().max_depth, Anchor::MAX..Anchor::MAX)
+                (max_depth + 1, Anchor::MAX..Anchor::MAX)
             };
 
             let target = DepthAndRange(depth, range.clone());
-            if target.cmp(cursor.start(), &text).is_gt() {
-                let change_start_anchor = changed_regions
-                    .first()
-                    .map_or(Anchor::MAX, |region| region.range.start);
-                let seek_target =
-                    DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor);
-                let slice = cursor.slice(&seek_target, Bias::Left, text);
-                layers.push_tree(slice, &text);
+            let mut done = cursor.item().is_none();
+            while !done && target.cmp(cursor.start(), &text).is_gt() {
+                let bounded_target = DepthAndRangeOrMaxPosition(
+                    target.clone(),
+                    changed_regions
+                        .first()
+                        .map_or(DepthAndMaxPosition(usize::MAX, Anchor::MAX), |region| {
+                            DepthAndMaxPosition(region.depth, region.range.start)
+                        }),
+                );
+                if bounded_target.cmp(&cursor.start(), &text).is_gt() {
+                    let slice = cursor.slice(&bounded_target, Bias::Left, text);
+                    layers.push_tree(slice, &text);
+                }
 
-                while let Some(layer) = cursor.item() {
-                    if target.cmp(&cursor.end(text), text).is_le() {
+                while target.cmp(&cursor.end(text), text).is_gt() {
+                    let layer = if let Some(layer) = cursor.item() {
+                        layer
+                    } else {
                         break;
-                    }
+                    };
+
                     if layer_is_changed(layer, text, &changed_regions) {
-                        let region = ChangedRegion {
+                        ChangedRegion {
                             depth: depth + 1,
                             range: layer.range.clone(),
-                        };
-                        if let Err(i) =
-                            changed_regions.binary_search_by(|probe| probe.cmp(&region, text))
-                        {
-                            changed_regions.insert(i, region);
                         }
+                        .insert(text, &mut changed_regions);
                     } else {
                         layers.push(layer.clone(), text);
                     }
-
                     cursor.next(text);
                 }
 
+                done = true;
                 changed_regions.retain(|region| {
-                    region.depth > depth
+                    if region.depth > depth
                         || (region.depth == depth
                             && region.range.end.cmp(&range.start, text).is_gt())
+                    {
+                        true
+                    } else {
+                        done = false;
+                        false
+                    }
                 });
             }
@@ -332,15 +329,19 @@ impl SyntaxSnapshot {
                     Some(old_layer.tree.clone()),
                     ranges,
                 );
-
-                changed_ranges = old_layer
-                    .tree
-                    .changed_ranges(&tree)
-                    .map(|r| r.start_byte..r.end_byte)
-                    .collect();
+                changed_ranges = join_ranges(
+                    edits
+                        .iter()
+                        .map(|e| e.new.clone())
+                        .filter(|range| range.start < end_byte && range.end > start_byte),
+                    old_layer
+                        .tree
+                        .changed_ranges(&tree)
+                        .map(|r| start_byte + r.start_byte..start_byte + r.end_byte),
+                );
             } else {
                 tree = parse_text(grammar, text.as_rope(), None, ranges);
-                changed_ranges = vec![0..end_byte - start_byte];
+                changed_ranges = vec![start_byte..end_byte];
             }
 
             layers.push(
@@ -358,27 +359,19 @@ impl SyntaxSnapshot {
                 changed_ranges.is_empty(),
             ) {
                 let depth = depth + 1;
-
                 for range in &changed_ranges {
-                    let region = ChangedRegion {
+                    ChangedRegion {
                         depth,
                         range: text.anchor_before(range.start)..text.anchor_after(range.end),
-                    };
-                    if let Err(i) =
-                        changed_regions.binary_search_by(|probe| probe.cmp(&region, text))
-                    {
-                        changed_regions.insert(i, region);
                     }
+                    .insert(text, &mut changed_regions);
                 }
-
                 get_injections(
                     config,
                     text,
-                    &tree,
+                    tree.root_node_with_offset(start_byte, start_point),
                     registry,
                     depth,
-                    start_byte,
-                    Point::from_ts_point(start_point),
                     &changed_ranges,
                     &mut queue,
                 );
@@ -389,17 +382,16 @@ impl SyntaxSnapshot {
         self.layers = layers;
     }
 
-    pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, &Tree, (usize, Point))> {
+    pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> {
         self.layers
             .iter()
             .filter_map(|layer| {
                 if let Some(grammar) = &layer.language.grammar {
                     Some((
                         grammar.as_ref(),
-                        &layer.tree,
-                        (
+                        layer.tree.root_node_with_offset(
                             layer.range.start.to_offset(buffer),
-                            layer.range.start.to_point(buffer),
+                            layer.range.start.to_point(buffer).to_ts_point(),
                         ),
                     ))
                 } else {
@@ -413,7 +405,7 @@ impl SyntaxSnapshot {
         &self,
         range: Range<T>,
         buffer: &BufferSnapshot,
-    ) -> Vec<(&Grammar, &Tree, (usize, Point))> {
+    ) -> Vec<(&Grammar, Node)> {
         let start = buffer.anchor_before(range.start.to_offset(buffer));
         let end = buffer.anchor_after(range.end.to_offset(buffer));
 
@@ -429,10 +421,9 @@ impl SyntaxSnapshot {
             if let Some(grammar) = &layer.language.grammar {
                 result.push((
                     grammar.as_ref(),
-                    &layer.tree,
-                    (
+                    layer.tree.root_node_with_offset(
                         layer.range.start.to_offset(buffer),
-                        layer.range.start.to_point(buffer),
+                        layer.range.start.to_point(buffer).to_ts_point(),
                     ),
                 ));
             }
@@ -443,6 +434,38 @@ impl SyntaxSnapshot {
     }
 }
 
+fn join_ranges(
+    a: impl Iterator<Item = Range<usize>>,
+    b: impl Iterator<Item = Range<usize>>,
+) -> Vec<Range<usize>> {
+    let mut result = Vec::<Range<usize>>::new();
+    let mut a = a.peekable();
+    let mut b = b.peekable();
+    loop {
+        let range = match (a.peek(), b.peek()) {
+            (Some(range_a), Some(range_b)) => {
+                if range_a.start < range_b.start {
+                    a.next().unwrap()
+                } else {
+                    b.next().unwrap()
+                }
+            }
+            (None, Some(_)) => b.next().unwrap(),
+            (Some(_), None) => a.next().unwrap(),
+            (None, None) => break,
+        };
+
+        if let Some(last) = result.last_mut() {
+            if range.start <= last.end {
+                last.end = last.end.max(range.end);
+                continue;
+            }
+        }
+        result.push(range);
+    }
+    result
+}
+
 fn parse_text(
     grammar: &Grammar,
     text: &Rope,
@@ -485,11 +508,9 @@ fn parse_text(
 fn get_injections(
     config: &InjectionConfig,
     text: &BufferSnapshot,
-    tree: &Tree,
+    node: Node,
     language_registry: &LanguageRegistry,
     depth: usize,
-    start_byte: usize,
-    start_point: Point,
     query_ranges: &[Range<usize>],
     queue: &mut BinaryHeap<ReparseStep>,
 ) -> bool {
@@ -498,21 +519,10 @@ fn get_injections(
     let mut prev_match = None;
     for query_range in query_ranges {
         query_cursor.set_byte_range(query_range.start..query_range.end);
-        for mat in query_cursor.matches(
-            &config.query,
-            tree.root_node(),
-            TextProvider(text.as_rope()),
-        ) {
+        for mat in query_cursor.matches(&config.query, node, TextProvider(text.as_rope())) {
             let content_ranges = mat
                 .nodes_for_capture_index(config.content_capture_ix)
-                .map(|node| tree_sitter::Range {
-                    start_byte: start_byte + node.start_byte(),
-                    end_byte: start_byte + node.end_byte(),
-                    start_point: (start_point + Point::from_ts_point(node.start_position()))
-                        .to_ts_point(),
-                    end_point: (start_point + Point::from_ts_point(node.end_position()))
-                        .to_ts_point(),
-                })
+                .map(|node| node.range())
                 .collect::<Vec<_>>();
             if content_ranges.is_empty() {
                 continue;
@@ -534,12 +544,7 @@ fn get_injections(
                 .or_else(|| {
                     let ix = config.language_capture_ix?;
                     let node = mat.nodes_for_capture_index(ix).next()?;
-                    Some(Cow::Owned(
-                        text.text_for_range(
-                            start_byte + node.start_byte()..start_byte + node.end_byte(),
-                        )
-                        .collect(),
-                    ))
+                    Some(Cow::Owned(text.text_for_range(node.byte_range()).collect()))
                 });
 
             if let Some(language_name) = language_name {
@@ -566,9 +571,10 @@ fn layer_is_changed(
     changed_regions: &[ChangedRegion],
 ) -> bool {
     changed_regions.iter().any(|region| {
+        let same_depth = region.depth == layer.depth;
         let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le();
         let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge();
-        !is_before_layer && !is_after_layer
+        same_depth && !is_before_layer && !is_after_layer
     })
 }
 
@@ -613,6 +619,12 @@ impl ReparseStep {
 }
 
 impl ChangedRegion {
+    fn insert(self, text: &BufferSnapshot, set: &mut Vec<ChangedRegion>) {
+        if let Err(ix) = set.binary_search_by(|probe| probe.cmp(&self, text)) {
+            set.insert(ix, self);
+        }
+    }
+
     fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
         let range_a = &self.range;
         let range_b = &other.range;
@@ -676,25 +688,11 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxP
 
 impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition {
     fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
-        let cmp = Ord::cmp(&self.0, &cursor_location.max_depth);
-        if cmp.is_ne() {
-            return cmp;
+        if self.1.cmp(cursor_location, buffer).is_le() {
+            return Ordering::Less;
+        } else {
+            self.0.cmp(cursor_location, buffer)
         }
-
-        let cmp = self.2.cmp(&cursor_location.range.end, buffer);
-        if cmp.is_gt() {
-            return Ordering::Greater;
-        }
-
-        self.1
-            .start
-            .cmp(&cursor_location.last_layer_range.start, buffer)
-            .then_with(|| {
-                cursor_location
-                    .last_layer_range
-                    .end
-                    .cmp(&self.1.end, buffer)
-            })
     }
 }
 
@@ -827,37 +825,22 @@ mod tests {
     }
 
     #[gpui::test]
-    fn test_syntax_map_edits() {
-        let registry = Arc::new(LanguageRegistry::test());
-        let language = Arc::new(rust_lang());
-        let mut syntax_map = SyntaxMap::new();
-        syntax_map.set_language_registry(registry.clone());
-        registry.add(language.clone());
-
-        let mut buffer = Buffer::new(0, 0, "".into());
-        syntax_map.reparse(language.clone(), &buffer);
-
-        edit_buffer_n(
-            &mut buffer,
-            &[
-                "«fn a() { dbg }»",
-                "fn a() { dbg«!» }",
-                "fn a() { dbg!«()» }",
-                "fn a() { dbg!(«b») }",
-                "fn a() { dbg!(b«.») }",
-                "fn a() { dbg!(b.«c») }",
-                "fn a() { dbg!(b.c«()») }",
-                "fn a() { dbg!(b.c(«vec»)) }",
-                "fn a() { dbg!(b.c(vec«!»)) }",
-                "fn a() { dbg!(b.c(vec!«[]»)) }",
-                "fn a() { dbg!(b.c(vec![«d»])) }",
-                "fn a() { dbg!(b.c(vec![d«.»])) }",
-                "fn a() { dbg!(b.c(vec![d.«e»])) }",
-            ],
-        );
-
-        syntax_map.interpolate(&buffer);
-        syntax_map.reparse(language.clone(), &buffer);
+    fn test_typing_multiple_new_injections() {
+        let (buffer, syntax_map) = test_edit_sequence(&[
+            "fn a() { dbg }",
+            "fn a() { dbg«!» }",
+            "fn a() { dbg!«()» }",
+            "fn a() { dbg!(«b») }",
+            "fn a() { dbg!(b«.») }",
+            "fn a() { dbg!(b.«c») }",
+            "fn a() { dbg!(b.c«()») }",
+            "fn a() { dbg!(b.c(«vec»)) }",
+            "fn a() { dbg!(b.c(vec«!»)) }",
+            "fn a() { dbg!(b.c(vec!«[]»)) }",
+            "fn a() { dbg!(b.c(vec![«d»])) }",
+            "fn a() { dbg!(b.c(vec![d«.»])) }",
+            "fn a() { dbg!(b.c(vec![d.«e»])) }",
+        ]);
 
         assert_node_ranges(
             &syntax_map,
@@ -867,6 +850,163 @@ mod tests {
         );
     }
 
+    #[gpui::test]
+    fn test_pasting_new_injection_line_between_others() {
+        let (buffer, syntax_map) = test_edit_sequence(&[
+            "
+                fn a() {
+                    b!(B {});
+                    c!(C {});
+                    d!(D {});
+                    e!(E {});
+                    f!(F {});
+                }
+            ",
+            "
+                fn a() {
+                    b!(B {});
+                    c!(C {});
+                    «g!(G {});
+                    »d!(D {});
+                    e!(E {});
+                    f!(F {});
+                }
+            ",
+        ]);
+
+        assert_node_ranges(
+            &syntax_map,
+            &buffer,
+            "(struct_expression) @_",
+            "
+                fn a() {
+                    b!(«B {}»);
+                    c!(«C {}»);
+                    g!(«G {}»);
+                    d!(«D {}»);
+                    e!(«E {}»);
+                    f!(«F {}»);
+                }
+            ",
+        );
+    }
+
+    #[gpui::test]
+    fn test_joining_injections_with_child_injections() {
+        let (buffer, syntax_map) = test_edit_sequence(&[
+            "
+                fn a() {
+                    b!(
+                        c![one.two.three],
+                        d![four.five.six],
+                    );
+                    e!(
+                        f![seven.eight],
+                    );
+                }
+            ",
+            "
+                fn a() {
+                    b!(
+                        c![one.two.three],
+                        d![four.five.six],
+                        ˇ f![seven.eight],
+                    );
+                }
+            ",
+        ]);
+
+        assert_node_ranges(
+            &syntax_map,
+            &buffer,
+            "(field_identifier) @_",
+            "
+                fn a() {
+                    b!(
+                        c![one.«two».«three»],
+                        d![four.«five».«six»],
+                        f![seven.«eight»],
+                    );
+                }
+            ",
+        );
+    }
+
+    #[gpui::test]
+    fn test_editing_edges_of_injection() {
+        test_edit_sequence(&[
+            "
+                fn a() {
+                    b!(c!())
+                }
+            ",
+            "
+                fn a() {
+                    «d»!(c!())
+                }
+            ",
+            "
+                fn a() {
+                    «e»d!(c!())
+                }
+            ",
+            "
+                fn a() {
+                    ed!«[»c!()«]»
+                }
+            ",
+        ]);
+    }
+
+    fn test_edit_sequence(steps: &[&str]) -> (Buffer, SyntaxMap) {
+        let registry = Arc::new(LanguageRegistry::test());
+        let language = Arc::new(rust_lang());
+        registry.add(language.clone());
+        let mut buffer = Buffer::new(0, 0, Default::default());
+
+        let mut mutated_syntax_map = SyntaxMap::new();
+        mutated_syntax_map.set_language_registry(registry.clone());
+        mutated_syntax_map.reparse(language.clone(), &buffer);
+
+        for (i, marked_string) in steps.into_iter().enumerate() {
+            edit_buffer(&mut buffer, &marked_string.unindent());
+
+            // Reparse the syntax map
+            mutated_syntax_map.interpolate(&buffer);
+            mutated_syntax_map.reparse(language.clone(), &buffer);
+
+            // Create a second syntax map from scratch
+            let mut reference_syntax_map = SyntaxMap::new();
+            reference_syntax_map.set_language_registry(registry.clone());
+            reference_syntax_map.reparse(language.clone(), &buffer);
+
+            // Compare the mutated syntax map to the new syntax map
+            let mutated_layers = mutated_syntax_map.layers(&buffer);
+            let reference_layers = reference_syntax_map.layers(&buffer);
+            assert_eq!(
+                mutated_layers.len(),
+                reference_layers.len(),
+                "wrong number of layers at step {i}"
+            );
+            for (edited_layer, reference_layer) in
+                mutated_layers.into_iter().zip(reference_layers.into_iter())
+            {
+                assert_eq!(
+                    edited_layer.1.to_sexp(),
+                    reference_layer.1.to_sexp(),
+                    "different layer at step {i}"
+                );
+                assert_eq!(
+                    edited_layer.1.range(),
+                    reference_layer.1.range(),
+                    "different layer at step {i}"
+                );
+            }
+        }
+
+        (buffer, mutated_syntax_map)
+    }
+
     fn rust_lang() -> Language {
         Language::new(
             LanguageConfig {
@@ -903,10 +1043,10 @@ mod tests {
             expected_layers.len(),
             "wrong number of layers"
         );
-        for (i, ((_, tree, _), expected_s_exp)) in
+        for (i, ((_, node), expected_s_exp)) in
             layers.iter().zip(expected_layers.iter()).enumerate()
         {
-            let actual_s_exp = tree.root_node().to_sexp();
+            let actual_s_exp = node.to_sexp();
             assert!(
                 string_contains_sequence(
                     &actual_s_exp,
@@ -925,50 +1065,54 @@ mod tests {
     ) {
         let mut cursor = QueryCursorHandle::new();
        let mut actual_ranges = Vec::<Range<usize>>::new();
-        for (grammar, tree, (start_byte, _)) in syntax_map.layers(buffer) {
+        for (grammar, node) in syntax_map.layers(buffer) {
             let query = Query::new(grammar.ts_language, query).unwrap();
-            for (mat, ix) in
-                cursor.captures(&query, tree.root_node(), TextProvider(buffer.as_rope()))
-            {
-                let range = mat.captures[ix].node.byte_range();
-                actual_ranges.push(start_byte + range.start..start_byte + range.end);
+            for (mat, ix) in cursor.captures(&query, node, TextProvider(buffer.as_rope())) {
+                actual_ranges.push(mat.captures[ix].node.byte_range());
             }
         }
 
-        let (text, expected_ranges) = marked_text_ranges(marked_string, false);
+        let (text, expected_ranges) = marked_text_ranges(&marked_string.unindent(), false);
         assert_eq!(text, buffer.text());
         assert_eq!(actual_ranges, expected_ranges);
     }
 
-    fn edit_buffer_n(buffer: &mut Buffer, marked_strings: &[&str]) {
-        for marked_string in marked_strings {
-            edit_buffer(buffer, marked_string);
-        }
-    }
-
     fn edit_buffer(buffer: &mut Buffer, marked_string: &str) {
         let old_text = buffer.text();
         let (new_text, mut ranges) = marked_text_ranges(marked_string, false);
-        assert_eq!(ranges.len(), 1);
+        if ranges.is_empty() {
+            ranges.push(0..new_text.len());
+        }
 
-        let inserted_range = ranges.pop().unwrap();
-        let inserted_text = new_text[inserted_range.clone()].to_string();
-        let deleted_len = (inserted_range.len() as isize + old_text.len() as isize
-            - new_text.len() as isize) as usize;
-        let deleted_range = inserted_range.start..inserted_range.start + deleted_len;
+        let mut delta = 0;
+        let mut edits = Vec::new();
+        let mut ranges = ranges.into_iter().peekable();
+
+        while let Some(inserted_range) = ranges.next() {
+            let old_start = (inserted_range.start as isize - delta) as usize;
+            let following_text = if let Some(next_range) = ranges.peek() {
+                &new_text[inserted_range.end..next_range.start]
+            } else {
+                &new_text[inserted_range.end..]
+            };
+
+            let inserted_len = inserted_range.len();
+            let deleted_len = old_text[old_start..]
+                .find(following_text)
+                .expect("invalid edit");
+
+            let old_range = old_start..old_start + deleted_len;
+            edits.push((old_range, new_text[inserted_range].to_string()));
+            delta += inserted_len as isize - deleted_len as isize;
+        }
 
         assert_eq!(
-            old_text[..deleted_range.start],
-            new_text[..inserted_range.start],
-            "invalid edit",
-        );
-        assert_eq!(
-            old_text[deleted_range.end..],
-            new_text[inserted_range.end..],
-            "invalid edit",
+            old_text.len() as isize + delta,
+            new_text.len() as isize,
+            "invalid edit"
         );
 
-        buffer.edit([(deleted_range, inserted_text)]);
+        buffer.edit(edits);
     }
 
     pub fn string_contains_sequence(text: &str, parts: &[&str]) -> bool {