From 81dc8e8d6b1476a7d522058ee0b4454115913ab5 Mon Sep 17 00:00:00 2001 From: woojiq Date: Tue, 9 Apr 2024 18:29:00 +0300 Subject: [PATCH] feat: find closest pair using tree-sitter --- helix-core/src/match_brackets.rs | 98 +++++++++++++++++++-------- helix-core/src/selection.rs | 2 +- helix-core/src/surround.rs | 111 +++++++++++++++++++++---------- helix-core/src/textobject.rs | 12 ++-- helix-term/src/commands.rs | 53 +++++++++------ 5 files changed, 189 insertions(+), 87 deletions(-) diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index b8bcc28ca..95d6a3dc4 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -9,16 +9,32 @@ const MAX_PLAINTEXT_SCAN: usize = 10000; const MATCH_LIMIT: usize = 16; -// Limit matching pairs to only ( ) { } [ ] < > ' ' " " -const PAIRS: &[(char, char)] = &[ +pub const BRACKETS: [(char, char); 7] = [ ('(', ')'), ('{', '}'), ('[', ']'), ('<', '>'), - ('\'', '\''), - ('\"', '\"'), + ('«', '»'), + ('「', '」'), + ('(', ')'), ]; +// The difference between BRACKETS and PAIRS is that we can find matching +// BRACKETS in a plain text file, but we can't do the same for PAIRs. +// PAIRS also contains all BRACKETS. +pub const PAIRS: [(char, char); BRACKETS.len() + 3] = { + let mut pairs = [(' ', ' '); BRACKETS.len() + 3]; + let mut idx = 0; + while idx < BRACKETS.len() { + pairs[idx] = BRACKETS[idx]; + idx += 1; + } + pairs[idx] = ('"', '"'); + pairs[idx + 1] = ('\'', '\''); + pairs[idx + 2] = ('`', '`'); + pairs +}; + /// Returns the position of the matching bracket under cursor. /// /// If the cursor is on the opening bracket, the position of @@ -30,7 +46,7 @@ /// If no matching bracket is found, `None` is returned. 
#[must_use] pub fn find_matching_bracket(syntax: &Syntax, doc: RopeSlice, pos: usize) -> Option { - if pos >= doc.len_chars() || !is_valid_bracket(doc.char(pos)) { + if pos >= doc.len_chars() || !is_valid_pair(doc.char(pos)) { return None; } find_pair(syntax, doc, pos, false) @@ -67,7 +83,7 @@ fn find_pair( let (start_byte, end_byte) = surrounding_bytes(doc, &node)?; let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte)); - if is_valid_pair(doc, start_char, end_char) { + if is_valid_pair_on_pos(doc, start_char, end_char) { if end_byte == pos { return Some(start_char); } @@ -140,14 +156,22 @@ fn find_pair( /// If no matching bracket is found, `None` is returned. #[must_use] pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Option { - // Don't do anything when the cursor is not on top of a bracket. let bracket = doc.get_char(cursor_pos)?; + let matching_bracket = { + let pair = get_pair(bracket); + if pair.0 == bracket { + pair.1 + } else { + pair.0 + } + }; + // Don't do anything when the cursor is not on top of a bracket. if !is_valid_bracket(bracket) { return None; } // Determine the direction of the matching. - let is_fwd = is_forward_bracket(bracket); + let is_fwd = is_open_bracket(bracket); let chars_iter = if is_fwd { doc.chars_at(cursor_pos + 1) } else { @@ -159,19 +183,7 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt for (i, candidate) in chars_iter.take(MAX_PLAINTEXT_SCAN).enumerate() { if candidate == bracket { open_cnt += 1; - } else if is_valid_pair( - doc, - if is_fwd { - cursor_pos - } else { - cursor_pos - i - 1 - }, - if is_fwd { - cursor_pos + i + 1 - } else { - cursor_pos - }, - ) { + } else if candidate == matching_bracket { // Return when all pending brackets have been closed. 
if open_cnt == 1 { return Some(if is_fwd { @@ -187,15 +199,49 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt None } -fn is_valid_bracket(c: char) -> bool { - PAIRS.iter().any(|(l, r)| *l == c || *r == c) +/// Returns the open and closing chars pair. If not found in +/// [`PAIRS`] returns (ch, ch). +/// +/// ``` +/// use helix_core::match_brackets::get_pair; +/// +/// assert_eq!(get_pair('['), ('[', ']')); +/// assert_eq!(get_pair('}'), ('{', '}')); +/// assert_eq!(get_pair('"'), ('"', '"')); +/// ``` +pub fn get_pair(ch: char) -> (char, char) { + PAIRS + .iter() + .find(|(open, close)| *open == ch || *close == ch) + .copied() + .unwrap_or((ch, ch)) } -fn is_forward_bracket(c: char) -> bool { - PAIRS.iter().any(|(l, _)| *l == c) +pub fn is_open_bracket(ch: char) -> bool { + BRACKETS.iter().any(|(l, _)| *l == ch) } -fn is_valid_pair(doc: RopeSlice, start_char: usize, end_char: usize) -> bool { +pub fn is_close_bracket(ch: char) -> bool { + BRACKETS.iter().any(|(_, r)| *r == ch) +} + +pub fn is_valid_bracket(ch: char) -> bool { + BRACKETS.iter().any(|(l, r)| *l == ch || *r == ch) +} + +pub fn is_open_pair(ch: char) -> bool { + PAIRS.iter().any(|(l, _)| *l == ch) +} + +pub fn is_close_pair(ch: char) -> bool { + PAIRS.iter().any(|(_, r)| *r == ch) +} + +pub fn is_valid_pair(ch: char) -> bool { + PAIRS.iter().any(|(l, r)| *l == ch || *r == ch) +} + +fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool { PAIRS.contains(&(doc.char(start_char), doc.char(end_char))) } diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 652612872..48eaf289c 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -122,7 +122,7 @@ pub fn is_empty(&self) -> bool { } /// `Direction::Backward` when head < anchor. - /// `Direction::Backward` otherwise. + /// `Direction::Forward` otherwise. 
#[inline] #[must_use] pub fn direction(&self) -> Direction { diff --git a/helix-core/src/surround.rs b/helix-core/src/surround.rs index ed9764883..879c2adf1 100644 --- a/helix-core/src/surround.rs +++ b/helix-core/src/surround.rs @@ -1,18 +1,16 @@ use std::fmt::Display; -use crate::{movement::Direction, search, Range, Selection}; +use crate::{ + graphemes::next_grapheme_boundary, + match_brackets::{ + find_matching_bracket, find_matching_bracket_fuzzy, get_pair, is_close_bracket, + is_open_bracket, + }, + movement::Direction, + search, Range, Selection, Syntax, +}; use ropey::RopeSlice; -pub const PAIRS: &[(char, char)] = &[ - ('(', ')'), - ('[', ']'), - ('{', '}'), - ('<', '>'), - ('«', '»'), - ('「', '」'), - ('(', ')'), -]; - #[derive(Debug, PartialEq, Eq)] pub enum Error { PairNotFound, @@ -34,32 +32,68 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { type Result = std::result::Result; -/// Given any char in [PAIRS], return the open and closing chars. If not found in -/// [PAIRS] return (ch, ch). +/// Finds the position of surround pairs of any [`crate::match_brackets::PAIRS`] +/// using tree-sitter when possible. /// -/// ``` -/// use helix_core::surround::get_pair; +/// # Returns /// -/// assert_eq!(get_pair('['), ('[', ']')); -/// assert_eq!(get_pair('}'), ('{', '}')); -/// assert_eq!(get_pair('"'), ('"', '"')); -/// ``` -pub fn get_pair(ch: char) -> (char, char) { - PAIRS - .iter() - .find(|(open, close)| *open == ch || *close == ch) - .copied() - .unwrap_or((ch, ch)) +/// Tuple `(anchor, head)`, meaning it is not always ordered. 
+pub fn find_nth_closest_pairs_pos( + syntax: Option<&Syntax>, + text: RopeSlice, + range: Range, + skip: usize, +) -> Result<(usize, usize)> { + match syntax { + Some(syntax) => find_nth_closest_pairs_ts(syntax, text, range, skip), + None => find_nth_closest_pairs_plain(text, range, skip), + } } -pub fn find_nth_closest_pairs_pos( +fn find_nth_closest_pairs_ts( + syntax: &Syntax, text: RopeSlice, range: Range, mut skip: usize, ) -> Result<(usize, usize)> { - let is_open_pair = |ch| PAIRS.iter().any(|(open, _)| *open == ch); - let is_close_pair = |ch| PAIRS.iter().any(|(_, close)| *close == ch); + let mut opening = range.from(); + // We want to expand the selection if we are already on the found pair, + // otherwise we would need to subtract "-1" from "range.to()". + let mut closing = range.to(); + while skip > 0 { + closing = find_matching_bracket_fuzzy(syntax, text, closing).ok_or(Error::PairNotFound)?; + opening = find_matching_bracket(syntax, text, closing).ok_or(Error::PairNotFound)?; + // If we're already on a closing bracket "find_matching_bracket_fuzzy" will return + // the position of the opening bracket. + if closing < opening { + (opening, closing) = (closing, opening); + } + + // In case found brackets are partially inside current selection. + if range.from() < opening || closing < range.to() - 1 { + closing = next_grapheme_boundary(text, closing); + } else { + skip -= 1; + if skip != 0 { + closing = next_grapheme_boundary(text, closing); + } + } + } + + // Keep the original direction. 
+ if let Direction::Forward = range.direction() { + Ok((opening, closing)) + } else { + Ok((closing, opening)) + } +} + +fn find_nth_closest_pairs_plain( + text: RopeSlice, + range: Range, + mut skip: usize, +) -> Result<(usize, usize)> { let mut stack = Vec::with_capacity(2); let pos = range.from(); let mut close_pos = pos.saturating_sub(1); @@ -67,7 +101,7 @@ pub fn find_nth_closest_pairs_pos( for ch in text.chars_at(pos) { close_pos += 1; - if is_open_pair(ch) { + if is_open_bracket(ch) { // Track open pairs encountered so that we can step over // the corresponding close pairs that will come up further // down the loop. We want to find a lone close pair whose @@ -76,7 +110,7 @@ pub fn find_nth_closest_pairs_pos( continue; } - if !is_close_pair(ch) { + if !is_close_bracket(ch) { // We don't care if this character isn't a brace pair item, // so short circuit here. continue; @@ -157,7 +191,11 @@ pub fn find_nth_pairs_pos( ) }; - Option::zip(open, close).ok_or(Error::PairNotFound) + // preserve original direction + match range.direction() { + Direction::Forward => Option::zip(open, close).ok_or(Error::PairNotFound), + Direction::Backward => Option::zip(close, open).ok_or(Error::PairNotFound), + } } fn find_nth_open_pair( @@ -249,6 +287,7 @@ fn find_nth_close_pair( /// are automatically detected around each cursor (note that this may result /// in them selecting different surround characters for each selection). 
pub fn get_surround_pos( + syntax: Option<&Syntax>, text: RopeSlice, selection: &Selection, ch: Option, @@ -257,9 +296,13 @@ pub fn get_surround_pos( let mut change_pos = Vec::new(); for &range in selection { - let (open_pos, close_pos) = match ch { - Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?, - None => find_nth_closest_pairs_pos(text, range, skip)?, + let (open_pos, close_pos) = { + let range_raw = match ch { + Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?, + None => find_nth_closest_pairs_pos(syntax, text, range, skip)?, + }; + let range = Range::new(range_raw.0, range_raw.1); + (range.from(), range.to()) }; if change_pos.contains(&open_pos) || change_pos.contains(&close_pos) { return Err(Error::CursorOverlap); diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index bf00a4580..412301261 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -7,9 +7,9 @@ use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::line_ending::rope_is_line_ending; use crate::movement::Direction; -use crate::surround; use crate::syntax::LanguageConfiguration; use crate::Range; +use crate::{surround, Syntax}; fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize { use CharCategory::{Eol, Whitespace}; @@ -199,25 +199,28 @@ pub fn textobject_paragraph( } pub fn textobject_pair_surround( + syntax: Option<&Syntax>, slice: RopeSlice, range: Range, textobject: TextObject, ch: char, count: usize, ) -> Range { - textobject_pair_surround_impl(slice, range, textobject, Some(ch), count) + textobject_pair_surround_impl(syntax, slice, range, textobject, Some(ch), count) } pub fn textobject_pair_surround_closest( + syntax: Option<&Syntax>, slice: RopeSlice, range: Range, textobject: TextObject, count: usize, ) -> Range { - textobject_pair_surround_impl(slice, range, textobject, None, count) + textobject_pair_surround_impl(syntax, slice, range, textobject, None, 
count) } fn textobject_pair_surround_impl( + syntax: Option<&Syntax>, slice: RopeSlice, range: Range, textobject: TextObject, @@ -226,8 +229,7 @@ fn textobject_pair_surround_impl( ) -> Range { let pair_pos = match ch { Some(ch) => surround::find_nth_pairs_pos(slice, ch, range, count), - // Automatically find the closest surround pairs - None => surround::find_nth_closest_pairs_pos(slice, range, count), + None => surround::find_nth_closest_pairs_pos(syntax, slice, range, count), }; pair_pos .map(|(anchor, head)| match textobject { diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index cc7b84c4b..8610a2048 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -5409,13 +5409,22 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { 'e' => textobject_treesitter("entry", range), 'p' => textobject::textobject_paragraph(text, range, objtype, count), 'm' => textobject::textobject_pair_surround_closest( - text, range, objtype, count, + doc.syntax(), + text, + range, + objtype, + count, ), 'g' => textobject_change(range), // TODO: cancel new ranges if inconsistent surround matches across lines - ch if !ch.is_ascii_alphanumeric() => { - textobject::textobject_pair_surround(text, range, objtype, ch, count) - } + ch if !ch.is_ascii_alphanumeric() => textobject::textobject_pair_surround( + doc.syntax(), + text, + range, + objtype, + ch, + count, + ), _ => range, } }); @@ -5440,7 +5449,7 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { ("c", "Comment (tree-sitter)"), ("T", "Test (tree-sitter)"), ("e", "Data structure entry (tree-sitter)"), - ("m", "Closest surrounding pair"), + ("m", "Closest surrounding pair (tree-sitter)"), ("g", "Change"), (" ", "... or any character acting as a pair"), ]; @@ -5454,7 +5463,7 @@ fn surround_add(cx: &mut Context) { // surround_len is the number of new characters being added. 
let (open, close, surround_len) = match event.char() { Some(ch) => { - let (o, c) = surround::get_pair(ch); + let (o, c) = match_brackets::get_pair(ch); let mut open = Tendril::new(); open.push(o); let mut close = Tendril::new(); @@ -5505,13 +5514,14 @@ fn surround_replace(cx: &mut Context) { let text = doc.text().slice(..); let selection = doc.selection(view.id); - let change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) { - Ok(c) => c, - Err(err) => { - cx.editor.set_error(err.to_string()); - return; - } - }; + let change_pos = + match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) { + Ok(c) => c, + Err(err) => { + cx.editor.set_error(err.to_string()); + return; + } + }; let selection = selection.clone(); let ranges: SmallVec<[Range; 1]> = change_pos.iter().map(|&p| Range::point(p)).collect(); @@ -5526,7 +5536,7 @@ fn surround_replace(cx: &mut Context) { Some(to) => to, None => return doc.set_selection(view.id, selection), }; - let (open, close) = surround::get_pair(to); + let (open, close) = match_brackets::get_pair(to); // the changeset has to be sorted to allow nested surrounds let mut sorted_pos: Vec<(usize, char)> = Vec::new(); @@ -5563,13 +5573,14 @@ fn surround_delete(cx: &mut Context) { let text = doc.text().slice(..); let selection = doc.selection(view.id); - let mut change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) { - Ok(c) => c, - Err(err) => { - cx.editor.set_error(err.to_string()); - return; - } - }; + let mut change_pos = + match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) { + Ok(c) => c, + Err(err) => { + cx.editor.set_error(err.to_string()); + return; + } + }; change_pos.sort_unstable(); // the changeset has to be sorted to allow nested surrounds let transaction = Transaction::change(doc.text(), change_pos.into_iter().map(|p| (p, p + 1, None)));