feat: find closest pair using tree-sitter

This commit is contained in:
woojiq 2024-04-09 18:29:00 +03:00 committed by Michael Davis
parent 50c90cb47c
commit 81dc8e8d6b
5 changed files with 189 additions and 87 deletions

View File

@ -9,16 +9,32 @@
const MAX_PLAINTEXT_SCAN: usize = 10000; const MAX_PLAINTEXT_SCAN: usize = 10000;
const MATCH_LIMIT: usize = 16; const MATCH_LIMIT: usize = 16;
// Limit matching pairs to only ( ) { } [ ] < > ' ' " " pub const BRACKETS: [(char, char); 7] = [
const PAIRS: &[(char, char)] = &[
('(', ')'), ('(', ')'),
('{', '}'), ('{', '}'),
('[', ']'), ('[', ']'),
('<', '>'), ('<', '>'),
('\'', '\''), ('«', '»'),
('\"', '\"'), ('「', '」'),
('（', '）'),
]; ];
// The difference between BRACKETS and PAIRS is that we can find matching
// BRACKETS in a plain text file, but we can't do the same for PAIRS.
// PAIRS also contains all BRACKETS.
pub const PAIRS: [(char, char); BRACKETS.len() + 3] = {
let mut pairs = [(' ', ' '); BRACKETS.len() + 3];
let mut idx = 0;
while idx < BRACKETS.len() {
pairs[idx] = BRACKETS[idx];
idx += 1;
}
pairs[idx] = ('"', '"');
pairs[idx + 1] = ('\'', '\'');
pairs[idx + 2] = ('`', '`');
pairs
};
/// Returns the position of the matching bracket under cursor. /// Returns the position of the matching bracket under cursor.
/// ///
/// If the cursor is on the opening bracket, the position of /// If the cursor is on the opening bracket, the position of
@ -30,7 +46,7 @@
/// If no matching bracket is found, `None` is returned. /// If no matching bracket is found, `None` is returned.
#[must_use] #[must_use]
pub fn find_matching_bracket(syntax: &Syntax, doc: RopeSlice, pos: usize) -> Option<usize> { pub fn find_matching_bracket(syntax: &Syntax, doc: RopeSlice, pos: usize) -> Option<usize> {
if pos >= doc.len_chars() || !is_valid_bracket(doc.char(pos)) { if pos >= doc.len_chars() || !is_valid_pair(doc.char(pos)) {
return None; return None;
} }
find_pair(syntax, doc, pos, false) find_pair(syntax, doc, pos, false)
@ -67,7 +83,7 @@ fn find_pair(
let (start_byte, end_byte) = surrounding_bytes(doc, &node)?; let (start_byte, end_byte) = surrounding_bytes(doc, &node)?;
let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte)); let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte));
if is_valid_pair(doc, start_char, end_char) { if is_valid_pair_on_pos(doc, start_char, end_char) {
if end_byte == pos { if end_byte == pos {
return Some(start_char); return Some(start_char);
} }
@ -140,14 +156,22 @@ fn find_pair(
/// If no matching bracket is found, `None` is returned. /// If no matching bracket is found, `None` is returned.
#[must_use] #[must_use]
pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Option<usize> { pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Option<usize> {
// Don't do anything when the cursor is not on top of a bracket.
let bracket = doc.get_char(cursor_pos)?; let bracket = doc.get_char(cursor_pos)?;
let matching_bracket = {
let pair = get_pair(bracket);
if pair.0 == bracket {
pair.1
} else {
pair.0
}
};
// Don't do anything when the cursor is not on top of a bracket.
if !is_valid_bracket(bracket) { if !is_valid_bracket(bracket) {
return None; return None;
} }
// Determine the direction of the matching. // Determine the direction of the matching.
let is_fwd = is_forward_bracket(bracket); let is_fwd = is_open_bracket(bracket);
let chars_iter = if is_fwd { let chars_iter = if is_fwd {
doc.chars_at(cursor_pos + 1) doc.chars_at(cursor_pos + 1)
} else { } else {
@ -159,19 +183,7 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
for (i, candidate) in chars_iter.take(MAX_PLAINTEXT_SCAN).enumerate() { for (i, candidate) in chars_iter.take(MAX_PLAINTEXT_SCAN).enumerate() {
if candidate == bracket { if candidate == bracket {
open_cnt += 1; open_cnt += 1;
} else if is_valid_pair( } else if candidate == matching_bracket {
doc,
if is_fwd {
cursor_pos
} else {
cursor_pos - i - 1
},
if is_fwd {
cursor_pos + i + 1
} else {
cursor_pos
},
) {
// Return when all pending brackets have been closed. // Return when all pending brackets have been closed.
if open_cnt == 1 { if open_cnt == 1 {
return Some(if is_fwd { return Some(if is_fwd {
@ -187,15 +199,49 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
None None
} }
fn is_valid_bracket(c: char) -> bool { /// Returns the open and closing chars pair. If not found in
PAIRS.iter().any(|(l, r)| *l == c || *r == c) /// [`PAIRS`] returns (ch, ch).
///
/// ```
/// use helix_core::match_brackets::get_pair;
///
/// assert_eq!(get_pair('['), ('[', ']'));
/// assert_eq!(get_pair('}'), ('{', '}'));
/// assert_eq!(get_pair('"'), ('"', '"'));
/// ```
pub fn get_pair(ch: char) -> (char, char) {
PAIRS
.iter()
.find(|(open, close)| *open == ch || *close == ch)
.copied()
.unwrap_or((ch, ch))
} }
fn is_forward_bracket(c: char) -> bool { pub fn is_open_bracket(ch: char) -> bool {
PAIRS.iter().any(|(l, _)| *l == c) BRACKETS.iter().any(|(l, _)| *l == ch)
} }
fn is_valid_pair(doc: RopeSlice, start_char: usize, end_char: usize) -> bool { pub fn is_close_bracket(ch: char) -> bool {
BRACKETS.iter().any(|(_, r)| *r == ch)
}
pub fn is_valid_bracket(ch: char) -> bool {
BRACKETS.iter().any(|(l, r)| *l == ch || *r == ch)
}
pub fn is_open_pair(ch: char) -> bool {
PAIRS.iter().any(|(l, _)| *l == ch)
}
pub fn is_close_pair(ch: char) -> bool {
PAIRS.iter().any(|(_, r)| *r == ch)
}
pub fn is_valid_pair(ch: char) -> bool {
PAIRS.iter().any(|(l, r)| *l == ch || *r == ch)
}
fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
PAIRS.contains(&(doc.char(start_char), doc.char(end_char))) PAIRS.contains(&(doc.char(start_char), doc.char(end_char)))
} }

View File

@ -122,7 +122,7 @@ pub fn is_empty(&self) -> bool {
} }
/// `Direction::Backward` when head < anchor. /// `Direction::Backward` when head < anchor.
/// `Direction::Backward` otherwise. /// `Direction::Forward` otherwise.
#[inline] #[inline]
#[must_use] #[must_use]
pub fn direction(&self) -> Direction { pub fn direction(&self) -> Direction {

View File

@ -1,18 +1,16 @@
use std::fmt::Display; use std::fmt::Display;
use crate::{movement::Direction, search, Range, Selection}; use crate::{
graphemes::next_grapheme_boundary,
match_brackets::{
find_matching_bracket, find_matching_bracket_fuzzy, get_pair, is_close_bracket,
is_open_bracket,
},
movement::Direction,
search, Range, Selection, Syntax,
};
use ropey::RopeSlice; use ropey::RopeSlice;
pub const PAIRS: &[(char, char)] = &[
('(', ')'),
('[', ']'),
('{', '}'),
('<', '>'),
('«', '»'),
('「', '」'),
('（', '）'),
];
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Error { pub enum Error {
PairNotFound, PairNotFound,
@ -34,32 +32,68 @@ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type Result<T> = std::result::Result<T, Error>; type Result<T> = std::result::Result<T, Error>;
/// Given any char in [PAIRS], return the open and closing chars. If not found in /// Finds the position of surround pairs of any [`crate::match_brackets::PAIRS`]
/// [PAIRS] return (ch, ch). /// using tree-sitter when possible.
/// ///
/// ``` /// # Returns
/// use helix_core::surround::get_pair;
/// ///
/// assert_eq!(get_pair('['), ('[', ']')); /// Tuple `(anchor, head)`, meaning it is not always ordered.
/// assert_eq!(get_pair('}'), ('{', '}')); pub fn find_nth_closest_pairs_pos(
/// assert_eq!(get_pair('"'), ('"', '"')); syntax: Option<&Syntax>,
/// ``` text: RopeSlice,
pub fn get_pair(ch: char) -> (char, char) { range: Range,
PAIRS skip: usize,
.iter() ) -> Result<(usize, usize)> {
.find(|(open, close)| *open == ch || *close == ch) match syntax {
.copied() Some(syntax) => find_nth_closest_pairs_ts(syntax, text, range, skip),
.unwrap_or((ch, ch)) None => find_nth_closest_pairs_plain(text, range, skip),
}
} }
pub fn find_nth_closest_pairs_pos( fn find_nth_closest_pairs_ts(
syntax: &Syntax,
text: RopeSlice, text: RopeSlice,
range: Range, range: Range,
mut skip: usize, mut skip: usize,
) -> Result<(usize, usize)> { ) -> Result<(usize, usize)> {
let is_open_pair = |ch| PAIRS.iter().any(|(open, _)| *open == ch); let mut opening = range.from();
let is_close_pair = |ch| PAIRS.iter().any(|(_, close)| *close == ch); // We want to expand the selection if we are already on the found pair,
// otherwise we would need to subtract "-1" from "range.to()".
let mut closing = range.to();
while skip > 0 {
closing = find_matching_bracket_fuzzy(syntax, text, closing).ok_or(Error::PairNotFound)?;
opening = find_matching_bracket(syntax, text, closing).ok_or(Error::PairNotFound)?;
// If we're already on a closing bracket "find_matching_bracket_fuzzy" will return
// the position of the opening bracket.
if closing < opening {
(opening, closing) = (closing, opening);
}
// In case found brackets are partially inside current selection.
if range.from() < opening || closing < range.to() - 1 {
closing = next_grapheme_boundary(text, closing);
} else {
skip -= 1;
if skip != 0 {
closing = next_grapheme_boundary(text, closing);
}
}
}
// Keep the original direction.
if let Direction::Forward = range.direction() {
Ok((opening, closing))
} else {
Ok((closing, opening))
}
}
fn find_nth_closest_pairs_plain(
text: RopeSlice,
range: Range,
mut skip: usize,
) -> Result<(usize, usize)> {
let mut stack = Vec::with_capacity(2); let mut stack = Vec::with_capacity(2);
let pos = range.from(); let pos = range.from();
let mut close_pos = pos.saturating_sub(1); let mut close_pos = pos.saturating_sub(1);
@ -67,7 +101,7 @@ pub fn find_nth_closest_pairs_pos(
for ch in text.chars_at(pos) { for ch in text.chars_at(pos) {
close_pos += 1; close_pos += 1;
if is_open_pair(ch) { if is_open_bracket(ch) {
// Track open pairs encountered so that we can step over // Track open pairs encountered so that we can step over
// the corresponding close pairs that will come up further // the corresponding close pairs that will come up further
// down the loop. We want to find a lone close pair whose // down the loop. We want to find a lone close pair whose
@ -76,7 +110,7 @@ pub fn find_nth_closest_pairs_pos(
continue; continue;
} }
if !is_close_pair(ch) { if !is_close_bracket(ch) {
// We don't care if this character isn't a brace pair item, // We don't care if this character isn't a brace pair item,
// so short circuit here. // so short circuit here.
continue; continue;
@ -157,7 +191,11 @@ pub fn find_nth_pairs_pos(
) )
}; };
Option::zip(open, close).ok_or(Error::PairNotFound) // preserve original direction
match range.direction() {
Direction::Forward => Option::zip(open, close).ok_or(Error::PairNotFound),
Direction::Backward => Option::zip(close, open).ok_or(Error::PairNotFound),
}
} }
fn find_nth_open_pair( fn find_nth_open_pair(
@ -249,6 +287,7 @@ fn find_nth_close_pair(
/// are automatically detected around each cursor (note that this may result /// are automatically detected around each cursor (note that this may result
/// in them selecting different surround characters for each selection). /// in them selecting different surround characters for each selection).
pub fn get_surround_pos( pub fn get_surround_pos(
syntax: Option<&Syntax>,
text: RopeSlice, text: RopeSlice,
selection: &Selection, selection: &Selection,
ch: Option<char>, ch: Option<char>,
@ -257,9 +296,13 @@ pub fn get_surround_pos(
let mut change_pos = Vec::new(); let mut change_pos = Vec::new();
for &range in selection { for &range in selection {
let (open_pos, close_pos) = match ch { let (open_pos, close_pos) = {
Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?, let range_raw = match ch {
None => find_nth_closest_pairs_pos(text, range, skip)?, Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?,
None => find_nth_closest_pairs_pos(syntax, text, range, skip)?,
};
let range = Range::new(range_raw.0, range_raw.1);
(range.from(), range.to())
}; };
if change_pos.contains(&open_pos) || change_pos.contains(&close_pos) { if change_pos.contains(&open_pos) || change_pos.contains(&close_pos) {
return Err(Error::CursorOverlap); return Err(Error::CursorOverlap);

View File

@ -7,9 +7,9 @@
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending; use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction; use crate::movement::Direction;
use crate::surround;
use crate::syntax::LanguageConfiguration; use crate::syntax::LanguageConfiguration;
use crate::Range; use crate::Range;
use crate::{surround, Syntax};
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize { fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
use CharCategory::{Eol, Whitespace}; use CharCategory::{Eol, Whitespace};
@ -199,25 +199,28 @@ pub fn textobject_paragraph(
} }
pub fn textobject_pair_surround( pub fn textobject_pair_surround(
syntax: Option<&Syntax>,
slice: RopeSlice, slice: RopeSlice,
range: Range, range: Range,
textobject: TextObject, textobject: TextObject,
ch: char, ch: char,
count: usize, count: usize,
) -> Range { ) -> Range {
textobject_pair_surround_impl(slice, range, textobject, Some(ch), count) textobject_pair_surround_impl(syntax, slice, range, textobject, Some(ch), count)
} }
pub fn textobject_pair_surround_closest( pub fn textobject_pair_surround_closest(
syntax: Option<&Syntax>,
slice: RopeSlice, slice: RopeSlice,
range: Range, range: Range,
textobject: TextObject, textobject: TextObject,
count: usize, count: usize,
) -> Range { ) -> Range {
textobject_pair_surround_impl(slice, range, textobject, None, count) textobject_pair_surround_impl(syntax, slice, range, textobject, None, count)
} }
fn textobject_pair_surround_impl( fn textobject_pair_surround_impl(
syntax: Option<&Syntax>,
slice: RopeSlice, slice: RopeSlice,
range: Range, range: Range,
textobject: TextObject, textobject: TextObject,
@ -226,8 +229,7 @@ fn textobject_pair_surround_impl(
) -> Range { ) -> Range {
let pair_pos = match ch { let pair_pos = match ch {
Some(ch) => surround::find_nth_pairs_pos(slice, ch, range, count), Some(ch) => surround::find_nth_pairs_pos(slice, ch, range, count),
// Automatically find the closest surround pairs None => surround::find_nth_closest_pairs_pos(syntax, slice, range, count),
None => surround::find_nth_closest_pairs_pos(slice, range, count),
}; };
pair_pos pair_pos
.map(|(anchor, head)| match textobject { .map(|(anchor, head)| match textobject {

View File

@ -5409,13 +5409,22 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
'e' => textobject_treesitter("entry", range), 'e' => textobject_treesitter("entry", range),
'p' => textobject::textobject_paragraph(text, range, objtype, count), 'p' => textobject::textobject_paragraph(text, range, objtype, count),
'm' => textobject::textobject_pair_surround_closest( 'm' => textobject::textobject_pair_surround_closest(
text, range, objtype, count, doc.syntax(),
text,
range,
objtype,
count,
), ),
'g' => textobject_change(range), 'g' => textobject_change(range),
// TODO: cancel new ranges if inconsistent surround matches across lines // TODO: cancel new ranges if inconsistent surround matches across lines
ch if !ch.is_ascii_alphanumeric() => { ch if !ch.is_ascii_alphanumeric() => textobject::textobject_pair_surround(
textobject::textobject_pair_surround(text, range, objtype, ch, count) doc.syntax(),
} text,
range,
objtype,
ch,
count,
),
_ => range, _ => range,
} }
}); });
@ -5440,7 +5449,7 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
("c", "Comment (tree-sitter)"), ("c", "Comment (tree-sitter)"),
("T", "Test (tree-sitter)"), ("T", "Test (tree-sitter)"),
("e", "Data structure entry (tree-sitter)"), ("e", "Data structure entry (tree-sitter)"),
("m", "Closest surrounding pair"), ("m", "Closest surrounding pair (tree-sitter)"),
("g", "Change"), ("g", "Change"),
(" ", "... or any character acting as a pair"), (" ", "... or any character acting as a pair"),
]; ];
@ -5454,7 +5463,7 @@ fn surround_add(cx: &mut Context) {
// surround_len is the number of new characters being added. // surround_len is the number of new characters being added.
let (open, close, surround_len) = match event.char() { let (open, close, surround_len) = match event.char() {
Some(ch) => { Some(ch) => {
let (o, c) = surround::get_pair(ch); let (o, c) = match_brackets::get_pair(ch);
let mut open = Tendril::new(); let mut open = Tendril::new();
open.push(o); open.push(o);
let mut close = Tendril::new(); let mut close = Tendril::new();
@ -5505,13 +5514,14 @@ fn surround_replace(cx: &mut Context) {
let text = doc.text().slice(..); let text = doc.text().slice(..);
let selection = doc.selection(view.id); let selection = doc.selection(view.id);
let change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) { let change_pos =
Ok(c) => c, match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
Err(err) => { Ok(c) => c,
cx.editor.set_error(err.to_string()); Err(err) => {
return; cx.editor.set_error(err.to_string());
} return;
}; }
};
let selection = selection.clone(); let selection = selection.clone();
let ranges: SmallVec<[Range; 1]> = change_pos.iter().map(|&p| Range::point(p)).collect(); let ranges: SmallVec<[Range; 1]> = change_pos.iter().map(|&p| Range::point(p)).collect();
@ -5526,7 +5536,7 @@ fn surround_replace(cx: &mut Context) {
Some(to) => to, Some(to) => to,
None => return doc.set_selection(view.id, selection), None => return doc.set_selection(view.id, selection),
}; };
let (open, close) = surround::get_pair(to); let (open, close) = match_brackets::get_pair(to);
// the changeset has to be sorted to allow nested surrounds // the changeset has to be sorted to allow nested surrounds
let mut sorted_pos: Vec<(usize, char)> = Vec::new(); let mut sorted_pos: Vec<(usize, char)> = Vec::new();
@ -5563,13 +5573,14 @@ fn surround_delete(cx: &mut Context) {
let text = doc.text().slice(..); let text = doc.text().slice(..);
let selection = doc.selection(view.id); let selection = doc.selection(view.id);
let mut change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) { let mut change_pos =
Ok(c) => c, match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
Err(err) => { Ok(c) => c,
cx.editor.set_error(err.to_string()); Err(err) => {
return; cx.editor.set_error(err.to_string());
} return;
}; }
};
change_pos.sort_unstable(); // the changeset has to be sorted to allow nested surrounds change_pos.sort_unstable(); // the changeset has to be sorted to allow nested surrounds
let transaction = let transaction =
Transaction::change(doc.text(), change_pos.into_iter().map(|p| (p, p + 1, None))); Transaction::change(doc.text(), change_pos.into_iter().map(|p| (p, p + 1, None)));