Add object selection (textobjects) (#385)

* Add textobjects for word

* Add textobjects for surround characters

* Apply clippy lints

* Remove ThisWordPrevBound in favor of PrevWordEnd

It's the same as PrevWordEnd except for taking the current char
into account, so use a "flag" to capture that usecase

* Add tests for PrevWordEnd movement

* Remove ThisWord* movements

They did not preserve anchor positions and were only used
for textobject boundary search anyway so replace them with
simple position finding functions

* Rewrite tests of word textobject

* Add tests for surround textobject

* Add textobject docs

* Refactor textobject word position functions

* Apply clippy lints on textobject

* Fix overflow error with textobjects
This commit is contained in:
Gokul Soumya 2021-07-03 06:37:49 +05:30 committed by GitHub
parent c5b2973739
commit c68fe1f2a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 475 additions and 7 deletions

View File

@ -150,7 +150,8 @@ Jumps to various locations.
## Match mode
Enter this mode using `m` from normal mode. See the relevant section
in [Usage](./usage.md#surround) for an explanation about surround usage.
in [Usage](./usage.md) for an explanation about [surround](./usage.md#surround)
and [textobject](./usage.md#textobject) usage.
| Key | Description |
| ----- | ----------- |
@ -158,6 +159,8 @@ in [Usage](./usage.md#surround) for an explanation about surround usage.
| `s` `<char>` | Surround current selection with `<char>` |
| `r` `<from><to>` | Replace surround character `<from>` with `<to>` |
| `d` `<char>` | Delete surround character `<char>` |
| `a` `<object>` | Select around textobject |
| `i` `<object>` | Select inside textobject |
## Object mode

View File

@ -24,3 +24,19 @@ It can also act on multiple selections (yay!). For example, to change every occur
- `mr([` to replace the parens with square brackets
Multiple characters are currently not supported, but planned.
## Textobjects
Currently supported: `word`, `surround`.
![textobject-demo](https://user-images.githubusercontent.com/23398472/124231131-81a4bb00-db2d-11eb-9d10-8e577ca7b177.gif)
- `ma` - Select around the object (`va` in vim, `<alt-a>` in kakoune)
- `mi` - Select inside the object (`vi` in vim, `<alt-i>` in kakoune)
| Key after `mi` or `ma` | Textobject selected |
| --- | --- |
| `w` | Word |
| `(`, `[`, `'`, etc | Specified surround pairs |
Textobjects based on treesitter, like `function`, `class`, etc are planned.

View File

@ -18,6 +18,7 @@ pub mod selection;
mod state;
pub mod surround;
pub mod syntax;
pub mod textobject;
mod transaction;
pub mod unicode {

View File

@ -113,6 +113,10 @@ pub fn move_prev_long_word_start(slice: RopeSlice, range: Range, count: usize) -
word_move(slice, range, count, WordMotionTarget::PrevLongWordStart)
}
/// Moves `range` backwards towards the end of the previous word.
///
/// Thin wrapper that delegates to `word_move` with
/// `WordMotionTarget::PrevWordEnd`; `count` is forwarded unchanged.
pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range {
word_move(slice, range, count, WordMotionTarget::PrevWordEnd)
}
fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range {
(0..count).fold(range, |range, _| {
slice.chars_at(range.head).range_to_target(target, range)
@ -159,6 +163,7 @@ pub enum WordMotionTarget {
NextWordStart,
NextWordEnd,
PrevWordStart,
PrevWordEnd,
// A "Long word" (also known as a WORD in vim/kakoune) is strictly
// delimited by whitespace, and can consist of punctuation as well
// as alphanumerics.
@ -181,7 +186,9 @@ impl CharHelpers for Chars<'_> {
fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range {
// Characters are iterated forward or backwards depending on the motion direction.
let characters: Box<dyn Iterator<Item = char>> = match target {
WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
WordMotionTarget::PrevWordStart
| WordMotionTarget::PrevLongWordStart
| WordMotionTarget::PrevWordEnd => {
self.next();
Box::new(from_fn(|| self.prev()))
}
@ -190,9 +197,9 @@ impl CharHelpers for Chars<'_> {
// Index advancement also depends on the direction.
let advance: &dyn Fn(&mut usize) = match target {
WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
&|u| *u = u.saturating_sub(1)
}
WordMotionTarget::PrevWordStart
| WordMotionTarget::PrevLongWordStart
| WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1),
_ => &|u| *u += 1,
};
@ -265,7 +272,7 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
};
match target {
WordMotionTarget::NextWordStart => {
WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => {
is_word_boundary(peek, *next_peek)
&& (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())
}
@ -913,6 +920,88 @@ mod test {
}
}
// Table-driven test for `move_prev_word_end`.
//
// Each entry is `(sample text, scenario)`, where a scenario is a list of
// `(count, starting range, expected range)` steps. Every step is run
// independently against a fresh rope built from the sample text.
//
// NOTE(review): a few samples describe runs of whitespace but contain only
// single spaces here, and their expected indices do not seem to line up with
// the text as written. Presumably consecutive spaces were collapsed when this
// listing was captured — verify the literals against the repository.
#[test]
fn test_behaviour_when_moving_to_end_of_previous_words() {
let tests = array::IntoIter::new([
("Basic backward motion from the middle of a word",
vec![(1, Range::new(9, 9), Range::new(9, 5))]),
("Starting from after boundary retreats the anchor",
vec![(1, Range::new(0, 13), Range::new(12, 8))]),
("Jump to end of a word succeeded by whitespace",
vec![(1, Range::new(10, 10), Range::new(10, 4))]),
(" Jump to start of line from end of word preceded by whitespace",
vec![(1, Range::new(7, 7), Range::new(7, 0))]),
("Previous anchor is irrelevant for backward motions",
vec![(1, Range::new(26, 12), Range::new(12, 8))]),
(" Starting from whitespace moves to first space in sequence",
vec![(1, Range::new(0, 3), Range::new(3, 0))]),
("Test identifiers_with_underscores are considered a single word",
vec![(1, Range::new(0, 25), Range::new(25, 4))]),
("Jumping\n \nback through a newline selects whitespace",
vec![(1, Range::new(0, 13), Range::new(11, 8))]),
("Jumping to start of word from the end selects the whole word",
vec![(1, Range::new(15, 15), Range::new(15, 10))]),
// Multi-step scenario: each step starts from the range the previous one produced.
("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion",
vec![
(1, Range::new(30, 30), Range::new(30, 21)),
(1, Range::new(30, 21), Range::new(20, 18)),
(1, Range::new(20, 18), Range::new(17, 15))
]),
("... ... punctuation and spaces behave as expected",
vec![
(1, Range::new(0, 10), Range::new(9, 9)),
(1, Range::new(9, 6), Range::new(5, 3)),
]),
(".._.._ punctuation is not joined by underscores into a single block",
vec![(1, Range::new(0, 5), Range::new(4, 3))]),
("Newlines\n\nare bridged seamlessly.",
vec![
(1, Range::new(0, 10), Range::new(7, 0)),
]),
("Jumping \n\n\n\n\nback from within a newline group selects previous block",
vec![
(1, Range::new(0, 13), Range::new(10, 7)),
]),
// A count of zero performs no motion, so the range must come back unchanged.
("Failed motions do not modify the range",
vec![
(0, Range::new(3, 0), Range::new(3, 0)),
]),
("Multiple motions at once resolve correctly",
vec![
(3, Range::new(23, 23), Range::new(15, 8)),
]),
("Excessive motions are performed partially",
vec![
(999, Range::new(40, 40), Range::new(8, 0)),
]),
("", // Edge case of moving backwards in empty string
vec![
(1, Range::new(0, 0), Range::new(0, 0)),
]),
("\n\n\n\n\n", // Edge case of moving backwards in all newlines
vec![
(1, Range::new(0, 0), Range::new(0, 0)),
]),
(" \n \nJumping back through alternated space blocks and newlines selects the space blocks",
vec![
(1, Range::new(0, 7), Range::new(6, 4)),
(1, Range::new(6, 4), Range::new(2, 0)),
]),
// Positions are char indices, so multibyte characters count as one position each.
("Test ヒーリクス multibyte characters behave as normal characters",
vec![
(1, Range::new(0, 9), Range::new(9, 4)),
]),
]);
// Run every step of every scenario and report the sample text on failure.
for (sample, scenario) in tests {
for (count, begin, expected_end) in scenario.into_iter() {
let range = move_prev_word_end(Rope::from(sample).slice(..), begin, count);
assert_eq!(range, expected_end, "Case failed: [{}]", sample);
}
}
}
#[test]
fn test_behaviour_when_moving_to_end_of_next_long_words() {
let tests = array::IntoIter::new([

View File

@ -130,6 +130,16 @@ impl Range {
}
}
/// Builds a [`Range`] from an `(anchor, head)` pair.
///
/// The horizontal position hint (`horiz`) is left unset.
impl From<(usize, usize)> for Range {
    fn from((anchor, head): (usize, usize)) -> Self {
        Self {
            anchor,
            head,
            horiz: None,
        }
    }
}
/// A selection consists of one or more selection ranges.
/// invariant: A selection can never be empty (always contains at least primary range).
#[derive(Debug, Clone, PartialEq, Eq)]

View File

@ -0,0 +1,319 @@
use ropey::RopeSlice;
use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory};
use crate::movement::{self, Direction};
use crate::surround;
use crate::Range;
/// Returns the char index of the last character of the word containing `pos`.
///
/// Delegates to `this_word_bound_pos` scanning forward; if the character at
/// `pos` is whitespace or a line ending, `pos` itself is returned.
fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize {
this_word_bound_pos(slice, pos, Direction::Forward)
}
/// Returns the char index of the first character of the word containing `pos`.
///
/// Delegates to `this_word_bound_pos` scanning backward; if the character at
/// `pos` is whitespace or a line ending, `pos` itself is returned.
fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize {
this_word_bound_pos(slice, pos, Direction::Backward)
}
fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize {
let iter = match direction {
Direction::Forward => slice.chars_at(pos + 1),
Direction::Backward => {
let mut iter = slice.chars_at(pos);
iter.reverse();
iter
}
};
match categorize_char(slice.char(pos)) {
CharCategory::Eol | CharCategory::Whitespace => pos,
category => {
for peek in iter {
let curr_category = categorize_char(peek);
if curr_category != category
|| curr_category == CharCategory::Eol
|| curr_category == CharCategory::Whitespace
{
return pos;
}
pos = match direction {
Direction::Forward => pos + 1,
Direction::Backward => pos.saturating_sub(1),
}
}
pos
}
}
}
/// Which extent of a textobject a selection should cover.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum TextObject {
/// Select the object plus its surroundings: adjacent whitespace for a word,
/// the delimiter characters themselves for a surround pair.
Around,
/// Select only the object itself: the word without adjacent whitespace, or
/// the text between a surround pair's delimiters.
Inside,
}
/// Selects the word under the head of `range` as a textobject.
///
/// * [`TextObject::Inside`] covers exactly the word around `range.head`
///   (or just the head position when it sits on whitespace or a line ending).
/// * [`TextObject::Around`] additionally takes in neighbouring whitespace:
///   the whitespace after the word, the whitespace before it when the word is
///   the last one on its line, or — when the head is on whitespace — the
///   whole whitespace run plus the following word.
///
/// `count` is only forwarded to the word motions used by `Around`; it does
/// not select multiple words yet.
pub fn textobject_word(
    slice: RopeSlice,
    range: Range,
    textobject: TextObject,
    count: usize,
) -> Range {
    let word_start = this_word_start_pos(slice, range.head);
    let word_end = this_word_end_pos(slice, range.head);

    // True when `pos` holds a line ending or lies outside the text.
    let line_end_or_missing = |pos: usize| slice.get_char(pos).map_or(true, char_is_line_ending);

    let (anchor, head) = match textobject {
        TextObject::Inside => (word_start, word_end),
        TextObject::Around => {
            if line_end_or_missing(word_end + 1) {
                let anchor = if line_end_or_missing(word_start.saturating_sub(1)) {
                    // single word on a line
                    word_start
                } else {
                    // last word on a line, select the whitespace before it too
                    movement::move_prev_word_end(slice, range, count).head
                };
                (anchor, word_end)
            } else if char_is_whitespace(slice.char(range.head)) {
                // select whole whitespace and next word
                let head = movement::move_next_word_end(slice, range, count).head;
                // the skip yields the first *non* whitespace char going backwards,
                // so step one forward to land on the first whitespace char
                let anchor =
                    movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace())
                        .map_or(0, |p| p + 1);
                (anchor, head)
            } else {
                // word followed by more text: extend up to the next word's start
                let head = movement::move_next_word_start(slice, range, count).head;
                (word_start, head)
            }
        }
    };

    Range::new(anchor, head)
}
/// Selects the surround pair for `ch` around the head of `range`.
///
/// The pair positions come from `surround::find_nth_pairs_pos`, called with
/// `range.head` and `count`. [`TextObject::Around`] spans both delimiter
/// characters, [`TextObject::Inside`] spans only what lies between them.
/// When no pair is found, `range` is returned unchanged.
pub fn textobject_surround(
    slice: RopeSlice,
    range: Range,
    textobject: TextObject,
    ch: char,
    count: usize,
) -> Range {
    let select = |(open, close): (usize, usize)| match textobject {
        TextObject::Around => Range::new(open, close),
        // Step inwards past both delimiters.
        TextObject::Inside => Range::new(open + 1, close.saturating_sub(1)),
    };

    surround::find_nth_pairs_pos(slice, ch, range.head, count).map_or(range, select)
}
#[cfg(test)]
mod test {
    use super::TextObject::*;
    use super::*;

    use crate::Range;
    use ropey::Rope;

    /// Table-driven test for `textobject_word`.
    ///
    /// Every case starts from a point selection at the given cursor position
    /// and uses a count of 1.
    #[test]
    fn test_textobject_word() {
        // (text, [(cursor position, textobject, final range), ...])
        let tests = &[
            (
                "cursor at beginning of doc",
                vec![(0, Inside, (0, 5)), (0, Around, (0, 6))],
            ),
            (
                "cursor at middle of word",
                vec![
                    (13, Inside, (10, 15)),
                    (10, Inside, (10, 15)),
                    (15, Inside, (10, 15)),
                    (13, Around, (10, 16)),
                    (10, Around, (10, 16)),
                    (15, Around, (10, 16)),
                ],
            ),
            (
                "cursor between word whitespace",
                vec![(6, Inside, (6, 6)), (6, Around, (6, 13))],
            ),
            (
                "cursor on word before newline\n",
                vec![
                    (22, Inside, (22, 28)),
                    (28, Inside, (22, 28)),
                    (25, Inside, (22, 28)),
                    (22, Around, (21, 28)),
                    (28, Around, (21, 28)),
                    (25, Around, (21, 28)),
                ],
            ),
            (
                "cursor on newline\nnext line",
                vec![(17, Inside, (17, 17)), (17, Around, (17, 22))],
            ),
            (
                "cursor on word after newline\nnext line",
                vec![
                    (29, Inside, (29, 32)),
                    (30, Inside, (29, 32)),
                    (32, Inside, (29, 32)),
                    (29, Around, (29, 33)),
                    (30, Around, (29, 33)),
                    (32, Around, (29, 33)),
                ],
            ),
            (
                "cursor on #$%:;* punctuation",
                vec![
                    (13, Inside, (10, 15)),
                    (10, Inside, (10, 15)),
                    (15, Inside, (10, 15)),
                    (13, Around, (10, 16)),
                    (10, Around, (10, 16)),
                    (15, Around, (10, 16)),
                ],
            ),
            (
                "cursor on punc%^#$:;.tuation",
                vec![
                    (14, Inside, (14, 20)),
                    (20, Inside, (14, 20)),
                    (17, Inside, (14, 20)),
                    (14, Around, (14, 20)),
                    // FIXME: edge case
                    // (20, Around, (14, 20)),
                    (17, Around, (14, 20)),
                ],
            ),
            (
                // Three spaces between "in" and "extra": the cases below need
                // indices 9, 10 and 11 to all be whitespace and "extra" to
                // end at index 16.
                "cursor in   extra whitespace",
                vec![
                    (9, Inside, (9, 9)),
                    (10, Inside, (10, 10)),
                    (11, Inside, (11, 11)),
                    (9, Around, (9, 16)),
                    (10, Around, (9, 16)),
                    (11, Around, (9, 16)),
                ],
            ),
            (
                "cursor at end of doc",
                vec![(19, Inside, (17, 19)), (19, Around, (16, 19))],
            ),
        ];

        for (sample, scenario) in tests {
            let doc = Rope::from(*sample);
            let slice = doc.slice(..);
            for &case in scenario {
                let (pos, objtype, expected_range) = case;
                let result = textobject_word(slice, Range::point(pos), objtype, 1);
                assert_eq!(
                    result,
                    expected_range.into(),
                    "\nCase failed: {:?} - {:?}",
                    sample,
                    case
                );
            }
        }
    }

    /// Table-driven test for `textobject_surround`.
    ///
    /// Every case starts from a point selection at the given cursor position;
    /// a final range equal to the cursor position means no pair was found and
    /// the selection was left untouched.
    #[test]
    fn test_textobject_surround() {
        // (text, [(cursor position, textobject, final range, surround char, count), ...])
        let tests = &[
            (
                "simple (single) surround pairs",
                vec![
                    (3, Inside, (3, 3), '(', 1),
                    (7, Inside, (8, 13), ')', 1),
                    (10, Inside, (8, 13), '(', 1),
                    (14, Inside, (8, 13), ')', 1),
                    (3, Around, (3, 3), '(', 1),
                    (7, Around, (7, 14), ')', 1),
                    (10, Around, (7, 14), '(', 1),
                    (14, Around, (7, 14), ')', 1),
                ],
            ),
            (
                "samexx 'single' surround pairs",
                vec![
                    (3, Inside, (3, 3), '\'', 1),
                    // FIXME: surround doesn't work when *on* same chars pair
                    // (7, Inside, (8, 13), '\'', 1),
                    (10, Inside, (8, 13), '\'', 1),
                    // (14, Inside, (8, 13), '\'', 1),
                    (3, Around, (3, 3), '\'', 1),
                    // (7, Around, (7, 14), '\'', 1),
                    (10, Around, (7, 14), '\'', 1),
                    // (14, Around, (7, 14), '\'', 1),
                ],
            ),
            (
                "(nested (surround (pairs)) 3 levels)",
                vec![
                    (0, Inside, (1, 34), '(', 1),
                    (6, Inside, (1, 34), ')', 1),
                    (8, Inside, (9, 24), '(', 1),
                    (8, Inside, (9, 34), ')', 2),
                    (20, Inside, (9, 24), '(', 2),
                    (20, Inside, (1, 34), ')', 3),
                    (0, Around, (0, 35), '(', 1),
                    (6, Around, (0, 35), ')', 1),
                    (8, Around, (8, 25), '(', 1),
                    (8, Around, (8, 35), ')', 2),
                    (20, Around, (8, 25), '(', 2),
                    (20, Around, (0, 35), ')', 3),
                ],
            ),
            (
                "(mixed {surround [pair] same} line)",
                vec![
                    (2, Inside, (1, 33), '(', 1),
                    (9, Inside, (8, 27), '{', 1),
                    (18, Inside, (18, 21), '[', 1),
                    (2, Around, (0, 34), '(', 1),
                    (9, Around, (7, 28), '{', 1),
                    (18, Around, (17, 22), '[', 1),
                ],
            ),
            (
                "(stepped (surround) pairs (should) skip)",
                vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)],
            ),
            (
                "[surround pairs{\non different]\nlines}",
                vec![
                    (7, Inside, (1, 28), '[', 1),
                    (15, Inside, (16, 35), '{', 1),
                    (7, Around, (0, 29), '[', 1),
                    (15, Around, (15, 36), '{', 1),
                ],
            ),
        ];

        for (sample, scenario) in tests {
            let doc = Rope::from(*sample);
            let slice = doc.slice(..);
            for &case in scenario {
                let (pos, objtype, expected_range, ch, count) = case;
                let result = textobject_surround(slice, Range::point(pos), objtype, ch, count);
                assert_eq!(
                    result,
                    expected_range.into(),
                    "\nCase failed: {:?} - {:?}",
                    sample,
                    case
                );
            }
        }
    }
}

View File

@ -3502,6 +3502,9 @@ fn right_bracket_mode(cx: &mut Context) {
})
}
use helix_core::surround;
use helix_core::textobject;
fn match_mode(cx: &mut Context) {
let count = cx.count;
cx.on_next_key(move |cx, event| {
@ -3517,13 +3520,40 @@ fn match_mode(cx: &mut Context) {
's' => surround_add(cx),
'r' => surround_replace(cx),
'd' => surround_delete(cx),
'a' => select_textobject(cx, textobject::TextObject::Around),
'i' => select_textobject(cx, textobject::TextObject::Inside),
_ => (),
}
}
})
}
use helix_core::surround;
/// Waits for the next key and replaces every selection range with the
/// textobject that key names.
///
/// `w` selects a word; any character that is not ASCII alphanumeric is
/// treated as a surround character. Other keys, and non-character key
/// events, leave the selection untouched. The count is read when this
/// function is called, before the object key is pressed.
fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
    let count = cx.count();

    cx.on_next_key(move |cx, event| {
        // Only plain character keys name a textobject.
        let ch = match event {
            KeyEvent {
                code: KeyCode::Char(ch),
                ..
            } => ch,
            _ => return,
        };

        let (view, doc) = current!(cx.editor);
        let text = doc.text().slice(..);

        let selection = doc.selection(view.id).transform(|range| {
            if ch == 'w' {
                textobject::textobject_word(text, range, objtype, count)
            } else if !ch.is_ascii_alphanumeric() {
                // TODO: cancel new ranges if inconsistent surround matches across lines
                textobject::textobject_surround(text, range, objtype, ch, count)
            } else {
                range
            }
        });

        doc.set_selection(view.id, selection);
    })
}
fn surround_add(cx: &mut Context) {
cx.on_next_key(move |cx, event| {