mirror of
https://github.com/zed-industries/zed.git
synced 2024-09-19 02:17:35 +03:00
Use outline queries to chunk files syntactically (#11283)
This chunking strategy uses the existing `outline` query to chunk files. We try to find chunk boundaries that are:
* at starts or ends of lines
* nested within as few outline items as possible

Release Notes:
- N/A
This commit is contained in:
parent
1abd58070b
commit
43ad470e58
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -8704,6 +8704,8 @@ dependencies = [
|
|||||||
"sha2 0.10.7",
|
"sha2 0.10.7",
|
||||||
"smol",
|
"smol",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
"tree-sitter",
|
||||||
|
"unindent",
|
||||||
"util",
|
"util",
|
||||||
"worktree",
|
"worktree",
|
||||||
]
|
]
|
||||||
|
@ -55,10 +55,10 @@ use std::{
|
|||||||
Arc,
|
Arc,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use syntax_map::SyntaxSnapshot;
|
use syntax_map::{QueryCursorHandle, SyntaxSnapshot};
|
||||||
pub use task_context::{BasicContextProvider, ContextProvider, ContextProviderWithTasks};
|
pub use task_context::{BasicContextProvider, ContextProvider, ContextProviderWithTasks};
|
||||||
use theme::SyntaxTheme;
|
use theme::SyntaxTheme;
|
||||||
use tree_sitter::{self, wasmtime, Query, WasmStore};
|
use tree_sitter::{self, wasmtime, Query, QueryCursor, WasmStore};
|
||||||
use util::http::HttpClient;
|
use util::http::HttpClient;
|
||||||
|
|
||||||
pub use buffer::Operation;
|
pub use buffer::Operation;
|
||||||
@ -101,6 +101,15 @@ where
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn with_query_cursor<F, R>(func: F) -> R
|
||||||
|
where
|
||||||
|
F: FnOnce(&mut QueryCursor) -> R,
|
||||||
|
{
|
||||||
|
use std::ops::DerefMut;
|
||||||
|
let mut cursor = QueryCursorHandle::new();
|
||||||
|
func(cursor.deref_mut())
|
||||||
|
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref NEXT_LANGUAGE_ID: AtomicUsize = Default::default();
|
static ref NEXT_LANGUAGE_ID: AtomicUsize = Default::default();
|
||||||
static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
|
static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
|
||||||
|
@ -211,7 +211,7 @@ struct TextProvider<'a>(&'a Rope);
|
|||||||
|
|
||||||
struct ByteChunks<'a>(text::Chunks<'a>);
|
struct ByteChunks<'a>(text::Chunks<'a>);
|
||||||
|
|
||||||
struct QueryCursorHandle(Option<QueryCursor>);
|
pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
|
||||||
|
|
||||||
impl SyntaxMap {
|
impl SyntaxMap {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
@ -1739,7 +1739,7 @@ impl<'a> Iterator for ByteChunks<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl QueryCursorHandle {
|
impl QueryCursorHandle {
|
||||||
pub(crate) fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
|
let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
|
||||||
cursor.set_match_limit(64);
|
cursor.set_match_limit(64);
|
||||||
QueryCursorHandle(Some(cursor))
|
QueryCursorHandle(Some(cursor))
|
||||||
|
@ -37,7 +37,9 @@ serde.workspace = true
|
|||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
sha2.workspace = true
|
sha2.workspace = true
|
||||||
smol.workspace = true
|
smol.workspace = true
|
||||||
|
tree-sitter.workspace = true
|
||||||
util. workspace = true
|
util. workspace = true
|
||||||
|
unindent.workspace = true
|
||||||
worktree.workspace = true
|
worktree.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
@ -1,9 +1,24 @@
|
|||||||
use language::{with_parser, Grammar, Tree};
|
use language::{with_parser, with_query_cursor, Grammar};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
use std::{cmp, ops::Range, sync::Arc};
|
use std::{
|
||||||
|
cmp::{self, Reverse},
|
||||||
|
ops::Range,
|
||||||
|
sync::Arc,
|
||||||
|
};
|
||||||
|
use tree_sitter::QueryCapture;
|
||||||
|
use util::ResultExt as _;
|
||||||
|
|
||||||
const CHUNK_THRESHOLD: usize = 1500;
|
/// Bounds on the size of a single chunk, measured in bytes of source text.
#[derive(Clone, Copy)]
struct ChunkSizeRange {
    /// While a chunk is shorter than this, it is always extended to the next
    /// candidate boundary, regardless of syntactic nesting.
    min: usize,
    /// Once the next candidate boundary would make the chunk longer than this,
    /// the chunk is emitted and a new one is started.
    max: usize,
}

/// Default chunk size bounds used by `chunk_text`.
const CHUNK_SIZE_RANGE: ChunkSizeRange = ChunkSizeRange {
    min: 1024,
    max: 8192,
};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Chunk {
|
pub struct Chunk {
|
||||||
@ -12,193 +27,318 @@ pub struct Chunk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn chunk_text(text: &str, grammar: Option<&Arc<Grammar>>) -> Vec<Chunk> {
|
pub fn chunk_text(text: &str, grammar: Option<&Arc<Grammar>>) -> Vec<Chunk> {
|
||||||
|
chunk_text_with_size_range(text, grammar, CHUNK_SIZE_RANGE)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn chunk_text_with_size_range(
|
||||||
|
text: &str,
|
||||||
|
grammar: Option<&Arc<Grammar>>,
|
||||||
|
size_config: ChunkSizeRange,
|
||||||
|
) -> Vec<Chunk> {
|
||||||
|
let mut syntactic_ranges = Vec::new();
|
||||||
|
|
||||||
if let Some(grammar) = grammar {
|
if let Some(grammar) = grammar {
|
||||||
let tree = with_parser(|parser| {
|
if let Some(outline) = grammar.outline_config.as_ref() {
|
||||||
parser
|
let tree = with_parser(|parser| {
|
||||||
.set_language(&grammar.ts_language)
|
parser.set_language(&grammar.ts_language).log_err()?;
|
||||||
.expect("incompatible grammar");
|
parser.parse(&text, None)
|
||||||
parser.parse(&text, None).expect("invalid language")
|
});
|
||||||
});
|
|
||||||
|
|
||||||
chunk_parse_tree(tree, &text, CHUNK_THRESHOLD)
|
if let Some(tree) = tree {
|
||||||
} else {
|
with_query_cursor(|cursor| {
|
||||||
chunk_lines(&text)
|
// Retrieve a list of ranges of outline items (types, functions, etc) in the document.
|
||||||
}
|
// Omit single-line outline items (e.g. struct fields, constant declarations), because
|
||||||
}
|
// we'll already be attempting to split on lines.
|
||||||
|
syntactic_ranges = cursor
|
||||||
fn chunk_parse_tree(tree: Tree, text: &str, chunk_threshold: usize) -> Vec<Chunk> {
|
.matches(&outline.query, tree.root_node(), text.as_bytes())
|
||||||
let mut chunk_ranges = Vec::new();
|
.filter_map(|mat| {
|
||||||
let mut cursor = tree.walk();
|
mat.captures
|
||||||
|
.iter()
|
||||||
let mut range = 0..0;
|
.find_map(|QueryCapture { node, index }| {
|
||||||
loop {
|
if *index == outline.item_capture_ix {
|
||||||
let node = cursor.node();
|
if node.end_position().row > node.start_position().row {
|
||||||
|
return Some(node.byte_range());
|
||||||
// If adding the node to the current chunk exceeds the threshold
|
}
|
||||||
if node.end_byte() - range.start > chunk_threshold {
|
}
|
||||||
// Try to descend into its first child. If we can't, flush the current
|
None
|
||||||
// range and try again.
|
})
|
||||||
if cursor.goto_first_child() {
|
})
|
||||||
continue;
|
.collect::<Vec<_>>();
|
||||||
} else if !range.is_empty() {
|
syntactic_ranges
|
||||||
chunk_ranges.push(range.clone());
|
.sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
|
||||||
range.start = range.end;
|
});
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we get here, the node itself has no children but is larger than the threshold.
|
|
||||||
// Break its text into arbitrary chunks.
|
|
||||||
split_text(text, range.clone(), node.end_byte(), &mut chunk_ranges);
|
|
||||||
}
|
|
||||||
range.end = node.end_byte();
|
|
||||||
|
|
||||||
// If we get here, we consumed the node. Advance to the next child, ascending if there isn't one.
|
|
||||||
while !cursor.goto_next_sibling() {
|
|
||||||
if !cursor.goto_parent() {
|
|
||||||
if !range.is_empty() {
|
|
||||||
chunk_ranges.push(range);
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunk_ranges
|
|
||||||
.into_iter()
|
|
||||||
.map(|range| {
|
|
||||||
let digest = Sha256::digest(&text[range.clone()]).into();
|
|
||||||
Chunk { range, digest }
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chunk_text_with_syntactic_ranges(text, &syntactic_ranges, size_config)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn chunk_lines(text: &str) -> Vec<Chunk> {
|
fn chunk_text_with_syntactic_ranges(
|
||||||
let mut chunk_ranges = Vec::new();
|
text: &str,
|
||||||
|
mut syntactic_ranges: &[Range<usize>],
|
||||||
|
size_config: ChunkSizeRange,
|
||||||
|
) -> Vec<Chunk> {
|
||||||
|
let mut chunks = Vec::new();
|
||||||
let mut range = 0..0;
|
let mut range = 0..0;
|
||||||
|
let mut range_end_nesting_depth = 0;
|
||||||
|
|
||||||
let mut newlines = text.match_indices('\n').peekable();
|
// Try to split the text at line boundaries.
|
||||||
while let Some((newline_ix, _)) = newlines.peek() {
|
let mut line_ixs = text
|
||||||
let newline_ix = newline_ix + 1;
|
.match_indices('\n')
|
||||||
if newline_ix - range.start <= CHUNK_THRESHOLD {
|
.map(|(ix, _)| ix + 1)
|
||||||
range.end = newline_ix;
|
.chain(if text.ends_with('\n') {
|
||||||
newlines.next();
|
None
|
||||||
} else {
|
} else {
|
||||||
|
Some(text.len())
|
||||||
|
})
|
||||||
|
.peekable();
|
||||||
|
|
||||||
|
while let Some(&line_ix) = line_ixs.peek() {
|
||||||
|
// If the current position is beyond the maximum chunk size, then
|
||||||
|
// start a new chunk.
|
||||||
|
if line_ix - range.start > size_config.max {
|
||||||
if range.is_empty() {
|
if range.is_empty() {
|
||||||
split_text(text, range, newline_ix, &mut chunk_ranges);
|
range.end = cmp::min(range.start + size_config.max, line_ix);
|
||||||
range = newline_ix..newline_ix;
|
while !text.is_char_boundary(range.end) {
|
||||||
|
range.end -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks.push(Chunk {
|
||||||
|
range: range.clone(),
|
||||||
|
digest: Sha256::digest(&text[range.clone()]).into(),
|
||||||
|
});
|
||||||
|
range_end_nesting_depth = 0;
|
||||||
|
range.start = range.end;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discard any syntactic ranges that end before the current position.
|
||||||
|
while let Some(first_item) = syntactic_ranges.first() {
|
||||||
|
if first_item.end < line_ix {
|
||||||
|
syntactic_ranges = &syntactic_ranges[1..];
|
||||||
|
continue;
|
||||||
} else {
|
} else {
|
||||||
chunk_ranges.push(range.clone());
|
break;
|
||||||
range.start = range.end;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count how many syntactic ranges contain the current position.
|
||||||
|
let mut nesting_depth = 0;
|
||||||
|
for range in syntactic_ranges {
|
||||||
|
if range.start > line_ix {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if range.start < line_ix && range.end > line_ix {
|
||||||
|
nesting_depth += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend the current range to this position, unless an earlier candidate
|
||||||
|
// end position was less nested syntactically.
|
||||||
|
if range.len() < size_config.min || nesting_depth <= range_end_nesting_depth {
|
||||||
|
range.end = line_ix;
|
||||||
|
range_end_nesting_depth = nesting_depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
line_ixs.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
if !range.is_empty() {
|
if !range.is_empty() {
|
||||||
chunk_ranges.push(range);
|
chunks.push(Chunk {
|
||||||
}
|
range: range.clone(),
|
||||||
|
|
||||||
chunk_ranges
|
|
||||||
.into_iter()
|
|
||||||
.map(|range| Chunk {
|
|
||||||
digest: Sha256::digest(&text[range.clone()]).into(),
|
digest: Sha256::digest(&text[range.clone()]).into(),
|
||||||
range,
|
});
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn split_text(
|
|
||||||
text: &str,
|
|
||||||
mut range: Range<usize>,
|
|
||||||
max_end: usize,
|
|
||||||
chunk_ranges: &mut Vec<Range<usize>>,
|
|
||||||
) {
|
|
||||||
while range.start < max_end {
|
|
||||||
range.end = cmp::min(range.start + CHUNK_THRESHOLD, max_end);
|
|
||||||
while !text.is_char_boundary(range.end) {
|
|
||||||
range.end -= 1;
|
|
||||||
}
|
|
||||||
chunk_ranges.push(range.clone());
|
|
||||||
range.start = range.end;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chunks
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use language::{tree_sitter_rust, Language, LanguageConfig, LanguageMatcher};
|
use language::{tree_sitter_rust, Language, LanguageConfig, LanguageMatcher};
|
||||||
|
use unindent::Unindent as _;
|
||||||
|
|
||||||
// This example comes from crates/gpui/examples/window_positioning.rs which
|
#[test]
|
||||||
// has the property of being CHUNK_THRESHOLD < TEXT.len() < 2*CHUNK_THRESHOLD
|
fn test_chunk_text_with_syntax() {
|
||||||
static TEXT: &str = r#"
|
let language = rust_language();
|
||||||
use gpui::*;
|
|
||||||
|
|
||||||
struct WindowContent {
|
let text = "
|
||||||
text: SharedString,
|
struct Person {
|
||||||
|
first_name: String,
|
||||||
|
last_name: String,
|
||||||
|
age: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Person {
|
||||||
|
fn new(first_name: String, last_name: String, age: u32) -> Self {
|
||||||
|
Self { first_name, last_name, age }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn first_name(&self) -> &str {
|
||||||
|
&self.first_name
|
||||||
|
}
|
||||||
|
|
||||||
|
fn last_name(&self) -> &str {
|
||||||
|
&self.last_name
|
||||||
|
}
|
||||||
|
|
||||||
|
fn age(&self) -> usize {
|
||||||
|
self.ages
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"
|
||||||
|
.unindent();
|
||||||
|
|
||||||
|
let chunks = chunk_text_with_size_range(
|
||||||
|
&text,
|
||||||
|
language.grammar(),
|
||||||
|
ChunkSizeRange {
|
||||||
|
min: text.find('}').unwrap(),
|
||||||
|
max: text.find("Self {").unwrap(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// The entire impl cannot fit in a chunk, so it is split.
|
||||||
|
// Within the impl, two methods can fit in a chunk.
|
||||||
|
assert_chunks(
|
||||||
|
&text,
|
||||||
|
&chunks,
|
||||||
|
&[
|
||||||
|
"struct Person {", // ...
|
||||||
|
"impl Person {",
|
||||||
|
" fn first_name",
|
||||||
|
" fn age",
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
let text = "
|
||||||
|
struct T {}
|
||||||
|
struct U {}
|
||||||
|
struct V {}
|
||||||
|
struct W {
|
||||||
|
a: T,
|
||||||
|
b: U,
|
||||||
|
}
|
||||||
|
"
|
||||||
|
.unindent();
|
||||||
|
|
||||||
|
let chunks = chunk_text_with_size_range(
|
||||||
|
&text,
|
||||||
|
language.grammar(),
|
||||||
|
ChunkSizeRange {
|
||||||
|
min: text.find('{').unwrap(),
|
||||||
|
max: text.find('V').unwrap(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Two single-line structs can fit in a chunk.
|
||||||
|
// The last struct cannot fit in a chunk together
|
||||||
|
// with the previous single-line struct.
|
||||||
|
assert_chunks(
|
||||||
|
&text,
|
||||||
|
&chunks,
|
||||||
|
&[
|
||||||
|
"struct T", // ...
|
||||||
|
"struct V", // ...
|
||||||
|
"struct W", // ...
|
||||||
|
"}",
|
||||||
|
],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Render for WindowContent {
|
#[test]
|
||||||
fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
|
fn test_chunk_with_long_lines() {
|
||||||
div()
|
let language = rust_language();
|
||||||
.flex()
|
|
||||||
.bg(rgb(0x1e2025))
|
let text = "
|
||||||
.size_full()
|
struct S { a: u32 }
|
||||||
.justify_center()
|
struct T { a: u64 }
|
||||||
.items_center()
|
struct U { a: u64, b: u64, c: u64, d: u64, e: u64, f: u64, g: u64, h: u64, i: u64, j: u64 }
|
||||||
.text_xl()
|
struct W { a: u64, b: u64, c: u64, d: u64, e: u64, f: u64, g: u64, h: u64, i: u64, j: u64 }
|
||||||
.text_color(rgb(0xffffff))
|
"
|
||||||
.child(self.text.clone())
|
.unindent();
|
||||||
|
|
||||||
|
let chunks = chunk_text_with_size_range(
|
||||||
|
&text,
|
||||||
|
language.grammar(),
|
||||||
|
ChunkSizeRange { min: 32, max: 64 },
|
||||||
|
);
|
||||||
|
|
||||||
|
// The line is too long to fit in one chunk
|
||||||
|
assert_chunks(
|
||||||
|
&text,
|
||||||
|
&chunks,
|
||||||
|
&[
|
||||||
|
"struct S {", // ...
|
||||||
|
"struct U",
|
||||||
|
"4, h: u64, i: u64", // ...
|
||||||
|
"struct W",
|
||||||
|
"4, h: u64, i: u64", // ...
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
fn assert_chunks(text: &str, chunks: &[Chunk], expected_chunk_text_prefixes: &[&str]) {
|
||||||
|
check_chunk_invariants(text, chunks);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
chunks.len(),
|
||||||
|
expected_chunk_text_prefixes.len(),
|
||||||
|
"unexpected number of chunks: {chunks:?}",
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut prev_chunk_end = 0;
|
||||||
|
for (ix, chunk) in chunks.iter().enumerate() {
|
||||||
|
let expected_prefix = expected_chunk_text_prefixes[ix];
|
||||||
|
let chunk_text = &text[chunk.range.clone()];
|
||||||
|
if !chunk_text.starts_with(expected_prefix) {
|
||||||
|
let chunk_prefix_offset = text[prev_chunk_end..].find(expected_prefix);
|
||||||
|
if let Some(chunk_prefix_offset) = chunk_prefix_offset {
|
||||||
|
panic!(
|
||||||
|
"chunk {ix} starts at unexpected offset {}. expected {}",
|
||||||
|
chunk.range.start,
|
||||||
|
chunk_prefix_offset + prev_chunk_end
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
panic!("invalid expected chunk prefix {ix}: {expected_prefix:?}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prev_chunk_end = chunk.range.end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
#[track_caller]
|
||||||
App::new().run(|cx: &mut AppContext| {
|
fn check_chunk_invariants(text: &str, chunks: &[Chunk]) {
|
||||||
// Create several new windows, positioned in the top right corner of each screen
|
for (ix, chunk) in chunks.iter().enumerate() {
|
||||||
|
if ix > 0 && chunk.range.start != chunks[ix - 1].range.end {
|
||||||
for screen in cx.displays() {
|
panic!("chunk ranges are not contiguous: {:?}", chunks);
|
||||||
let options = {
|
|
||||||
let popup_margin_width = DevicePixels::from(16);
|
|
||||||
let popup_margin_height = DevicePixels::from(-0) - DevicePixels::from(48);
|
|
||||||
|
|
||||||
let window_size = Size {
|
|
||||||
width: px(400.),
|
|
||||||
height: px(72.),
|
|
||||||
};
|
|
||||||
|
|
||||||
let screen_bounds = screen.bounds();
|
|
||||||
let size: Size<DevicePixels> = window_size.into();
|
|
||||||
|
|
||||||
let bounds = gpui::Bounds::<DevicePixels> {
|
|
||||||
origin: screen_bounds.upper_right()
|
|
||||||
- point(size.width + popup_margin_width, popup_margin_height),
|
|
||||||
size: window_size.into(),
|
|
||||||
};
|
|
||||||
|
|
||||||
WindowOptions {
|
|
||||||
// Set the bounds of the window in screen coordinates
|
|
||||||
bounds: Some(bounds),
|
|
||||||
// Specify the display_id to ensure the window is created on the correct screen
|
|
||||||
display_id: Some(screen.id()),
|
|
||||||
|
|
||||||
titlebar: None,
|
|
||||||
window_background: WindowBackgroundAppearance::default(),
|
|
||||||
focus: false,
|
|
||||||
show: true,
|
|
||||||
kind: WindowKind::PopUp,
|
|
||||||
is_movable: false,
|
|
||||||
fullscreen: false,
|
|
||||||
app_id: None,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
cx.open_window(options, |cx| {
|
|
||||||
cx.new_view(|_| WindowContent {
|
|
||||||
text: format!("{:?}", screen.id()).into(),
|
|
||||||
})
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
}"#;
|
|
||||||
|
|
||||||
fn setup_rust_language() -> Language {
|
if text.is_empty() {
|
||||||
|
assert!(chunks.is_empty())
|
||||||
|
} else if chunks.first().unwrap().range.start != 0
|
||||||
|
|| chunks.last().unwrap().range.end != text.len()
|
||||||
|
{
|
||||||
|
panic!("chunks don't cover entire text {:?}", chunks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_chunk_text() {
|
||||||
|
let text = "a\n".repeat(1000);
|
||||||
|
let chunks = chunk_text(&text, None);
|
||||||
|
assert_eq!(
|
||||||
|
chunks.len(),
|
||||||
|
((2000_f64) / (CHUNK_SIZE_RANGE.max as f64)).ceil() as usize
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rust_language() -> Language {
|
||||||
Language::new(
|
Language::new(
|
||||||
LanguageConfig {
|
LanguageConfig {
|
||||||
name: "Rust".into(),
|
name: "Rust".into(),
|
||||||
@ -210,198 +350,14 @@ mod tests {
|
|||||||
},
|
},
|
||||||
Some(tree_sitter_rust::language()),
|
Some(tree_sitter_rust::language()),
|
||||||
)
|
)
|
||||||
}
|
.with_outline_query(
|
||||||
|
"
|
||||||
#[test]
|
(function_item name: (_) @name) @item
|
||||||
fn test_chunk_text() {
|
(impl_item type: (_) @name) @item
|
||||||
let text = "a\n".repeat(1000);
|
(struct_item name: (_) @name) @item
|
||||||
let chunks = chunk_text(&text, None);
|
(field_declaration name: (_) @name) @item
|
||||||
assert_eq!(
|
",
|
||||||
chunks.len(),
|
)
|
||||||
((2000_f64) / (CHUNK_THRESHOLD as f64)).ceil() as usize
|
.unwrap()
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chunk_text_grammar() {
|
|
||||||
// Let's set up a big text with some known segments
|
|
||||||
// We'll then chunk it and verify that the chunks are correct
|
|
||||||
|
|
||||||
let language = setup_rust_language();
|
|
||||||
|
|
||||||
let chunks = chunk_text(TEXT, language.grammar());
|
|
||||||
assert_eq!(chunks.len(), 2);
|
|
||||||
|
|
||||||
assert_eq!(chunks[0].range.start, 0);
|
|
||||||
assert_eq!(chunks[0].range.end, 1498);
|
|
||||||
// The break between chunks is right before the "Specify the display_id" comment
|
|
||||||
|
|
||||||
assert_eq!(chunks[1].range.start, 1498);
|
|
||||||
assert_eq!(chunks[1].range.end, 2434);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chunk_parse_tree() {
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(TEXT, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, TEXT, 250);
|
|
||||||
assert_eq!(chunks.len(), 11);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chunk_unparsable() {
|
|
||||||
// Even if a chunk is unparsable, we should still be able to chunk it
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let text = r#"fn main() {"#;
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(text, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, text, 250);
|
|
||||||
assert_eq!(chunks.len(), 1);
|
|
||||||
|
|
||||||
assert_eq!(chunks[0].range.start, 0);
|
|
||||||
assert_eq!(chunks[0].range.end, 11);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_empty_text() {
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse("", None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, "", CHUNK_THRESHOLD);
|
|
||||||
assert!(chunks.is_empty(), "Chunks should be empty for empty text");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_single_large_node() {
|
|
||||||
let large_text = "static ".to_owned() + "a".repeat(CHUNK_THRESHOLD - 1).as_str() + " = 2";
|
|
||||||
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(&large_text, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, &large_text, CHUNK_THRESHOLD);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
chunks.len(),
|
|
||||||
3,
|
|
||||||
"Large chunks are broken up according to grammar as best as possible"
|
|
||||||
);
|
|
||||||
|
|
||||||
// Expect chunks to be static, aaaaaa..., and = 2
|
|
||||||
assert_eq!(chunks[0].range.start, 0);
|
|
||||||
assert_eq!(chunks[0].range.end, "static".len());
|
|
||||||
|
|
||||||
assert_eq!(chunks[1].range.start, "static".len());
|
|
||||||
assert_eq!(chunks[1].range.end, "static".len() + CHUNK_THRESHOLD);
|
|
||||||
|
|
||||||
assert_eq!(chunks[2].range.start, "static".len() + CHUNK_THRESHOLD);
|
|
||||||
assert_eq!(chunks[2].range.end, large_text.len());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_multiple_small_nodes() {
|
|
||||||
let small_text = "a b c d e f g h i j k l m n o p q r s t u v w x y z";
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(small_text, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, small_text, 5);
|
|
||||||
assert!(
|
|
||||||
chunks.len() > 1,
|
|
||||||
"Should have multiple chunks for multiple small nodes"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_node_with_children() {
|
|
||||||
let nested_text = "fn main() { let a = 1; let b = 2; }";
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(nested_text, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, nested_text, 10);
|
|
||||||
assert!(
|
|
||||||
chunks.len() > 1,
|
|
||||||
"Should have multiple chunks for a node with children"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_text_with_unparsable_sections() {
|
|
||||||
// This test uses purposefully hit-or-miss sizing of 11 characters per likely chunk
|
|
||||||
let mixed_text = "fn main() { let a = 1; let b = 2; } unparsable bits here";
|
|
||||||
let language = setup_rust_language();
|
|
||||||
let grammar = language.grammar().unwrap();
|
|
||||||
|
|
||||||
let tree = with_parser(|parser| {
|
|
||||||
parser
|
|
||||||
.set_language(&grammar.ts_language)
|
|
||||||
.expect("incompatible grammar");
|
|
||||||
parser.parse(mixed_text, None).expect("invalid language")
|
|
||||||
});
|
|
||||||
|
|
||||||
let chunks = chunk_parse_tree(tree, mixed_text, 11);
|
|
||||||
assert!(
|
|
||||||
chunks.len() > 1,
|
|
||||||
"Should handle both parsable and unparsable sections correctly"
|
|
||||||
);
|
|
||||||
|
|
||||||
let expected_chunks = [
|
|
||||||
"fn main() {",
|
|
||||||
" let a = 1;",
|
|
||||||
" let b = 2;",
|
|
||||||
" }",
|
|
||||||
" unparsable",
|
|
||||||
" bits here",
|
|
||||||
];
|
|
||||||
|
|
||||||
for (i, chunk) in chunks.iter().enumerate() {
|
|
||||||
assert_eq!(
|
|
||||||
&mixed_text[chunk.range.clone()],
|
|
||||||
expected_chunks[i],
|
|
||||||
"Chunk {} should match",
|
|
||||||
i
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user