Rename Document to Span

Antonio Scandurra 2023-09-06 16:48:53 +02:00
parent de0f53b39f
commit ce62173534
5 changed files with 109 additions and 112 deletions

View File

@@ -1,6 +1,6 @@
use crate::{
embedding::Embedding,
parsing::{Document, DocumentDigest},
parsing::{Span, SpanDigest},
SEMANTIC_INDEX_VERSION,
};
use anyhow::{anyhow, Context, Result};
@@ -124,8 +124,8 @@ impl VectorDatabase {
}
log::trace!("vector database schema out of date. updating...");
db.execute("DROP TABLE IF EXISTS documents", [])
.context("failed to drop 'documents' table")?;
db.execute("DROP TABLE IF EXISTS spans", [])
.context("failed to drop 'spans' table")?;
db.execute("DROP TABLE IF EXISTS files", [])
.context("failed to drop 'files' table")?;
db.execute("DROP TABLE IF EXISTS worktrees", [])
@@ -174,7 +174,7 @@ impl VectorDatabase {
)?;
db.execute(
"CREATE TABLE documents (
"CREATE TABLE spans (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
start_byte INTEGER NOT NULL,
@@ -211,7 +211,7 @@ impl VectorDatabase {
worktree_id: i64,
path: Arc<Path>,
mtime: SystemTime,
documents: Vec<Document>,
spans: Vec<Span>,
) -> impl Future<Output = Result<()>> {
self.transact(move |db| {
// Return the existing ID, if both the file and mtime match
@@ -231,7 +231,7 @@ impl VectorDatabase {
let t0 = Instant::now();
let mut query = db.prepare(
"
INSERT INTO documents
INSERT INTO spans
(file_id, start_byte, end_byte, name, embedding, digest)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)
",
@@ -241,14 +241,14 @@ impl VectorDatabase {
t0.elapsed().as_millis()
);
for document in documents {
for span in spans {
query.execute(params![
file_id,
document.range.start.to_string(),
document.range.end.to_string(),
document.name,
document.embedding,
document.digest
span.range.start.to_string(),
span.range.end.to_string(),
span.name,
span.embedding,
span.digest
])?;
}
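
Read together, the `CREATE TABLE` and `INSERT` hunks above pin down the shape of the renamed table. Below is a minimal, self-contained sketch of the resulting schema and insert using rusqlite; the types of the columns the truncated `CREATE TABLE` hunk does not show (`end_byte`, `name`, `embedding`, `digest`) are assumptions, and only the column names are grounded in the statements above.

```rust
use rusqlite::{params, Connection};

fn main() -> rusqlite::Result<()> {
    let db = Connection::open_in_memory()?;
    db.execute(
        "CREATE TABLE spans (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            file_id INTEGER NOT NULL,
            start_byte INTEGER NOT NULL,
            end_byte INTEGER NOT NULL, -- assumed type
            name TEXT NOT NULL,        -- assumed type
            embedding BLOB,            -- assumed type
            digest BLOB                -- assumed type
        )",
        [],
    )?;
    // Mirrors the renamed INSERT above; note that the crate binds the byte
    // offsets as strings (`range.start.to_string()`).
    db.execute(
        "INSERT INTO spans (file_id, start_byte, end_byte, name, embedding, digest)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
        params![1_i64, "0", "42", "fn main", Vec::<u8>::new(), vec![0u8; 20]],
    )?;
    Ok(())
}
```
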
@@ -278,13 +278,13 @@ impl VectorDatabase {
pub fn embeddings_for_files(
&self,
worktree_id_file_paths: HashMap<i64, Vec<Arc<Path>>>,
) -> impl Future<Output = Result<HashMap<DocumentDigest, Embedding>>> {
) -> impl Future<Output = Result<HashMap<SpanDigest, Embedding>>> {
self.transact(move |db| {
let mut query = db.prepare(
"
SELECT digest, embedding
FROM documents
LEFT JOIN files ON files.id = documents.file_id
FROM spans
LEFT JOIN files ON files.id = spans.file_id
WHERE files.worktree_id = ? AND files.relative_path IN rarray(?)
",
)?;
@@ -297,10 +297,7 @@ impl VectorDatabase {
.collect::<Vec<_>>(),
);
let rows = query.query_map(params![worktree_id, file_paths], |row| {
Ok((
row.get::<_, DocumentDigest>(0)?,
row.get::<_, Embedding>(1)?,
))
Ok((row.get::<_, SpanDigest>(0)?, row.get::<_, Embedding>(1)?))
})?;
for row in rows {
@@ -379,7 +376,7 @@ impl VectorDatabase {
let file_ids = file_ids.to_vec();
self.transact(move |db| {
let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
Self::for_each_document(db, &file_ids, |id, embedding| {
Self::for_each_span(db, &file_ids, |id, embedding| {
let similarity = embedding.similarity(&query_embedding);
let ix = match results.binary_search_by(|(_, s)| {
similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)
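
The renamed traversal feeds a top-k selection: `results` stays sorted by descending similarity via binary search and is capped at `limit`. A self-contained sketch of that pattern, with illustrative names:

```rust
use std::cmp::Ordering;

// Keep `results` sorted from highest to lowest similarity, at most `limit` long.
fn push_top_k(results: &mut Vec<(i64, f32)>, id: i64, similarity: f32, limit: usize) {
    // Comparing the candidate similarity against each stored score yields a
    // descending sort order, matching the hunk above.
    let ix = match results.binary_search_by(|(_, s)| {
        similarity.partial_cmp(s).unwrap_or(Ordering::Equal)
    }) {
        Ok(ix) | Err(ix) => ix,
    };
    results.insert(ix, (id, similarity));
    results.truncate(limit);
}

fn main() {
    let mut results = Vec::new();
    for (id, sim) in [(1, 0.2_f32), (2, 0.9), (3, 0.5)] {
        push_top_k(&mut results, id, sim, 2);
    }
    assert_eq!(results, vec![(2, 0.9), (3, 0.5)]);
}
```
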
@@ -434,7 +431,7 @@ impl VectorDatabase {
})
}
fn for_each_document(
fn for_each_span(
db: &rusqlite::Connection,
file_ids: &[i64],
mut f: impl FnMut(i64, Embedding),
@@ -444,7 +441,7 @@ impl VectorDatabase {
SELECT
id, embedding
FROM
documents
spans
WHERE
file_id IN rarray(?)
",
@@ -459,7 +456,7 @@ impl VectorDatabase {
Ok(())
}
pub fn get_documents_by_ids(
pub fn spans_for_ids(
&self,
ids: &[i64],
) -> impl Future<Output = Result<Vec<(i64, PathBuf, Range<usize>)>>> {
@@ -468,16 +465,16 @@ impl VectorDatabase {
let mut statement = db.prepare(
"
SELECT
documents.id,
spans.id,
files.worktree_id,
files.relative_path,
documents.start_byte,
documents.end_byte
spans.start_byte,
spans.end_byte
FROM
documents, files
spans, files
WHERE
documents.file_id = files.id AND
documents.id in rarray(?)
spans.file_id = files.id AND
spans.id in rarray(?)
",
)?;
@@ -500,7 +497,7 @@ impl VectorDatabase {
for id in &ids {
let value = values_by_id
.remove(id)
.ok_or(anyhow!("missing document id {}", id))?;
.ok_or(anyhow!("missing span id {}", id))?;
results.push(value);
}

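The digest-keyed map returned by `embeddings_for_files` backs the caching step later in this commit: a reparsed span whose content is unchanged can reuse its stored embedding instead of being re-sent to the provider. A self-contained sketch of that cache contract, with stand-in types for the crate's `SpanDigest` and `Embedding`:

```rust
use std::collections::HashMap;

type SpanDigest = [u8; 20];
type Embedding = Vec<f32>;

struct Span {
    digest: SpanDigest,
    embedding: Option<Embedding>,
}

// Mirrors the lookup in the semantic_index hunks further down: any span whose
// digest is already in the map gets its embedding filled in for free.
fn apply_cache(spans: &mut [Span], cache: &HashMap<SpanDigest, Embedding>) {
    for span in spans {
        if let Some(embedding) = cache.get(&span.digest) {
            span.embedding = Some(embedding.clone());
        }
    }
}

fn main() {
    let cached = [7u8; 20];
    let cache = HashMap::from([(cached, vec![0.25, 0.5])]);
    let mut spans = vec![
        Span { digest: cached, embedding: None },    // unchanged content: reused
        Span { digest: [0u8; 20], embedding: None }, // new content: must be embedded
    ];
    apply_cache(&mut spans, &cache);
    assert!(spans[0].embedding.is_some());
    assert!(spans[1].embedding.is_none());
}
```
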
View File

@@ -1,4 +1,4 @@
use crate::{embedding::EmbeddingProvider, parsing::Document, JobHandle};
use crate::{embedding::EmbeddingProvider, parsing::Span, JobHandle};
use gpui::executor::Background;
use parking_lot::Mutex;
use smol::channel;
@@ -9,7 +9,7 @@ pub struct FileToEmbed {
pub worktree_id: i64,
pub path: Arc<Path>,
pub mtime: SystemTime,
pub documents: Vec<Document>,
pub spans: Vec<Span>,
pub job_handle: JobHandle,
}
@@ -19,7 +19,7 @@ impl std::fmt::Debug for FileToEmbed {
.field("worktree_id", &self.worktree_id)
.field("path", &self.path)
.field("mtime", &self.mtime)
.field("document", &self.documents)
.field("spans", &self.spans)
.finish_non_exhaustive()
}
}
@@ -29,13 +29,13 @@ impl PartialEq for FileToEmbed {
self.worktree_id == other.worktree_id
&& self.path == other.path
&& self.mtime == other.mtime
&& self.documents == other.documents
&& self.spans == other.spans
}
}
pub struct EmbeddingQueue {
embedding_provider: Arc<dyn EmbeddingProvider>,
pending_batch: Vec<FileToEmbedFragment>,
pending_batch: Vec<FileFragmentToEmbed>,
executor: Arc<Background>,
pending_batch_token_count: usize,
finished_files_tx: channel::Sender<FileToEmbed>,
@@ -43,9 +43,9 @@ pub struct EmbeddingQueue {
}
#[derive(Clone)]
pub struct FileToEmbedFragment {
pub struct FileFragmentToEmbed {
file: Arc<Mutex<FileToEmbed>>,
document_range: Range<usize>,
span_range: Range<usize>,
}
impl EmbeddingQueue {
@@ -62,41 +62,41 @@ impl EmbeddingQueue {
}
pub fn push(&mut self, file: FileToEmbed) {
if file.documents.is_empty() {
if file.spans.is_empty() {
self.finished_files_tx.try_send(file).unwrap();
return;
}
let file = Arc::new(Mutex::new(file));
self.pending_batch.push(FileToEmbedFragment {
self.pending_batch.push(FileFragmentToEmbed {
file: file.clone(),
document_range: 0..0,
span_range: 0..0,
});
let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range;
let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range;
let mut saved_tokens = 0;
for (ix, document) in file.lock().documents.iter().enumerate() {
let document_token_count = if document.embedding.is_none() {
document.token_count
for (ix, span) in file.lock().spans.iter().enumerate() {
let span_token_count = if span.embedding.is_none() {
span.token_count
} else {
saved_tokens += document.token_count;
saved_tokens += span.token_count;
0
};
let next_token_count = self.pending_batch_token_count + document_token_count;
let next_token_count = self.pending_batch_token_count + span_token_count;
if next_token_count > self.embedding_provider.max_tokens_per_batch() {
let range_end = fragment_range.end;
self.flush();
self.pending_batch.push(FileToEmbedFragment {
self.pending_batch.push(FileFragmentToEmbed {
file: file.clone(),
document_range: range_end..range_end,
span_range: range_end..range_end,
});
fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range;
fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range;
}
fragment_range.end = ix + 1;
self.pending_batch_token_count += document_token_count;
self.pending_batch_token_count += span_token_count;
}
log::trace!("Saved Tokens: {:?}", saved_tokens);
}
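
The `push` logic above batches spans against the provider's token budget, flushing and starting a new fragment (a sub-range of one file's spans) whenever the next span would overflow it; spans that already carry a cached embedding contribute zero tokens. An illustrative reduction of that rule, with simplified stand-in types rather than the crate's own:

```rust
struct Batcher {
    max_tokens_per_batch: usize,
    pending_batch_token_count: usize,
    flushed_batches: usize,
}

impl Batcher {
    fn push_span(&mut self, span_token_count: usize) {
        if self.pending_batch_token_count + span_token_count > self.max_tokens_per_batch {
            // Corresponds to `self.flush()` above, which also starts a new
            // FileFragmentToEmbed covering the remainder of the file's spans.
            self.flushed_batches += 1;
            self.pending_batch_token_count = 0;
        }
        self.pending_batch_token_count += span_token_count;
    }
}

fn main() {
    let mut batcher = Batcher {
        max_tokens_per_batch: 100,
        pending_batch_token_count: 0,
        flushed_batches: 0,
    };
    // A zero mirrors a span whose `embedding.is_some()`, which the queue
    // counts as saved tokens instead of batch load.
    for tokens in [60, 0, 50, 30, 80] {
        batcher.push_span(tokens);
    }
    assert_eq!(batcher.flushed_batches, 2); // flushed before the 50 and the 80
}
```
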
@@ -113,20 +113,20 @@ impl EmbeddingQueue {
self.executor.spawn(async move {
let mut spans = Vec::new();
let mut document_count = 0;
let mut span_count = 0;
for fragment in &batch {
let file = fragment.file.lock();
document_count += file.documents[fragment.document_range.clone()].len();
span_count += file.spans[fragment.span_range.clone()].len();
spans.extend(
{
file.documents[fragment.document_range.clone()]
file.spans[fragment.span_range.clone()]
.iter().filter(|d| d.embedding.is_none())
.map(|d| d.content.clone())
}
);
}
log::trace!("Documents Length: {:?}", document_count);
log::trace!("Documents Length: {:?}", span_count);
log::trace!("Span Length: {:?}", spans.clone().len());
// If there are no spans, just send the fragment to the finished files if it's the last one.
@@ -143,11 +143,11 @@ impl EmbeddingQueue {
Ok(embeddings) => {
let mut embeddings = embeddings.into_iter();
for fragment in batch {
for document in
&mut fragment.file.lock().documents[fragment.document_range.clone()].iter_mut().filter(|d| d.embedding.is_none())
for span in
&mut fragment.file.lock().spans[fragment.span_range.clone()].iter_mut().filter(|d| d.embedding.is_none())
{
if let Some(embedding) = embeddings.next() {
document.embedding = Some(embedding);
span.embedding = Some(embedding);
} else {
//
log::error!("number of embeddings returned different from number of documents");

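When a batch completes, the returned embeddings are zipped back onto exactly the spans that were sent, i.e. those still lacking an embedding. A minimal model of that re-attachment, with `Embedding` stubbed as `Vec<f32>`:

```rust
type Embedding = Vec<f32>;

struct Span {
    content: String,
    embedding: Option<Embedding>,
}

// Only spans without an embedding were sent to the provider, so the returned
// vectors are consumed in order by exactly that filtered subset.
fn attach_embeddings(spans: &mut [Span], returned: Vec<Embedding>) {
    let mut returned = returned.into_iter();
    for span in spans.iter_mut().filter(|s| s.embedding.is_none()) {
        if let Some(embedding) = returned.next() {
            span.embedding = Some(embedding);
        } else {
            // Mirrors the error branch above.
            eprintln!("number of embeddings returned different from number of spans");
        }
    }
}

fn main() {
    let mut spans = vec![
        Span { content: "fn a() {}".into(), embedding: Some(vec![0.1]) }, // cached
        Span { content: "fn b() {}".into(), embedding: None },            // needs work
    ];
    attach_embeddings(&mut spans, vec![vec![0.2]]);
    for span in &spans {
        println!("{}: {:?}", span.content, span.embedding);
    }
}
```
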
View File

@@ -16,9 +16,9 @@ use std::{
use tree_sitter::{Parser, QueryCursor};
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct DocumentDigest([u8; 20]);
pub struct SpanDigest([u8; 20]);
impl FromSql for DocumentDigest {
impl FromSql for SpanDigest {
fn column_result(value: ValueRef) -> FromSqlResult<Self> {
let blob = value.as_blob()?;
let bytes =
@@ -27,17 +27,17 @@ impl FromSql for DocumentDigest {
expected_size: 20,
blob_size: blob.len(),
})?;
return Ok(DocumentDigest(bytes));
return Ok(SpanDigest(bytes));
}
}
impl ToSql for DocumentDigest {
impl ToSql for SpanDigest {
fn to_sql(&self) -> rusqlite::Result<ToSqlOutput> {
self.0.to_sql()
}
}
impl From<&'_ str> for DocumentDigest {
impl From<&'_ str> for SpanDigest {
fn from(value: &'_ str) -> Self {
let mut sha1 = Sha1::new();
sha1.update(value);
@@ -46,12 +46,12 @@ impl From<&'_ str> for DocumentDigest {
}
#[derive(Debug, PartialEq, Clone)]
pub struct Document {
pub struct Span {
pub name: String,
pub range: Range<usize>,
pub content: String,
pub embedding: Option<Embedding>,
pub digest: DocumentDigest,
pub digest: SpanDigest,
pub token_count: usize,
}
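
A hedged, self-contained sketch of how the renamed types fit together. The SHA-1 calls mirror the `From<&str>` impl above (here via the `sha1_smol` crate, an assumption; the diff names only `Sha1::new` and `update`), `Embedding` is stubbed, and the field names and 20-byte digest come straight from the hunks above:

```rust
use std::ops::Range;

type Embedding = Vec<f32>; // stand-in for the crate's Embedding type

#[derive(Debug, PartialEq, Eq, Clone, Hash)]
struct SpanDigest([u8; 20]);

impl From<&str> for SpanDigest {
    fn from(value: &str) -> Self {
        let mut sha1 = sha1_smol::Sha1::new();
        sha1.update(value.as_bytes());
        SpanDigest(sha1.digest().bytes())
    }
}

#[derive(Debug, PartialEq, Clone)]
struct Span {
    name: String,
    range: Range<usize>,
    content: String,
    embedding: Option<Embedding>,
    digest: SpanDigest,
    token_count: usize,
}

fn main() {
    let content = "fn main() {}";
    let span = Span {
        name: "fn main".into(),
        range: 0..content.len(),
        content: content.into(),
        embedding: None, // filled in later by the embedding queue
        digest: SpanDigest::from(content),
        token_count: 4, // illustrative
    };
    println!("{:?}", span);
}
```
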
@@ -97,14 +97,14 @@ impl CodeContextRetriever {
relative_path: &Path,
language_name: Arc<str>,
content: &str,
) -> Result<Vec<Document>> {
) -> Result<Vec<Span>> {
let document_span = ENTIRE_FILE_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<language>", language_name.as_ref())
.replace("<item>", &content);
let digest = DocumentDigest::from(document_span.as_str());
let digest = SpanDigest::from(document_span.as_str());
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
Ok(vec![Span {
range: 0..content.len(),
content: document_span,
embedding: Default::default(),
@@ -114,13 +114,13 @@ impl CodeContextRetriever {
}])
}
fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Document>> {
fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Span>> {
let document_span = MARKDOWN_CONTEXT_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<item>", &content);
let digest = DocumentDigest::from(document_span.as_str());
let digest = SpanDigest::from(document_span.as_str());
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
Ok(vec![Span {
range: 0..content.len(),
content: document_span,
embedding: None,
@@ -191,32 +191,32 @@ impl CodeContextRetriever {
relative_path: &Path,
content: &str,
language: Arc<Language>,
) -> Result<Vec<Document>> {
) -> Result<Vec<Span>> {
let language_name = language.name();
if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
return self.parse_entire_file(relative_path, language_name, &content);
} else if &language_name.to_string() == &"Markdown".to_string() {
} else if language_name.as_ref() == "Markdown" {
return self.parse_markdown_file(relative_path, &content);
}
let mut documents = self.parse_file(content, language)?;
for document in &mut documents {
let mut spans = self.parse_file(content, language)?;
for span in &mut spans {
let document_content = CODE_CONTEXT_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<language>", language_name.as_ref())
.replace("item", &document.content);
.replace("item", &span.content);
let (document_content, token_count) =
self.embedding_provider.truncate(&document_content);
document.content = document_content;
document.token_count = token_count;
span.content = document_content;
span.token_count = token_count;
}
Ok(documents)
Ok(spans)
}
pub fn parse_file(&mut self, content: &str, language: Arc<Language>) -> Result<Vec<Document>> {
pub fn parse_file(&mut self, content: &str, language: Arc<Language>) -> Result<Vec<Span>> {
let grammar = language
.grammar()
.ok_or_else(|| anyhow!("no grammar for language"))?;
@@ -227,7 +227,7 @@ impl CodeContextRetriever {
let language_scope = language.default_scope();
let placeholder = language_scope.collapsed_placeholder();
let mut documents = Vec::new();
let mut spans = Vec::new();
let mut collapsed_ranges_within = Vec::new();
let mut parsed_name_ranges = HashSet::new();
for (i, context_match) in matches.iter().enumerate() {
@@ -267,22 +267,22 @@ impl CodeContextRetriever {
collapsed_ranges_within.sort_by_key(|r| (r.start, Reverse(r.end)));
let mut document_content = String::new();
let mut span_content = String::new();
for context_range in &context_match.context_ranges {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
context_range.clone(),
context_match.start_col,
);
document_content.push_str("\n");
span_content.push_str("\n");
}
let mut offset = item_range.start;
for collapsed_range in &collapsed_ranges_within {
if collapsed_range.start > offset {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
offset..collapsed_range.start,
context_match.start_col,
@@ -291,24 +291,24 @@ impl CodeContextRetriever {
}
if collapsed_range.end > offset {
document_content.push_str(placeholder);
span_content.push_str(placeholder);
offset = collapsed_range.end;
}
}
if offset < item_range.end {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
offset..item_range.end,
context_match.start_col,
);
}
let sha1 = DocumentDigest::from(document_content.as_str());
documents.push(Document {
let sha1 = SpanDigest::from(span_content.as_str());
spans.push(Span {
name,
content: document_content,
content: span_content,
range: item_range.clone(),
embedding: None,
digest: sha1,
@@ -316,7 +316,7 @@ impl CodeContextRetriever {
})
}
return Ok(documents);
return Ok(spans);
}
}

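Each parsed span is wrapped in a context template before embedding, as the `parse_file_with_template` hunk shows. A self-contained sketch of that substitution; the template literal below is illustrative, not the crate's exact `CODE_CONTEXT_TEMPLATE`:

```rust
// Illustrative stand-in; the real template lives in the crate and its output
// is additionally truncated by the embedding provider's token limit.
const CODE_CONTEXT_TEMPLATE: &str =
    "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";

fn expand(path: &str, language: &str, span_content: &str) -> String {
    CODE_CONTEXT_TEMPLATE
        .replace("<path>", path)
        .replace("<language>", language)
        .replace("<item>", span_content)
}

fn main() {
    let expanded = expand("src/lib.rs", "Rust", "fn add(a: i32, b: i32) -> i32 { a + b }");
    println!("{expanded}");
}
```
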
View File

@@ -17,7 +17,7 @@ use futures::{future, FutureExt, StreamExt};
use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
use language::{Anchor, Bias, Buffer, Language, LanguageRegistry};
use parking_lot::Mutex;
use parsing::{CodeContextRetriever, DocumentDigest, PARSEABLE_ENTIRE_FILE_TYPES};
use parsing::{CodeContextRetriever, SpanDigest, PARSEABLE_ENTIRE_FILE_TYPES};
use postage::watch;
use project::{search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, Worktree, WorktreeId};
use smol::channel;
@@ -36,7 +36,7 @@ use util::{
ResultExt,
};
const SEMANTIC_INDEX_VERSION: usize = 9;
const SEMANTIC_INDEX_VERSION: usize = 10;
const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60);
const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250);
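
Bumping `SEMANTIC_INDEX_VERSION` from 9 to 10 is what forces existing databases through the drop-and-recreate path shown in the first file, since the `documents` table must be rebuilt as `spans`. An illustrative reduction of that version gate, not the crate's exact check:

```rust
const SEMANTIC_INDEX_VERSION: usize = 10;

// A stored version that does not match the compiled-in constant marks the
// database as out of date, triggering the DROP TABLE / CREATE TABLE path.
fn needs_rebuild(stored_version: usize) -> bool {
    stored_version != SEMANTIC_INDEX_VERSION
}

fn main() {
    assert!(needs_rebuild(9)); // pre-rename databases are dropped and recreated
    assert!(!needs_rebuild(SEMANTIC_INDEX_VERSION));
}
```
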
@@ -84,7 +84,7 @@ pub struct SemanticIndex {
db: VectorDatabase,
embedding_provider: Arc<dyn EmbeddingProvider>,
language_registry: Arc<LanguageRegistry>,
parsing_files_tx: channel::Sender<(Arc<HashMap<DocumentDigest, Embedding>>, PendingFile)>,
parsing_files_tx: channel::Sender<(Arc<HashMap<SpanDigest, Embedding>>, PendingFile)>,
_embedding_task: Task<()>,
_parsing_files_tasks: Vec<Task<()>>,
projects: HashMap<WeakModelHandle<Project>, ProjectState>,
@@ -252,16 +252,16 @@ impl SemanticIndex {
let db = db.clone();
async move {
while let Ok(file) = embedded_files.recv().await {
db.insert_file(file.worktree_id, file.path, file.mtime, file.documents)
db.insert_file(file.worktree_id, file.path, file.mtime, file.spans)
.await
.log_err();
}
}
});
// Parse files into embeddable documents.
// Parse files into embeddable spans.
let (parsing_files_tx, parsing_files_rx) =
channel::unbounded::<(Arc<HashMap<DocumentDigest, Embedding>>, PendingFile)>();
channel::unbounded::<(Arc<HashMap<SpanDigest, Embedding>>, PendingFile)>();
let embedding_queue = Arc::new(Mutex::new(embedding_queue));
let mut _parsing_files_tasks = Vec::new();
for _ in 0..cx.background().num_cpus() {
@@ -320,26 +320,26 @@ impl SemanticIndex {
pending_file: PendingFile,
retriever: &mut CodeContextRetriever,
embedding_queue: &Arc<Mutex<EmbeddingQueue>>,
embeddings_for_digest: &HashMap<DocumentDigest, Embedding>,
embeddings_for_digest: &HashMap<SpanDigest, Embedding>,
) {
let Some(language) = pending_file.language else {
return;
};
if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err() {
if let Some(mut documents) = retriever
if let Some(mut spans) = retriever
.parse_file_with_template(&pending_file.relative_path, &content, language)
.log_err()
{
log::trace!(
"parsed path {:?}: {} documents",
"parsed path {:?}: {} spans",
pending_file.relative_path,
documents.len()
spans.len()
);
for document in documents.iter_mut() {
if let Some(embedding) = embeddings_for_digest.get(&document.digest) {
document.embedding = Some(embedding.to_owned());
for span in &mut spans {
if let Some(embedding) = embeddings_for_digest.get(&span.digest) {
span.embedding = Some(embedding.to_owned());
}
}
@@ -348,7 +348,7 @@ impl SemanticIndex {
path: pending_file.relative_path,
mtime: pending_file.modified_time,
job_handle: pending_file.job_handle,
documents,
spans,
});
}
}
@@ -708,13 +708,13 @@ impl SemanticIndex {
}
let ids = results.into_iter().map(|(id, _)| id).collect::<Vec<i64>>();
let documents = database.get_documents_by_ids(ids.as_slice()).await?;
let spans = database.spans_for_ids(ids.as_slice()).await?;
let mut tasks = Vec::new();
let mut ranges = Vec::new();
let weak_project = project.downgrade();
project.update(&mut cx, |project, cx| {
for (worktree_db_id, file_path, byte_range) in documents {
for (worktree_db_id, file_path, byte_range) in spans {
let project_state =
if let Some(state) = this.read(cx).projects.get(&weak_project) {
state

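After similarity search produces span ids, `spans_for_ids` (formerly `get_documents_by_ids`) maps them back to worktree paths and byte ranges so the results can be opened as buffers, erroring on any missing id just as the database code above does. A stand-in sketch of that resolution step, with an in-memory map in place of the SQLite query:

```rust
use std::{collections::HashMap, ops::Range, path::PathBuf};

fn spans_for_ids(
    index: &HashMap<i64, (PathBuf, Range<usize>)>,
    ids: &[i64],
) -> anyhow::Result<Vec<(i64, PathBuf, Range<usize>)>> {
    ids.iter()
        .map(|id| {
            // Same contract as the real query: every requested id must resolve.
            let (path, range) = index
                .get(id)
                .ok_or_else(|| anyhow::anyhow!("missing span id {}", id))?;
            Ok((*id, path.clone(), range.clone()))
        })
        .collect()
}

fn main() -> anyhow::Result<()> {
    let index = HashMap::from([(3_i64, (PathBuf::from("src/db.rs"), 10..120))]);
    for (id, path, range) in spans_for_ids(&index, &[3])? {
        println!("span {id} -> {}:{}..{}", path.display(), range.start, range.end);
    }
    Ok(())
}
```
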
View File

@@ -1,7 +1,7 @@
use crate::{
embedding::{DummyEmbeddings, Embedding, EmbeddingProvider},
embedding_queue::EmbeddingQueue,
parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest},
parsing::{subtract_ranges, CodeContextRetriever, Span, SpanDigest},
semantic_index_settings::SemanticIndexSettings,
FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT,
};
@@ -204,15 +204,15 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
worktree_id: 5,
path: Path::new(&format!("path-{file_ix}")).into(),
mtime: SystemTime::now(),
documents: (0..rng.gen_range(4..22))
spans: (0..rng.gen_range(4..22))
.map(|document_ix| {
let content_len = rng.gen_range(10..100);
let content = RandomCharIter::new(&mut rng)
.with_simple_text()
.take(content_len)
.collect::<String>();
let digest = DocumentDigest::from(content.as_str());
Document {
let digest = SpanDigest::from(content.as_str());
Span {
range: 0..10,
embedding: None,
name: format!("document {document_ix}"),
@@ -245,7 +245,7 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
.iter()
.map(|file| {
let mut file = file.clone();
for doc in &mut file.documents {
for doc in &mut file.spans {
doc.embedding = Some(embedding_provider.embed_sync(doc.content.as_ref()));
}
file
@@ -437,7 +437,7 @@ async fn test_code_context_retrieval_json() {
}
fn assert_documents_eq(
documents: &[Document],
documents: &[Span],
expected_contents_and_start_offsets: &[(String, usize)],
) {
assert_eq!(