From 3c70b127bd2785a943ae81bc3643f73eeb8babae Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Tue, 5 Sep 2023 16:52:58 +0200 Subject: [PATCH] Simplify `SemanticIndex::index_project` Co-Authored-By: Kyle Caverly --- crates/search/src/project_search.rs | 40 ++-- crates/semantic_index/src/semantic_index.rs | 176 +++++++----------- .../src/semantic_index_tests.rs | 27 ++- 3 files changed, 99 insertions(+), 144 deletions(-) diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs index 6364183877..f1a0ff71d3 100644 --- a/crates/search/src/project_search.rs +++ b/crates/search/src/project_search.rs @@ -12,15 +12,13 @@ use editor::{ SelectAll, MAX_TAB_TITLE_LEN, }; use futures::StreamExt; - -use gpui::platform::PromptLevel; - use gpui::{ - actions, elements::*, platform::MouseButton, Action, AnyElement, AnyViewHandle, AppContext, - Entity, ModelContext, ModelHandle, Subscription, Task, View, ViewContext, ViewHandle, - WeakModelHandle, WeakViewHandle, + actions, + elements::*, + platform::{MouseButton, PromptLevel}, + Action, AnyElement, AnyViewHandle, AppContext, Entity, ModelContext, ModelHandle, Subscription, + Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle, }; - use menu::Confirm; use postage::stream::Stream; use project::{ @@ -132,8 +130,7 @@ pub struct ProjectSearchView { } struct SemanticSearchState { - file_count: usize, - outstanding_file_count: usize, + pending_file_count: usize, _progress_task: Task<()>, } @@ -319,12 +316,8 @@ impl View for ProjectSearchView { }; let semantic_status = if let Some(semantic) = &self.semantic_state { - if semantic.outstanding_file_count > 0 { - format!( - "Indexing: {} of {}...", - semantic.file_count - semantic.outstanding_file_count, - semantic.file_count - ) + if semantic.pending_file_count > 0 { + format!("Remaining files to index: {}", semantic.pending_file_count) } else { "Indexing complete".to_string() } @@ -641,26 +634,25 @@ impl ProjectSearchView { let project = self.model.read(cx).project.clone(); - let index_task = semantic_index.update(cx, |semantic_index, cx| { - semantic_index.index_project(project, cx) + let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| { + semantic_index.index_project(project.clone(), cx); + semantic_index.pending_file_count(&project).unwrap() }); cx.spawn(|search_view, mut cx| async move { - let (files_to_index, mut files_remaining_rx) = index_task.await?; - search_view.update(&mut cx, |search_view, cx| { cx.notify(); + let pending_file_count = *pending_file_count_rx.borrow(); search_view.semantic_state = Some(SemanticSearchState { - file_count: files_to_index, - outstanding_file_count: files_to_index, + pending_file_count, _progress_task: cx.spawn(|search_view, mut cx| async move { - while let Some(count) = files_remaining_rx.recv().await { + while let Some(count) = pending_file_count_rx.recv().await { search_view .update(&mut cx, |search_view, cx| { if let Some(semantic_search_state) = &mut search_view.semantic_state { - semantic_search_state.outstanding_file_count = count; + semantic_search_state.pending_file_count = count; cx.notify(); if count == 0 { return; @@ -959,7 +951,7 @@ impl ProjectSearchView { match mode { SearchMode::Semantic => { if let Some(semantic) = &mut self.semantic_state { - if semantic.outstanding_file_count > 0 { + if semantic.pending_file_count > 0 { return; } diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index 8122cffdc9..2de78ab7e3 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -66,7 +66,9 @@ pub fn init( if let Some(workspace) = workspace.upgrade(cx) { let project = workspace.read(cx).project().clone(); if project.read(cx).is_local() { - semantic_index.update(cx, |index, cx| index.register_project(project, cx)); + semantic_index.update(cx, |index, cx| { + index.register_project(project, cx); + }); } } } @@ -122,7 +124,6 @@ impl WorktreeState { fn paths_changed( &mut self, changes: Arc<[(Arc, ProjectEntryId, PathChange)]>, - changed_at: Instant, worktree: &Worktree, ) { let changed_paths = match self { @@ -140,7 +141,6 @@ impl WorktreeState { changed_paths.insert( path.clone(), ChangedPathInfo { - changed_at, mtime: entry.mtime, is_deleted: *change == PathChange::Removed, }, @@ -160,7 +160,6 @@ struct RegisteredWorktreeState { } struct ChangedPathInfo { - changed_at: Instant, mtime: SystemTime, is_deleted: bool, } @@ -409,43 +408,47 @@ impl SemanticIndex { return; }; - let change_time = Instant::now(); let worktree = worktree.read(cx); let worktree_state = if let Some(worktree_state) = project_state.worktree(worktree_id) { worktree_state } else { return; }; - worktree_state.paths_changed(changes, Instant::now(), worktree); + worktree_state.paths_changed(changes, worktree); if let WorktreeState::Registered(_) = worktree_state { cx.spawn_weak(|this, mut cx| async move { cx.background().timer(BACKGROUND_INDEXING_DELAY).await; if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) { - this.update(&mut cx, |this, cx| { - this.reindex_changed_paths(project, Some(change_time), cx) - }) - .await; + this.update(&mut cx, |this, cx| this.index_project(project, cx)); } }) .detach(); } } - pub fn register_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { - log::trace!("Registering Project for Semantic Index"); + fn register_project( + &mut self, + project: ModelHandle, + cx: &mut ModelContext, + ) -> &mut ProjectState { + if !self.projects.contains_key(&project.downgrade()) { + log::trace!("Registering Project for Semantic Index"); - let subscription = cx.subscribe(&project, |this, project, event, cx| match event { - project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => { - this.project_worktrees_changed(project.clone(), cx); - } - project::Event::WorktreeUpdatedEntries(worktree_id, changes) => { - this.project_entries_changed(project, *worktree_id, changes.clone(), cx); - } - _ => {} - }); - self.projects - .insert(project.downgrade(), ProjectState::new(subscription)); - self.project_worktrees_changed(project, cx); + let subscription = cx.subscribe(&project, |this, project, event, cx| match event { + project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => { + this.project_worktrees_changed(project.clone(), cx); + } + project::Event::WorktreeUpdatedEntries(worktree_id, changes) => { + this.project_entries_changed(project, *worktree_id, changes.clone(), cx); + } + _ => {} + }); + self.projects + .insert(project.downgrade(), ProjectState::new(subscription)); + self.project_worktrees_changed(project.clone(), cx); + } + + self.projects.get_mut(&project.downgrade()).unwrap() } fn register_worktree( @@ -487,7 +490,6 @@ impl SemanticIndex { .background() .spawn(async move { let mut changed_paths = BTreeMap::new(); - let now = Instant::now(); for file in worktree.files(false, 0) { let absolute_path = worktree.absolutize(&file.path); @@ -518,7 +520,6 @@ impl SemanticIndex { changed_paths.insert( file.path.clone(), ChangedPathInfo { - changed_at: now, mtime: file.mtime, is_deleted: false, }, @@ -532,7 +533,6 @@ impl SemanticIndex { changed_paths.insert( path.into(), ChangedPathInfo { - changed_at: now, mtime, is_deleted: true, }, @@ -614,29 +614,7 @@ impl SemanticIndex { } } - pub fn index_project( - &mut self, - project: ModelHandle, - cx: &mut ModelContext, - ) -> Task)>> { - let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade()) - { - project_state - } else { - return Task::ready(Err(anyhow!("project was not registered"))); - }; - let outstanding_job_count_rx = project_state.outstanding_job_count_rx.clone(); - cx.spawn(|this, mut cx| async move { - this.update(&mut cx, |this, cx| { - this.reindex_changed_paths(project.clone(), None, cx) - }) - .await; - let count = *outstanding_job_count_rx.borrow(); - Ok((count, outstanding_job_count_rx)) - }) - } - - pub fn outstanding_job_count_rx( + pub fn pending_file_count( &self, project: &ModelHandle, ) -> Option> { @@ -783,18 +761,8 @@ impl SemanticIndex { }) } - fn reindex_changed_paths( - &mut self, - project: ModelHandle, - last_changed_before: Option, - cx: &mut ModelContext, - ) -> Task<()> { - let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade()) - { - project_state - } else { - return Task::ready(()); - }; + pub fn index_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { + let project_state = self.register_project(project.clone(), cx); let mut pending_files = Vec::new(); let mut files_to_delete = Vec::new(); @@ -816,12 +784,6 @@ impl SemanticIndex { }; worktree_state.changed_paths.retain(|path, info| { - if let Some(last_changed_before) = last_changed_before { - if info.changed_at > last_changed_before { - return true; - } - } - if info.is_deleted { files_to_delete.push((worktree_state.db_id, path.clone())); } else { @@ -845,48 +807,50 @@ impl SemanticIndex { let db = self.db.clone(); let language_registry = self.language_registry.clone(); let parsing_files_tx = self.parsing_files_tx.clone(); - cx.background().spawn(async move { - for (worktree_db_id, path) in files_to_delete { - db.delete_file(worktree_db_id, path).await.log_err(); - } - - let embeddings_for_digest = { - let mut files = HashMap::default(); - for pending_file in &pending_files { - files - .entry(pending_file.worktree_db_id) - .or_insert(Vec::new()) - .push(pending_file.relative_path.clone()); + cx.background() + .spawn(async move { + for (worktree_db_id, path) in files_to_delete { + db.delete_file(worktree_db_id, path).await.log_err(); } - Arc::new( - db.embeddings_for_files(files) - .await - .log_err() - .unwrap_or_default(), - ) - }; - for mut pending_file in pending_files { - if let Ok(language) = language_registry - .language_for_file(&pending_file.relative_path, None) - .await - { - if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) - && &language.name().as_ref() != &"Markdown" - && language - .grammar() - .and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { - continue; + let embeddings_for_digest = { + let mut files = HashMap::default(); + for pending_file in &pending_files { + files + .entry(pending_file.worktree_db_id) + .or_insert(Vec::new()) + .push(pending_file.relative_path.clone()); } - pending_file.language = Some(language); + Arc::new( + db.embeddings_for_files(files) + .await + .log_err() + .unwrap_or_default(), + ) + }; + + for mut pending_file in pending_files { + if let Ok(language) = language_registry + .language_for_file(&pending_file.relative_path, None) + .await + { + if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) + && &language.name().as_ref() != &"Markdown" + && language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } + pending_file.language = Some(language); + } + parsing_files_tx + .try_send((embeddings_for_digest.clone(), pending_file)) + .ok(); } - parsing_files_tx - .try_send((embeddings_for_digest.clone(), pending_file)) - .ok(); - } - }) + }) + .detach() } } diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs index 2f28184f20..008a9e0434 100644 --- a/crates/semantic_index/src/semantic_index_tests.rs +++ b/crates/semantic_index/src/semantic_index_tests.rs @@ -87,16 +87,18 @@ async fn test_semantic_index(deterministic: Arc, cx: &mut TestApp let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await; - semantic_index.update(cx, |store, cx| store.register_project(project.clone(), cx)); + semantic_index.update(cx, |store, cx| { + store.register_project(project.clone(), cx); + }); deterministic.run_until_parked(); - let (file_count, outstanding_file_count) = semantic_index - .update(cx, |store, cx| store.index_project(project.clone(), cx)) - .await - .unwrap(); - assert_eq!(file_count, 3); + let pending_file_count = + semantic_index.read_with(cx, |index, _| index.pending_file_count(&project).unwrap()); + semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx)); + deterministic.run_until_parked(); + assert_eq!(*pending_file_count.borrow(), 3); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); - assert_eq!(*outstanding_file_count.borrow(), 0); + assert_eq!(*pending_file_count.borrow(), 0); let search_results = semantic_index .update(cx, |store, cx| { @@ -188,14 +190,11 @@ async fn test_semantic_index(deterministic: Arc, cx: &mut TestApp deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); let prev_embedding_count = embedding_provider.embedding_count(); - let (file_count, outstanding_file_count) = semantic_index - .update(cx, |store, cx| store.index_project(project.clone(), cx)) - .await - .unwrap(); - assert_eq!(file_count, 1); - + semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx)); + deterministic.run_until_parked(); + assert_eq!(*pending_file_count.borrow(), 1); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); - assert_eq!(*outstanding_file_count.borrow(), 0); + assert_eq!(*pending_file_count.borrow(), 0); assert_eq!( embedding_provider.embedding_count() - prev_embedding_count,