Simplify SemanticIndex::index_project

Co-Authored-By: Kyle Caverly <kyle@zed.dev>
Antonio Scandurra 2023-09-05 16:52:58 +02:00
parent 6b1dc63fc0
commit 3c70b127bd
3 changed files with 99 additions and 144 deletions
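
The net effect for callers, pieced together from the call sites changed in this diff, is roughly the pattern below (a minimal sketch; `semantic_index`, `project`, and `cx` are the Zed/gpui handles used throughout these files, and error handling is omitted):

    // index_project now registers the project on demand and just kicks off a
    // background re-index; it no longer returns a Task that must be awaited.
    semantic_index.update(cx, |index, cx| {
        index.index_project(project.clone(), cx);
    });

    // Progress is observed via the watch channel exposed by pending_file_count,
    // replacing the (file_count, receiver) pair the old Task resolved to.
    let mut pending_file_count_rx = semantic_index
        .read_with(cx, |index, _| index.pending_file_count(&project).unwrap());
    while let Some(count) = pending_file_count_rx.recv().await {
        if count == 0 {
            break; // indexing is complete
        }
    }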


@@ -12,15 +12,13 @@ use editor::{
     SelectAll, MAX_TAB_TITLE_LEN,
 };
 use futures::StreamExt;
-use gpui::platform::PromptLevel;
 use gpui::{
-    actions, elements::*, platform::MouseButton, Action, AnyElement, AnyViewHandle, AppContext,
-    Entity, ModelContext, ModelHandle, Subscription, Task, View, ViewContext, ViewHandle,
-    WeakModelHandle, WeakViewHandle,
+    actions,
+    elements::*,
+    platform::{MouseButton, PromptLevel},
+    Action, AnyElement, AnyViewHandle, AppContext, Entity, ModelContext, ModelHandle, Subscription,
+    Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle,
 };
 use menu::Confirm;
 use postage::stream::Stream;
 use project::{
@@ -132,8 +130,7 @@ pub struct ProjectSearchView {
 }
 
 struct SemanticSearchState {
-    file_count: usize,
-    outstanding_file_count: usize,
+    pending_file_count: usize,
     _progress_task: Task<()>,
 }
@@ -319,12 +316,8 @@ impl View for ProjectSearchView {
         };
 
         let semantic_status = if let Some(semantic) = &self.semantic_state {
-            if semantic.outstanding_file_count > 0 {
-                format!(
-                    "Indexing: {} of {}...",
-                    semantic.file_count - semantic.outstanding_file_count,
-                    semantic.file_count
-                )
+            if semantic.pending_file_count > 0 {
+                format!("Remaining files to index: {}", semantic.pending_file_count)
             } else {
                 "Indexing complete".to_string()
             }
@@ -641,26 +634,25 @@ impl ProjectSearchView {
         let project = self.model.read(cx).project.clone();
 
-        let index_task = semantic_index.update(cx, |semantic_index, cx| {
-            semantic_index.index_project(project, cx)
+        let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| {
+            semantic_index.index_project(project.clone(), cx);
+            semantic_index.pending_file_count(&project).unwrap()
         });
 
         cx.spawn(|search_view, mut cx| async move {
-            let (files_to_index, mut files_remaining_rx) = index_task.await?;
             search_view.update(&mut cx, |search_view, cx| {
                 cx.notify();
+                let pending_file_count = *pending_file_count_rx.borrow();
                 search_view.semantic_state = Some(SemanticSearchState {
-                    file_count: files_to_index,
-                    outstanding_file_count: files_to_index,
+                    pending_file_count,
                     _progress_task: cx.spawn(|search_view, mut cx| async move {
-                        while let Some(count) = files_remaining_rx.recv().await {
+                        while let Some(count) = pending_file_count_rx.recv().await {
                             search_view
                                 .update(&mut cx, |search_view, cx| {
                                     if let Some(semantic_search_state) =
                                         &mut search_view.semantic_state
                                     {
-                                        semantic_search_state.outstanding_file_count = count;
+                                        semantic_search_state.pending_file_count = count;
                                         cx.notify();
                                         if count == 0 {
                                             return;
@@ -959,7 +951,7 @@ impl ProjectSearchView {
         match mode {
             SearchMode::Semantic => {
                 if let Some(semantic) = &mut self.semantic_state {
-                    if semantic.outstanding_file_count > 0 {
+                    if semantic.pending_file_count > 0 {
                         return;
                     }


@@ -66,7 +66,9 @@ pub fn init(
             if let Some(workspace) = workspace.upgrade(cx) {
                 let project = workspace.read(cx).project().clone();
                 if project.read(cx).is_local() {
-                    semantic_index.update(cx, |index, cx| index.register_project(project, cx));
+                    semantic_index.update(cx, |index, cx| {
+                        index.register_project(project, cx);
+                    });
                 }
             }
         }
@@ -122,7 +124,6 @@ impl WorktreeState {
     fn paths_changed(
         &mut self,
         changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
-        changed_at: Instant,
         worktree: &Worktree,
     ) {
         let changed_paths = match self {
@@ -140,7 +141,6 @@
                 changed_paths.insert(
                     path.clone(),
                     ChangedPathInfo {
-                        changed_at,
                         mtime: entry.mtime,
                         is_deleted: *change == PathChange::Removed,
                     },
@@ -160,7 +160,6 @@ struct RegisteredWorktreeState {
 }
 
 struct ChangedPathInfo {
-    changed_at: Instant,
     mtime: SystemTime,
     is_deleted: bool,
 }
@@ -409,43 +408,47 @@ impl SemanticIndex {
             return;
         };
 
-        let change_time = Instant::now();
         let worktree = worktree.read(cx);
         let worktree_state = if let Some(worktree_state) = project_state.worktree(worktree_id) {
             worktree_state
         } else {
             return;
         };
-        worktree_state.paths_changed(changes, Instant::now(), worktree);
+        worktree_state.paths_changed(changes, worktree);
         if let WorktreeState::Registered(_) = worktree_state {
             cx.spawn_weak(|this, mut cx| async move {
                 cx.background().timer(BACKGROUND_INDEXING_DELAY).await;
                 if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) {
-                    this.update(&mut cx, |this, cx| {
-                        this.reindex_changed_paths(project, Some(change_time), cx)
-                    })
-                    .await;
+                    this.update(&mut cx, |this, cx| this.index_project(project, cx));
                 }
             })
             .detach();
         }
     }
 
-    pub fn register_project(&mut self, project: ModelHandle<Project>, cx: &mut ModelContext<Self>) {
-        log::trace!("Registering Project for Semantic Index");
-        let subscription = cx.subscribe(&project, |this, project, event, cx| match event {
-            project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => {
-                this.project_worktrees_changed(project.clone(), cx);
-            }
-            project::Event::WorktreeUpdatedEntries(worktree_id, changes) => {
-                this.project_entries_changed(project, *worktree_id, changes.clone(), cx);
-            }
-            _ => {}
-        });
-        self.projects
-            .insert(project.downgrade(), ProjectState::new(subscription));
-        self.project_worktrees_changed(project, cx);
+    fn register_project(
+        &mut self,
+        project: ModelHandle<Project>,
+        cx: &mut ModelContext<Self>,
+    ) -> &mut ProjectState {
+        if !self.projects.contains_key(&project.downgrade()) {
+            log::trace!("Registering Project for Semantic Index");
+            let subscription = cx.subscribe(&project, |this, project, event, cx| match event {
+                project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => {
+                    this.project_worktrees_changed(project.clone(), cx);
+                }
+                project::Event::WorktreeUpdatedEntries(worktree_id, changes) => {
+                    this.project_entries_changed(project, *worktree_id, changes.clone(), cx);
+                }
+                _ => {}
+            });
+            self.projects
+                .insert(project.downgrade(), ProjectState::new(subscription));
+            self.project_worktrees_changed(project.clone(), cx);
+        }
+        self.projects.get_mut(&project.downgrade()).unwrap()
     }
 
     fn register_worktree(
@@ -487,7 +490,6 @@ impl SemanticIndex {
             .background()
             .spawn(async move {
                 let mut changed_paths = BTreeMap::new();
-                let now = Instant::now();
 
                 for file in worktree.files(false, 0) {
                     let absolute_path = worktree.absolutize(&file.path);
@@ -518,7 +520,6 @@ impl SemanticIndex {
                         changed_paths.insert(
                             file.path.clone(),
                             ChangedPathInfo {
-                                changed_at: now,
                                 mtime: file.mtime,
                                 is_deleted: false,
                             },
@@ -532,7 +533,6 @@ impl SemanticIndex {
                         changed_paths.insert(
                             path.into(),
                             ChangedPathInfo {
-                                changed_at: now,
                                 mtime,
                                 is_deleted: true,
                             },
@@ -614,29 +614,7 @@ impl SemanticIndex {
         }
     }
 
-    pub fn index_project(
-        &mut self,
-        project: ModelHandle<Project>,
-        cx: &mut ModelContext<Self>,
-    ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
-        let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade())
-        {
-            project_state
-        } else {
-            return Task::ready(Err(anyhow!("project was not registered")));
-        };
-
-        let outstanding_job_count_rx = project_state.outstanding_job_count_rx.clone();
-        cx.spawn(|this, mut cx| async move {
-            this.update(&mut cx, |this, cx| {
-                this.reindex_changed_paths(project.clone(), None, cx)
-            })
-            .await;
-            let count = *outstanding_job_count_rx.borrow();
-            Ok((count, outstanding_job_count_rx))
-        })
-    }
-
-    pub fn outstanding_job_count_rx(
+    pub fn pending_file_count(
         &self,
         project: &ModelHandle<Project>,
     ) -> Option<watch::Receiver<usize>> {
@@ -783,18 +761,8 @@ impl SemanticIndex {
         })
     }
 
-    fn reindex_changed_paths(
-        &mut self,
-        project: ModelHandle<Project>,
-        last_changed_before: Option<Instant>,
-        cx: &mut ModelContext<Self>,
-    ) -> Task<()> {
-        let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade())
-        {
-            project_state
-        } else {
-            return Task::ready(());
-        };
+    pub fn index_project(&mut self, project: ModelHandle<Project>, cx: &mut ModelContext<Self>) {
+        let project_state = self.register_project(project.clone(), cx);
 
         let mut pending_files = Vec::new();
         let mut files_to_delete = Vec::new();
@@ -816,12 +784,6 @@ impl SemanticIndex {
             };
 
             worktree_state.changed_paths.retain(|path, info| {
-                if let Some(last_changed_before) = last_changed_before {
-                    if info.changed_at > last_changed_before {
-                        return true;
-                    }
-                }
-
                 if info.is_deleted {
                     files_to_delete.push((worktree_state.db_id, path.clone()));
                 } else {
@@ -845,48 +807,50 @@ impl SemanticIndex {
         let db = self.db.clone();
         let language_registry = self.language_registry.clone();
         let parsing_files_tx = self.parsing_files_tx.clone();
-        cx.background().spawn(async move {
-            for (worktree_db_id, path) in files_to_delete {
-                db.delete_file(worktree_db_id, path).await.log_err();
-            }
-
-            let embeddings_for_digest = {
-                let mut files = HashMap::default();
-                for pending_file in &pending_files {
-                    files
-                        .entry(pending_file.worktree_db_id)
-                        .or_insert(Vec::new())
-                        .push(pending_file.relative_path.clone());
-                }
-                Arc::new(
-                    db.embeddings_for_files(files)
-                        .await
-                        .log_err()
-                        .unwrap_or_default(),
-                )
-            };
-
-            for mut pending_file in pending_files {
-                if let Ok(language) = language_registry
-                    .language_for_file(&pending_file.relative_path, None)
-                    .await
-                {
-                    if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
-                        && &language.name().as_ref() != &"Markdown"
-                        && language
-                            .grammar()
-                            .and_then(|grammar| grammar.embedding_config.as_ref())
-                            .is_none()
-                    {
-                        continue;
-                    }
-                    pending_file.language = Some(language);
-                }
-                parsing_files_tx
-                    .try_send((embeddings_for_digest.clone(), pending_file))
-                    .ok();
-            }
-        })
+        cx.background()
+            .spawn(async move {
+                for (worktree_db_id, path) in files_to_delete {
+                    db.delete_file(worktree_db_id, path).await.log_err();
+                }
+
+                let embeddings_for_digest = {
+                    let mut files = HashMap::default();
+                    for pending_file in &pending_files {
+                        files
+                            .entry(pending_file.worktree_db_id)
+                            .or_insert(Vec::new())
+                            .push(pending_file.relative_path.clone());
+                    }
+                    Arc::new(
+                        db.embeddings_for_files(files)
+                            .await
+                            .log_err()
+                            .unwrap_or_default(),
+                    )
+                };
+
+                for mut pending_file in pending_files {
+                    if let Ok(language) = language_registry
+                        .language_for_file(&pending_file.relative_path, None)
+                        .await
+                    {
+                        if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+                            && &language.name().as_ref() != &"Markdown"
+                            && language
+                                .grammar()
+                                .and_then(|grammar| grammar.embedding_config.as_ref())
+                                .is_none()
+                        {
+                            continue;
+                        }
+                        pending_file.language = Some(language);
+                    }
+                    parsing_files_tx
+                        .try_send((embeddings_for_digest.clone(), pending_file))
+                        .ok();
+                }
+            })
+            .detach()
     }
 }


@@ -87,16 +87,18 @@ async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestApp
     let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
 
-    semantic_index.update(cx, |store, cx| store.register_project(project.clone(), cx));
+    semantic_index.update(cx, |store, cx| {
+        store.register_project(project.clone(), cx);
+    });
     deterministic.run_until_parked();
 
-    let (file_count, outstanding_file_count) = semantic_index
-        .update(cx, |store, cx| store.index_project(project.clone(), cx))
-        .await
-        .unwrap();
-    assert_eq!(file_count, 3);
+    let pending_file_count =
+        semantic_index.read_with(cx, |index, _| index.pending_file_count(&project).unwrap());
+    semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx));
+    deterministic.run_until_parked();
+    assert_eq!(*pending_file_count.borrow(), 3);
     deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
-    assert_eq!(*outstanding_file_count.borrow(), 0);
+    assert_eq!(*pending_file_count.borrow(), 0);
 
     let search_results = semantic_index
         .update(cx, |store, cx| {
@@ -188,14 +190,11 @@ async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestApp
     deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
 
     let prev_embedding_count = embedding_provider.embedding_count();
-    let (file_count, outstanding_file_count) = semantic_index
-        .update(cx, |store, cx| store.index_project(project.clone(), cx))
-        .await
-        .unwrap();
-    assert_eq!(file_count, 1);
+    semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx));
+    deterministic.run_until_parked();
+    assert_eq!(*pending_file_count.borrow(), 1);
     deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
-    assert_eq!(*outstanding_file_count.borrow(), 0);
+    assert_eq!(*pending_file_count.borrow(), 0);
 
     assert_eq!(
         embedding_provider.embedding_count() - prev_embedding_count,