Move retrieval of embeddings from the db into reindex_changed_files

Co-Authored-By: Kyle Caverly <kyle@zed.dev>
This commit is contained in:
Antonio Scandurra 2023-09-05 16:09:24 +02:00
parent d4cff68475
commit 7b5a41dda2

View File

@ -418,30 +418,12 @@ impl SemanticIndex {
}; };
worktree_state.paths_changed(changes, Instant::now(), worktree); worktree_state.paths_changed(changes, Instant::now(), worktree);
if let WorktreeState::Registered(worktree_state) = worktree_state { if let WorktreeState::Registered(worktree_state) = worktree_state {
let embeddings_for_digest = {
let worktree_paths = worktree_state
.changed_paths
.iter()
.map(|(path, _)| path.clone())
.collect::<Vec<_>>();
let mut worktree_id_file_paths = HashMap::default();
worktree_id_file_paths.insert(worktree_state.db_id, worktree_paths);
self.db.embeddings_for_files(worktree_id_file_paths)
};
cx.spawn_weak(|this, mut cx| async move { cx.spawn_weak(|this, mut cx| async move {
let embeddings_for_digest =
embeddings_for_digest.await.log_err().unwrap_or_default();
cx.background().timer(BACKGROUND_INDEXING_DELAY).await; cx.background().timer(BACKGROUND_INDEXING_DELAY).await;
if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) { if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) {
Self::reindex_changed_paths( this.update(&mut cx, |this, cx| {
this, this.reindex_changed_paths(project, Some(change_time), cx)
project, })
Some(change_time),
&mut cx,
Arc::new(embeddings_for_digest),
)
.await; .await;
} }
}) })
@ -644,31 +626,10 @@ impl SemanticIndex {
return Task::ready(Err(anyhow!("project was not registered"))); return Task::ready(Err(anyhow!("project was not registered")));
}; };
let outstanding_job_count_rx = project_state.outstanding_job_count_rx.clone(); let outstanding_job_count_rx = project_state.outstanding_job_count_rx.clone();
let mut worktree_id_file_paths = HashMap::default();
for worktree in project_state.worktrees.values() {
if let WorktreeState::Registered(worktree_state) = worktree {
for (path, _) in &worktree_state.changed_paths {
worktree_id_file_paths
.entry(worktree_state.db_id)
.or_insert(Vec::new())
.push(path.clone());
}
}
}
cx.spawn(|this, mut cx| async move { cx.spawn(|this, mut cx| async move {
let embeddings_for_digest = this.read_with(&cx, |this, _| { this.update(&mut cx, |this, cx| {
this.db.embeddings_for_files(worktree_id_file_paths) this.reindex_changed_paths(project.clone(), None, cx)
}); })
let embeddings_for_digest = Arc::new(embeddings_for_digest.await?);
Self::reindex_changed_paths(
this.clone(),
project.clone(),
None,
&mut cx,
embeddings_for_digest,
)
.await; .await;
let count = *outstanding_job_count_rx.borrow(); let count = *outstanding_job_count_rx.borrow();
Ok((count, outstanding_job_count_rx)) Ok((count, outstanding_job_count_rx))
@ -822,94 +783,113 @@ impl SemanticIndex {
}) })
} }
async fn reindex_changed_paths( fn reindex_changed_paths(
this: ModelHandle<SemanticIndex>, &mut self,
project: ModelHandle<Project>, project: ModelHandle<Project>,
last_changed_before: Option<Instant>, last_changed_before: Option<Instant>,
cx: &mut AsyncAppContext, cx: &mut ModelContext<Self>,
embeddings_for_digest: Arc<HashMap<DocumentDigest, Embedding>>, ) -> Task<()> {
) { let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade())
{
project_state
} else {
return Task::ready(());
};
let mut pending_files = Vec::new(); let mut pending_files = Vec::new();
let mut files_to_delete = Vec::new(); let mut files_to_delete = Vec::new();
let (db, language_registry, parsing_files_tx) = this.update(cx, |this, cx| { let outstanding_job_count_tx = &project_state.outstanding_job_count_tx;
if let Some(project_state) = this.projects.get_mut(&project.downgrade()) { project_state
let outstanding_job_count_tx = &project_state.outstanding_job_count_tx; .worktrees
project_state .retain(|worktree_id, worktree_state| {
.worktrees let worktree =
.retain(|worktree_id, worktree_state| { if let Some(worktree) = project.read(cx).worktree_for_id(*worktree_id, cx) {
let worktree = if let Some(worktree) = worktree
project.read(cx).worktree_for_id(*worktree_id, cx) } else {
{ return false;
worktree };
} else { let worktree_state =
return false; if let WorktreeState::Registered(worktree_state) = worktree_state {
}; worktree_state
let worktree_state = } else {
if let WorktreeState::Registered(worktree_state) = worktree_state { return true;
worktree_state };
} else {
return true;
};
worktree_state.changed_paths.retain(|path, info| { worktree_state.changed_paths.retain(|path, info| {
if let Some(last_changed_before) = last_changed_before { if let Some(last_changed_before) = last_changed_before {
if info.changed_at > last_changed_before { if info.changed_at > last_changed_before {
return true; return true;
} }
} }
if info.is_deleted { if info.is_deleted {
files_to_delete.push((worktree_state.db_id, path.clone())); files_to_delete.push((worktree_state.db_id, path.clone()));
} else { } else {
let absolute_path = worktree.read(cx).absolutize(path); let absolute_path = worktree.read(cx).absolutize(path);
let job_handle = JobHandle::new(&outstanding_job_count_tx); let job_handle = JobHandle::new(&outstanding_job_count_tx);
pending_files.push(PendingFile { pending_files.push(PendingFile {
absolute_path, absolute_path,
relative_path: path.clone(), relative_path: path.clone(),
language: None, language: None,
job_handle, job_handle,
modified_time: info.mtime, modified_time: info.mtime,
worktree_db_id: worktree_state.db_id, worktree_db_id: worktree_state.db_id,
});
}
false
}); });
true }
});
}
( false
this.db.clone(), });
this.language_registry.clone(), true
this.parsing_files_tx.clone(), });
)
});
for (worktree_db_id, path) in files_to_delete { let mut worktree_id_file_paths = HashMap::default();
db.delete_file(worktree_db_id, path).await.log_err(); for worktree in project_state.worktrees.values() {
} if let WorktreeState::Registered(worktree_state) = worktree {
for (path, _) in &worktree_state.changed_paths {
for mut pending_file in pending_files { worktree_id_file_paths
if let Ok(language) = language_registry .entry(worktree_state.db_id)
.language_for_file(&pending_file.relative_path, None) .or_insert(Vec::new())
.await .push(path.clone());
{
if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
&& &language.name().as_ref() != &"Markdown"
&& language
.grammar()
.and_then(|grammar| grammar.embedding_config.as_ref())
.is_none()
{
continue;
} }
pending_file.language = Some(language);
} }
parsing_files_tx
.try_send((embeddings_for_digest.clone(), pending_file))
.ok();
} }
let db = self.db.clone();
let language_registry = self.language_registry.clone();
let parsing_files_tx = self.parsing_files_tx.clone();
cx.background().spawn(async move {
for (worktree_db_id, path) in files_to_delete {
db.delete_file(worktree_db_id, path).await.log_err();
}
let embeddings_for_digest = Arc::new(
db.embeddings_for_files(worktree_id_file_paths)
.await
.log_err()
.unwrap_or_default(),
);
for mut pending_file in pending_files {
if let Ok(language) = language_registry
.language_for_file(&pending_file.relative_path, None)
.await
{
if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
&& &language.name().as_ref() != &"Markdown"
&& language
.grammar()
.and_then(|grammar| grammar.embedding_config.as_ref())
.is_none()
{
continue;
}
pending_file.language = Some(language);
}
parsing_files_tx
.try_send((embeddings_for_digest.clone(), pending_file))
.ok();
}
})
} }
} }