From 6181ac6bade22e7ed074c47793a7fde298a5536b Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Thu, 13 Jun 2024 16:40:06 -0400 Subject: [PATCH] rustdoc: Index crates progressively (#13011) This PR updates the rustdoc indexing to be more progressive. Rather than waiting until we've crawled the entire crate to begin writing to the database, we instead start writing the docs as we go. This makes it so you can start getting completions while the indexing is still running. Release Notes: - N/A --- .../src/slash_command/rustdoc_command.rs | 2 +- crates/rustdoc/src/{crawler.rs => indexer.rs} | 43 +++++++++++-------- crates/rustdoc/src/rustdoc.rs | 3 +- crates/rustdoc/src/store.rs | 23 ++-------- 4 files changed, 31 insertions(+), 40 deletions(-) rename crates/rustdoc/src/{crawler.rs => indexer.rs} (87%) diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index 827a961a49..385e48d67d 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -10,7 +10,7 @@ use gpui::{AppContext, Model, Task, WeakView}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; -use rustdoc::crawler::LocalProvider; +use rustdoc::LocalProvider; use rustdoc::{convert_rustdoc_to_markdown, RustdocStore}; use ui::{prelude::*, ButtonLike, ElevationIndex}; use workspace::Workspace; diff --git a/crates/rustdoc/src/crawler.rs b/crates/rustdoc/src/indexer.rs similarity index 87% rename from crates/rustdoc/src/crawler.rs rename to crates/rustdoc/src/indexer.rs index 80f3e0fc4a..f56b2eda1a 100644 --- a/crates/rustdoc/src/crawler.rs +++ b/crates/rustdoc/src/indexer.rs @@ -7,9 +7,8 @@ use collections::{HashSet, VecDeque}; use fs::Fs; use futures::AsyncReadExt; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; -use indexmap::IndexMap; -use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind}; +use crate::{convert_rustdoc_to_markdown, RustdocDatabase, RustdocItem, RustdocItemKind}; #[derive(Debug, Clone, Copy)] pub enum RustdocSource { @@ -129,29 +128,32 @@ struct RustdocItemWithHistory { pub history: Vec, } -pub struct CrateDocs { - pub crate_root_markdown: String, - pub items: IndexMap, -} - -pub struct RustdocCrawler { +pub(crate) struct RustdocIndexer { + database: Arc, provider: Box, } -impl RustdocCrawler { - pub fn new(provider: Box) -> Self { - Self { provider } +impl RustdocIndexer { + pub fn new( + database: Arc, + provider: Box, + ) -> Self { + Self { database, provider } } - pub async fn crawl(&self, crate_name: String) -> Result> { + /// Indexes the crate with the given name. + pub async fn index(&self, crate_name: String) -> Result<()> { let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else { - return Ok(None); + return Ok(()); }; let (crate_root_markdown, items) = convert_rustdoc_to_markdown(crate_root_content.as_bytes())?; - let mut docs_by_item = IndexMap::new(); + self.database + .insert(crate_name.clone(), None, crate_root_markdown) + .await?; + let mut seen_items = HashSet::from_iter(items.clone()); let mut items_to_visit: VecDeque = VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { @@ -189,7 +191,13 @@ impl RustdocCrawler { let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - docs_by_item.insert(item.clone(), markdown); + self.database + .insert( + format!("{crate_name}::{}", item.display()), + Some(item), + markdown, + ) + .await?; let parent_item = item; for mut item in referenced_items { @@ -219,9 +227,6 @@ impl RustdocCrawler { } } - Ok(Some(CrateDocs { - crate_root_markdown, - items: docs_by_item, - })) + Ok(()) } } diff --git a/crates/rustdoc/src/rustdoc.rs b/crates/rustdoc/src/rustdoc.rs index a35fdac62a..5bf300a5f4 100644 --- a/crates/rustdoc/src/rustdoc.rs +++ b/crates/rustdoc/src/rustdoc.rs @@ -1,8 +1,9 @@ -pub mod crawler; +mod indexer; mod item; mod store; mod to_markdown; +pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource}; pub use crate::item::*; pub use crate::store::*; pub use crate::to_markdown::convert_rustdoc_to_markdown; diff --git a/crates/rustdoc/src/store.rs b/crates/rustdoc/src/store.rs index 9e14d245c8..3372d281b6 100644 --- a/crates/rustdoc/src/store.rs +++ b/crates/rustdoc/src/store.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use util::paths::SUPPORT_DIR; use util::ResultExt; -use crate::crawler::{RustdocCrawler, RustdocProvider}; +use crate::indexer::{RustdocIndexer, RustdocProvider}; use crate::{RustdocItem, RustdocItemKind}; struct GlobalRustdocStore(Arc); @@ -75,25 +75,10 @@ impl RustdocStore { ) -> Task> { let database_future = self.database_future.clone(); self.executor.spawn(async move { - let crawler = RustdocCrawler::new(provider); - - let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else { - return Ok(()); - }; - let database = database_future.await.map_err(|err| anyhow!(err))?; + let indexer = RustdocIndexer::new(database, provider); - database - .insert(crate_name.clone(), None, crate_docs.crate_root_markdown) - .await?; - - for (item, item_docs) in crate_docs.items { - database - .insert(crate_name.clone(), Some(&item), item_docs) - .await?; - } - - Ok(()) + indexer.index(crate_name.clone()).await }) } @@ -151,7 +136,7 @@ impl RustdocDatabaseEntry { } } -struct RustdocDatabase { +pub(crate) struct RustdocDatabase { executor: BackgroundExecutor, env: heed::Env, entries: Database, SerdeBincode>,