From ca80343486a77903327bf413a14faf119bdf05ae Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Fri, 12 Jul 2024 13:22:52 -0400 Subject: [PATCH] assistant: Add docs provider for `docs.rs` (#14356) This PR adds an indexed docs provider for retrieving docs from `docs.rs` using the `/docs` slash command. Release Notes: - N/A --- .../src/slash_command/docs_command.rs | 39 +- crates/indexed_docs/src/providers/rustdoc.rs | 351 +++++++++--------- crates/indexed_docs/src/store.rs | 6 - 3 files changed, 211 insertions(+), 185 deletions(-) diff --git a/crates/assistant/src/slash_command/docs_command.rs b/crates/assistant/src/slash_command/docs_command.rs index 831b4210ff..cd673b9320 100644 --- a/crates/assistant/src/slash_command/docs_command.rs +++ b/crates/assistant/src/slash_command/docs_command.rs @@ -8,7 +8,8 @@ use assistant_slash_command::{ }; use gpui::{AppContext, Model, Task, WeakView}; use indexed_docs::{ - IndexedDocsRegistry, IndexedDocsStore, LocalProvider, PackageName, ProviderId, RustdocIndexer, + DocsDotRsProvider, IndexedDocsRegistry, IndexedDocsStore, LocalRustdocProvider, PackageName, + ProviderId, }; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; @@ -34,22 +35,22 @@ impl DocsSlashCommand { )) } - /// Ensures that the rustdoc provider is registered. + /// Ensures that the indexed doc providers for Rust are registered. /// /// Ideally we would do this sooner, but we need to wait until we're able to /// access the workspace so we can read the project. - fn ensure_rustdoc_provider_is_registered( + fn ensure_rust_doc_providers_are_registered( &self, workspace: Option>, cx: &mut AppContext, ) { let indexed_docs_registry = IndexedDocsRegistry::global(cx); if indexed_docs_registry - .get_provider_store(ProviderId::rustdoc()) + .get_provider_store(LocalRustdocProvider::id()) .is_none() { let index_provider_deps = maybe!({ - let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?; + let workspace = workspace.clone().ok_or_else(|| anyhow!("no workspace"))?; let workspace = workspace .upgrade() .ok_or_else(|| anyhow!("workspace was dropped"))?; @@ -63,9 +64,29 @@ impl DocsSlashCommand { }); if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() { - indexed_docs_registry.register_provider(Box::new(RustdocIndexer::new(Box::new( - LocalProvider::new(fs, cargo_workspace_root), - )))); + indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new( + fs, + cargo_workspace_root, + ))); + } + } + + if indexed_docs_registry + .get_provider_store(DocsDotRsProvider::id()) + .is_none() + { + let http_client = maybe!({ + let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?; + let workspace = workspace + .upgrade() + .ok_or_else(|| anyhow!("workspace was dropped"))?; + let project = workspace.read(cx).project().clone(); + anyhow::Ok(project.read(cx).client().http_client().clone()) + }); + + if let Some(http_client) = http_client.log_err() { + indexed_docs_registry + .register_provider(Box::new(DocsDotRsProvider::new(http_client))); } } } @@ -95,7 +116,7 @@ impl SlashCommand for DocsSlashCommand { workspace: Option>, cx: &mut AppContext, ) -> Task>> { - self.ensure_rustdoc_provider_is_registered(workspace, cx); + self.ensure_rust_doc_providers_are_registered(workspace, cx); let indexed_docs_registry = IndexedDocsRegistry::global(cx); let args = DocsSlashCommandArgs::parse(&query); diff --git a/crates/indexed_docs/src/providers/rustdoc.rs b/crates/indexed_docs/src/providers/rustdoc.rs index b8b345c39f..749af38af6 100644 --- a/crates/indexed_docs/src/providers/rustdoc.rs +++ b/crates/indexed_docs/src/providers/rustdoc.rs @@ -1,6 +1,7 @@ mod item; mod to_markdown; +use futures::future::BoxFuture; pub use item::*; pub use to_markdown::convert_rustdoc_to_markdown; @@ -11,7 +12,7 @@ use anyhow::{bail, Context, Result}; use async_trait::async_trait; use collections::{HashSet, VecDeque}; use fs::Fs; -use futures::AsyncReadExt; +use futures::{AsyncReadExt, FutureExt}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId}; @@ -23,124 +24,16 @@ struct RustdocItemWithHistory { pub history: Vec, } -#[async_trait] -pub trait RustdocProvider { - async fn fetch_page( - &self, - package: &PackageName, - item: Option<&RustdocItem>, - ) -> Result>; -} - -pub struct RustdocIndexer { - provider: Box, -} - -impl RustdocIndexer { - pub fn new(provider: Box) -> Self { - Self { provider } - } -} - -#[async_trait] -impl IndexedDocsProvider for RustdocIndexer { - fn id(&self) -> ProviderId { - ProviderId::rustdoc() - } - - fn database_path(&self) -> PathBuf { - paths::support_dir().join("docs/rust/rustdoc-db.1.mdb") - } - - async fn index(&self, package: PackageName, database: Arc) -> Result<()> { - let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else { - return Ok(()); - }; - - let (crate_root_markdown, items) = - convert_rustdoc_to_markdown(package_root_content.as_bytes())?; - - database - .insert(package.to_string(), crate_root_markdown) - .await?; - - let mut seen_items = HashSet::from_iter(items.clone()); - let mut items_to_visit: VecDeque = - VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { - item, - #[cfg(debug_assertions)] - history: Vec::new(), - })); - - while let Some(item_with_history) = items_to_visit.pop_front() { - let item = &item_with_history.item; - - let Some(result) = self - .provider - .fetch_page(&package, Some(&item)) - .await - .with_context(|| { - #[cfg(debug_assertions)] - { - format!( - "failed to fetch {item:?}: {history:?}", - history = item_with_history.history - ) - } - - #[cfg(not(debug_assertions))] - { - format!("failed to fetch {item:?}") - } - })? - else { - continue; - }; - - let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - - database - .insert(format!("{package}::{}", item.display()), markdown) - .await?; - - let parent_item = item; - for mut item in referenced_items { - if seen_items.contains(&item) { - continue; - } - - seen_items.insert(item.clone()); - - item.path.extend(parent_item.path.clone()); - match parent_item.kind { - RustdocItemKind::Mod => { - item.path.push(parent_item.name.clone()); - } - _ => {} - } - - items_to_visit.push_back(RustdocItemWithHistory { - #[cfg(debug_assertions)] - history: { - let mut history = item_with_history.history.clone(); - history.push(item.url_path()); - history - }, - item, - }); - } - } - - Ok(()) - } -} - -pub struct LocalProvider { +pub struct LocalRustdocProvider { fs: Arc, cargo_workspace_root: PathBuf, } -impl LocalProvider { +impl LocalRustdocProvider { + pub fn id() -> ProviderId { + ProviderId("rustdoc".into()) + } + pub fn new(fs: Arc, cargo_workspace_root: PathBuf) -> Self { Self { fs, @@ -150,30 +43,46 @@ impl LocalProvider { } #[async_trait] -impl RustdocProvider for LocalProvider { - async fn fetch_page( - &self, - crate_name: &PackageName, - item: Option<&RustdocItem>, - ) -> Result> { - let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc"); - local_cargo_doc_path.push(crate_name.as_ref()); +impl IndexedDocsProvider for LocalRustdocProvider { + fn id(&self) -> ProviderId { + Self::id() + } - if !self.fs.is_dir(&local_cargo_doc_path).await { - bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`"); - } + fn database_path(&self) -> PathBuf { + paths::support_dir().join("docs/rust/rustdoc-db.1.mdb") + } - if let Some(item) = item { - local_cargo_doc_path.push(item.url_path()); - } else { - local_cargo_doc_path.push("index.html"); - } + async fn index(&self, package: PackageName, database: Arc) -> Result<()> { + index_rustdoc(package, database, { + move |crate_name, item| { + let fs = self.fs.clone(); + let cargo_workspace_root = self.cargo_workspace_root.clone(); + let crate_name = crate_name.clone(); + let item = item.cloned(); + async move { + let mut local_cargo_doc_path = cargo_workspace_root.join("target/doc"); + local_cargo_doc_path.push(crate_name.as_ref()); - let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { - return Ok(None); - }; + if !fs.is_dir(&local_cargo_doc_path).await { + bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`"); + } - Ok(Some(contents)) + if let Some(item) = item { + local_cargo_doc_path.push(item.url_path()); + } else { + local_cargo_doc_path.push("index.html"); + } + + let Ok(contents) = fs.load(&local_cargo_doc_path).await else { + return Ok(None); + }; + + Ok(Some(contents)) + } + .boxed() + } + }) + .await } } @@ -182,50 +91,152 @@ pub struct DocsDotRsProvider { } impl DocsDotRsProvider { + pub fn id() -> ProviderId { + ProviderId("docs-rs".into()) + } + pub fn new(http_client: Arc) -> Self { Self { http_client } } } #[async_trait] -impl RustdocProvider for DocsDotRsProvider { - async fn fetch_page( - &self, - crate_name: &PackageName, - item: Option<&RustdocItem>, - ) -> Result> { - let version = "latest"; - let path = format!( - "{crate_name}/{version}/{crate_name}{item_path}", - item_path = item - .map(|item| format!("/{}", item.url_path())) - .unwrap_or_default() - ); +impl IndexedDocsProvider for DocsDotRsProvider { + fn id(&self) -> ProviderId { + Self::id() + } - let mut response = self - .http_client - .get( - &format!("https://docs.rs/{path}"), - AsyncBody::default(), - true, - ) - .await?; + fn database_path(&self) -> PathBuf { + paths::support_dir().join("docs/rust/docs-rs-db.1.mdb") + } - let mut body = Vec::new(); - response - .body_mut() - .read_to_end(&mut body) - .await - .context("error reading docs.rs response body")?; + async fn index(&self, package: PackageName, database: Arc) -> Result<()> { + index_rustdoc(package, database, { + move |crate_name, item| { + let http_client = self.http_client.clone(); + let crate_name = crate_name.clone(); + let item = item.cloned(); + async move { + let version = "latest"; + let path = format!( + "{crate_name}/{version}/{crate_name}{item_path}", + item_path = item + .map(|item| format!("/{}", item.url_path())) + .unwrap_or_default() + ); - if response.status().is_client_error() { - let text = String::from_utf8_lossy(body.as_slice()); - bail!( - "status error {}, response: {text:?}", - response.status().as_u16() - ); - } + let mut response = http_client + .get( + &format!("https://docs.rs/{path}"), + AsyncBody::default(), + true, + ) + .await?; - Ok(Some(String::from_utf8(body)?)) + let mut body = Vec::new(); + response + .body_mut() + .read_to_end(&mut body) + .await + .context("error reading docs.rs response body")?; + + if response.status().is_client_error() { + let text = String::from_utf8_lossy(body.as_slice()); + bail!( + "status error {}, response: {text:?}", + response.status().as_u16() + ); + } + + Ok(Some(String::from_utf8(body)?)) + } + .boxed() + } + }) + .await } } + +async fn index_rustdoc( + package: PackageName, + database: Arc, + fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result>> + + Send + + Sync, +) -> Result<()> { + let Some(package_root_content) = fetch_page(&package, None).await? else { + return Ok(()); + }; + + let (crate_root_markdown, items) = + convert_rustdoc_to_markdown(package_root_content.as_bytes())?; + + database + .insert(package.to_string(), crate_root_markdown) + .await?; + + let mut seen_items = HashSet::from_iter(items.clone()); + let mut items_to_visit: VecDeque = + VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { + item, + #[cfg(debug_assertions)] + history: Vec::new(), + })); + + while let Some(item_with_history) = items_to_visit.pop_front() { + let item = &item_with_history.item; + + let Some(result) = fetch_page(&package, Some(&item)).await.with_context(|| { + #[cfg(debug_assertions)] + { + format!( + "failed to fetch {item:?}: {history:?}", + history = item_with_history.history + ) + } + + #[cfg(not(debug_assertions))] + { + format!("failed to fetch {item:?}") + } + })? + else { + continue; + }; + + let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; + + database + .insert(format!("{package}::{}", item.display()), markdown) + .await?; + + let parent_item = item; + for mut item in referenced_items { + if seen_items.contains(&item) { + continue; + } + + seen_items.insert(item.clone()); + + item.path.extend(parent_item.path.clone()); + match parent_item.kind { + RustdocItemKind::Mod => { + item.path.push(parent_item.name.clone()); + } + _ => {} + } + + items_to_visit.push_back(RustdocItemWithHistory { + #[cfg(debug_assertions)] + history: { + let mut history = item_with_history.history.clone(); + history.push(item.url_path()); + history + }, + item, + }); + } + } + + Ok(()) +} diff --git a/crates/indexed_docs/src/store.rs b/crates/indexed_docs/src/store.rs index f1a66ff3e4..7129dfce09 100644 --- a/crates/indexed_docs/src/store.rs +++ b/crates/indexed_docs/src/store.rs @@ -21,12 +21,6 @@ use crate::IndexedDocsRegistry; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] pub struct ProviderId(pub Arc); -impl ProviderId { - pub fn rustdoc() -> Self { - Self("rustdoc".into()) - } -} - /// The name of a package. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] pub struct PackageName(Arc);