From 746038128523dcefaec47add5a13e1d9afdf55bc Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Tue, 2 Jul 2024 13:14:56 -0400 Subject: [PATCH] Start work on genericizing `/rustdoc` (#13745) This PR begins the process of making the backing infrastructure for the `/rustdoc` command more generic such that it can be applied to additional documentation providers. In this PR we: - Rename the `rustdoc` crate to `indexed_docs` as a more general-purpose name - Start moving rustdoc-specific functionality into `indexed_docs::providers::rustdoc` - Add an `IndexedDocsRegistry` to hold multiple `IndexedDocsStore`s (one per provider) We haven't yet removed the rustdoc-specific bits in the `DocsIndexer`. That will follow soon. Release Notes: - N/A --- Cargo.lock | 52 ++-- Cargo.toml | 4 +- crates/assistant/Cargo.toml | 4 +- crates/assistant/src/assistant.rs | 11 +- crates/assistant/src/assistant_panel.rs | 8 +- .../src/slash_command/rustdoc_command.rs | 18 +- crates/{rustdoc => indexed_docs}/Cargo.toml | 4 +- crates/{rustdoc => indexed_docs}/LICENSE-GPL | 0 crates/indexed_docs/src/indexed_docs.rs | 8 + crates/indexed_docs/src/indexer.rs | 122 ++++++++++ crates/indexed_docs/src/providers.rs | 1 + crates/indexed_docs/src/providers/rustdoc.rs | 117 +++++++++ .../src/providers/rustdoc}/item.rs | 0 .../src/providers/rustdoc}/to_markdown.rs | 0 crates/indexed_docs/src/registry.rs | 47 ++++ crates/{rustdoc => indexed_docs}/src/store.rs | 149 ++++++------ crates/rustdoc/src/indexer.rs | 226 ------------------ crates/rustdoc/src/rustdoc.rs | 9 - 18 files changed, 425 insertions(+), 355 deletions(-) rename crates/{rustdoc => indexed_docs}/Cargo.toml (92%) rename crates/{rustdoc => indexed_docs}/LICENSE-GPL (100%) create mode 100644 crates/indexed_docs/src/indexed_docs.rs create mode 100644 crates/indexed_docs/src/indexer.rs create mode 100644 crates/indexed_docs/src/providers.rs create mode 100644 crates/indexed_docs/src/providers/rustdoc.rs rename crates/{rustdoc/src => indexed_docs/src/providers/rustdoc}/item.rs (100%) rename crates/{rustdoc/src => indexed_docs/src/providers/rustdoc}/to_markdown.rs (100%) create mode 100644 crates/indexed_docs/src/registry.rs rename crates/{rustdoc => indexed_docs}/src/store.rs (63%) delete mode 100644 crates/rustdoc/src/indexer.rs delete mode 100644 crates/rustdoc/src/rustdoc.rs diff --git a/Cargo.lock b/Cargo.lock index 76ed7eedac..d0e0284d1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -391,6 +391,7 @@ dependencies = [ "heed", "html_to_markdown 0.1.0", "http 0.1.0", + "indexed_docs", "indoc", "language", "log", @@ -406,7 +407,6 @@ dependencies = [ "rand 0.8.5", "regex", "rope", - "rustdoc", "schemars", "search", "semantic_index", @@ -5493,6 +5493,31 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126" +[[package]] +name = "indexed_docs" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "collections", + "derive_more", + "fs", + "futures 0.3.28", + "fuzzy", + "gpui", + "heed", + "html_to_markdown 0.1.0", + "http 0.1.0", + "indexmap 1.9.3", + "indoc", + "parking_lot", + "paths", + "pretty_assertions", + "serde", + "strum", + "util", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -9019,31 +9044,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustdoc" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-trait", - "collections", - "derive_more", - "fs", - "futures 0.3.28", - "fuzzy", - "gpui", - "heed", - "html_to_markdown 0.1.0", - "http 0.1.0", - "indexmap 1.9.3", - "indoc", - "parking_lot", - "paths", - "pretty_assertions", - "serde", - "strum", - "util", -] - [[package]] name = "rustix" version = "0.37.23" diff --git a/Cargo.toml b/Cargo.toml index d034661f9e..d5d27267ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ members = [ "crates/html_to_markdown", "crates/http", "crates/image_viewer", + "crates/indexed_docs", "crates/inline_completion_button", "crates/install_cli", "crates/journal", @@ -82,7 +83,6 @@ members = [ "crates/rich_text", "crates/rope", "crates/rpc", - "crates/rustdoc", "crates/search", "crates/semantic_index", "crates/semantic_version", @@ -198,6 +198,7 @@ headless = { path = "crates/headless" } html_to_markdown = { path = "crates/html_to_markdown" } http = { path = "crates/http" } image_viewer = { path = "crates/image_viewer" } +indexed_docs = { path = "crates/indexed_docs" } inline_completion_button = { path = "crates/inline_completion_button" } install_cli = { path = "crates/install_cli" } journal = { path = "crates/journal" } @@ -235,7 +236,6 @@ repl = { path = "crates/repl" } rich_text = { path = "crates/rich_text" } rope = { path = "crates/rope" } rpc = { path = "crates/rpc" } -rustdoc = { path = "crates/rustdoc" } search = { path = "crates/search" } semantic_index = { path = "crates/semantic_index" } semantic_version = { path = "crates/semantic_version" } diff --git a/crates/assistant/Cargo.toml b/crates/assistant/Cargo.toml index a9397bf3c1..97f308e084 100644 --- a/crates/assistant/Cargo.toml +++ b/crates/assistant/Cargo.toml @@ -13,8 +13,8 @@ path = "src/assistant.rs" doctest = false [dependencies] -anyhow.workspace = true anthropic = { workspace = true, features = ["schemars"] } +anyhow.workspace = true assistant_slash_command.workspace = true async-watch.workspace = true cargo_toml.workspace = true @@ -32,6 +32,7 @@ gpui.workspace = true heed.workspace = true html_to_markdown.workspace = true http.workspace = true +indexed_docs.workspace = true indoc.workspace = true language.workspace = true log.workspace = true @@ -45,7 +46,6 @@ paths.workspace = true project.workspace = true regex.workspace = true rope.workspace = true -rustdoc.workspace = true schemars.workspace = true search.workspace = true semantic_index.workspace = true diff --git a/crates/assistant/src/assistant.rs b/crates/assistant/src/assistant.rs index 8a2bbcb0f0..47c7ce5cd4 100644 --- a/crates/assistant/src/assistant.rs +++ b/crates/assistant/src/assistant.rs @@ -20,9 +20,9 @@ pub(crate) use completion_provider::*; pub(crate) use context_store::*; use fs::Fs; use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal}; +use indexed_docs::{IndexedDocsRegistry, Provider}; pub(crate) use inline_assistant::*; pub(crate) use model_selector::*; -use rustdoc::RustdocStore; use semantic_index::{CloudEmbeddingProvider, SemanticIndex}; use serde::{Deserialize, Serialize}; use settings::{Settings, SettingsStore}; @@ -292,7 +292,8 @@ pub fn init(fs: Arc, client: Arc, cx: &mut AppContext) { assistant_panel::init(cx); inline_assistant::init(fs.clone(), client.telemetry().clone(), cx); terminal_inline_assistant::init(fs.clone(), client.telemetry().clone(), cx); - RustdocStore::init_global(cx); + IndexedDocsRegistry::init_global(cx); + register_indexed_docs_providers(cx); CommandPaletteFilter::update_global(cx, |filter, _cx| { filter.hide_namespace(Assistant::NAMESPACE); @@ -327,6 +328,12 @@ fn register_slash_commands(cx: &mut AppContext) { slash_command_registry.register_command(fetch_command::FetchSlashCommand, false); } +fn register_indexed_docs_providers(cx: &mut AppContext) { + let indexed_docs_registry = IndexedDocsRegistry::global(cx); + + indexed_docs_registry.register_provider(Provider::rustdoc()); +} + pub fn humanize_token_count(count: usize) -> String { match count { 0..=999 => count.to_string(), diff --git a/crates/assistant/src/assistant_panel.rs b/crates/assistant/src/assistant_panel.rs index c99abb7cf9..529c14b094 100644 --- a/crates/assistant/src/assistant_panel.rs +++ b/crates/assistant/src/assistant_panel.rs @@ -39,6 +39,7 @@ use gpui::{ Subscription, Task, Transformation, UpdateGlobal, View, ViewContext, VisualContext, WeakView, WindowContext, }; +use indexed_docs::{IndexedDocsStore, PackageName, ProviderId}; use language::{ language_settings::SoftWrap, AnchorRangeExt as _, AutoindentMode, Buffer, LanguageRegistry, LspAdapterDelegate, OffsetRangeExt as _, Point, ToOffset as _, @@ -47,7 +48,6 @@ use multi_buffer::MultiBufferRow; use paths::contexts_dir; use picker::{Picker, PickerDelegate}; use project::{Project, ProjectLspAdapterDelegate, ProjectTransaction}; -use rustdoc::{CrateName, RustdocStore}; use search::{buffer_search::DivRegistrar, BufferSearchBar}; use settings::Settings; use std::{ @@ -3410,7 +3410,9 @@ fn render_rustdoc_slash_command_trailer( command: PendingSlashCommand, cx: &mut WindowContext, ) -> AnyElement { - let rustdoc_store = RustdocStore::global(cx); + let Some(rustdoc_store) = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx).ok() else { + return Empty.into_any(); + }; let Some((crate_name, _)) = command .argument @@ -3420,7 +3422,7 @@ fn render_rustdoc_slash_command_trailer( return Empty.into_any(); }; - let crate_name = CrateName::from(crate_name); + let crate_name = PackageName::from(crate_name); if !rustdoc_store.is_indexing(&crate_name) { return Empty.into_any(); } diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index 1658852e3c..72a4ae8d1d 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -8,9 +8,12 @@ use fs::Fs; use futures::AsyncReadExt; use gpui::{AppContext, Model, Task, WeakView}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; +use indexed_docs::{ + convert_rustdoc_to_markdown, IndexedDocsStore, LocalProvider, PackageName, ProviderId, + RustdocSource, +}; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; -use rustdoc::{convert_rustdoc_to_markdown, CrateName, LocalProvider, RustdocSource, RustdocStore}; use ui::prelude::*; use util::{maybe, ResultExt}; use workspace::Workspace; @@ -21,7 +24,7 @@ impl RustdocSlashCommand { async fn build_message( fs: Arc, http_client: Arc, - crate_name: CrateName, + crate_name: PackageName, module_path: Vec, path_to_cargo_toml: Option<&Path>, ) -> Result<(RustdocSource, String)> { @@ -127,8 +130,10 @@ impl SlashCommand for RustdocSlashCommand { anyhow::Ok((fs, cargo_workspace_root)) }); - let store = RustdocStore::global(cx); + let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx); cx.background_executor().spawn(async move { + let store = store?; + if let Some((crate_name, rest)) = query.split_once(':') { if rest.is_empty() { if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() { @@ -169,16 +174,17 @@ impl SlashCommand for RustdocSlashCommand { .next() .ok_or_else(|| anyhow!("missing crate name")) { - Ok(crate_name) => CrateName::from(crate_name), + Ok(crate_name) => PackageName::from(crate_name), Err(err) => return Task::ready(Err(err)), }; let item_path = path_components.map(ToString::to_string).collect::>(); let text = cx.background_executor().spawn({ - let rustdoc_store = RustdocStore::global(cx); + let rustdoc_store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx); let crate_name = crate_name.clone(); let item_path = item_path.clone(); async move { + let rustdoc_store = rustdoc_store?; let item_docs = rustdoc_store .load( crate_name.clone(), @@ -191,7 +197,7 @@ impl SlashCommand for RustdocSlashCommand { .await; if let Ok(item_docs) = item_docs { - anyhow::Ok((RustdocSource::Index, item_docs.docs().to_owned())) + anyhow::Ok((RustdocSource::Index, item_docs.to_string())) } else { Self::build_message( fs, diff --git a/crates/rustdoc/Cargo.toml b/crates/indexed_docs/Cargo.toml similarity index 92% rename from crates/rustdoc/Cargo.toml rename to crates/indexed_docs/Cargo.toml index 9d63736de4..e1609116d1 100644 --- a/crates/rustdoc/Cargo.toml +++ b/crates/indexed_docs/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "rustdoc" +name = "indexed_docs" version = "0.1.0" edition = "2021" publish = false @@ -9,7 +9,7 @@ license = "GPL-3.0-or-later" workspace = true [lib] -path = "src/rustdoc.rs" +path = "src/indexed_docs.rs" [dependencies] anyhow.workspace = true diff --git a/crates/rustdoc/LICENSE-GPL b/crates/indexed_docs/LICENSE-GPL similarity index 100% rename from crates/rustdoc/LICENSE-GPL rename to crates/indexed_docs/LICENSE-GPL diff --git a/crates/indexed_docs/src/indexed_docs.rs b/crates/indexed_docs/src/indexed_docs.rs new file mode 100644 index 0000000000..c49f2ca238 --- /dev/null +++ b/crates/indexed_docs/src/indexed_docs.rs @@ -0,0 +1,8 @@ +mod indexer; +mod providers; +mod registry; +mod store; + +pub use crate::providers::rustdoc::*; +pub use crate::registry::*; +pub use crate::store::*; diff --git a/crates/indexed_docs/src/indexer.rs b/crates/indexed_docs/src/indexer.rs new file mode 100644 index 0000000000..75069c13db --- /dev/null +++ b/crates/indexed_docs/src/indexer.rs @@ -0,0 +1,122 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use collections::{HashSet, VecDeque}; + +use crate::{ + convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind, +}; + +#[async_trait] +pub trait IndexedDocsProvider { + async fn fetch_page( + &self, + package: &PackageName, + item: Option<&RustdocItem>, + ) -> Result>; +} + +#[derive(Debug)] +struct RustdocItemWithHistory { + pub item: RustdocItem, + #[cfg(debug_assertions)] + pub history: Vec, +} + +pub(crate) struct DocsIndexer { + database: Arc, + provider: Box, +} + +impl DocsIndexer { + pub fn new( + database: Arc, + provider: Box, + ) -> Self { + Self { database, provider } + } + + /// Indexes the package with the given name. + pub async fn index(&self, package: PackageName) -> Result<()> { + let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else { + return Ok(()); + }; + + let (crate_root_markdown, items) = + convert_rustdoc_to_markdown(package_root_content.as_bytes())?; + + self.database + .insert(package.clone(), None, crate_root_markdown) + .await?; + + let mut seen_items = HashSet::from_iter(items.clone()); + let mut items_to_visit: VecDeque = + VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { + item, + #[cfg(debug_assertions)] + history: Vec::new(), + })); + + while let Some(item_with_history) = items_to_visit.pop_front() { + let item = &item_with_history.item; + + let Some(result) = self + .provider + .fetch_page(&package, Some(&item)) + .await + .with_context(|| { + #[cfg(debug_assertions)] + { + format!( + "failed to fetch {item:?}: {history:?}", + history = item_with_history.history + ) + } + + #[cfg(not(debug_assertions))] + { + format!("failed to fetch {item:?}") + } + })? + else { + continue; + }; + + let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; + + self.database + .insert(package.clone(), Some(item), markdown) + .await?; + + let parent_item = item; + for mut item in referenced_items { + if seen_items.contains(&item) { + continue; + } + + seen_items.insert(item.clone()); + + item.path.extend(parent_item.path.clone()); + match parent_item.kind { + RustdocItemKind::Mod => { + item.path.push(parent_item.name.clone()); + } + _ => {} + } + + items_to_visit.push_back(RustdocItemWithHistory { + #[cfg(debug_assertions)] + history: { + let mut history = item_with_history.history.clone(); + history.push(item.url_path()); + history + }, + item, + }); + } + } + + Ok(()) + } +} diff --git a/crates/indexed_docs/src/providers.rs b/crates/indexed_docs/src/providers.rs new file mode 100644 index 0000000000..c6505a2ab6 --- /dev/null +++ b/crates/indexed_docs/src/providers.rs @@ -0,0 +1 @@ +pub mod rustdoc; diff --git a/crates/indexed_docs/src/providers/rustdoc.rs b/crates/indexed_docs/src/providers/rustdoc.rs new file mode 100644 index 0000000000..3073504adf --- /dev/null +++ b/crates/indexed_docs/src/providers/rustdoc.rs @@ -0,0 +1,117 @@ +mod item; +mod to_markdown; + +pub use item::*; +pub use to_markdown::convert_rustdoc_to_markdown; + +use std::path::PathBuf; +use std::sync::Arc; + +use anyhow::{bail, Context, Result}; +use async_trait::async_trait; +use fs::Fs; +use futures::AsyncReadExt; +use http::{AsyncBody, HttpClient, HttpClientWithUrl}; + +use crate::indexer::IndexedDocsProvider; +use crate::PackageName; + +#[derive(Debug, Clone, Copy)] +pub enum RustdocSource { + /// The docs were sourced from Zed's rustdoc index. + Index, + /// The docs were sourced from local `cargo doc` output. + Local, + /// The docs were sourced from `docs.rs`. + DocsDotRs, +} + +pub struct LocalProvider { + fs: Arc, + cargo_workspace_root: PathBuf, +} + +impl LocalProvider { + pub fn new(fs: Arc, cargo_workspace_root: PathBuf) -> Self { + Self { + fs, + cargo_workspace_root, + } + } +} + +#[async_trait] +impl IndexedDocsProvider for LocalProvider { + async fn fetch_page( + &self, + crate_name: &PackageName, + item: Option<&RustdocItem>, + ) -> Result> { + let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc"); + local_cargo_doc_path.push(crate_name.as_ref()); + if let Some(item) = item { + local_cargo_doc_path.push(item.url_path()); + } else { + local_cargo_doc_path.push("index.html"); + } + + let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { + return Ok(None); + }; + + Ok(Some(contents)) + } +} + +pub struct DocsDotRsProvider { + http_client: Arc, +} + +impl DocsDotRsProvider { + pub fn new(http_client: Arc) -> Self { + Self { http_client } + } +} + +#[async_trait] +impl IndexedDocsProvider for DocsDotRsProvider { + async fn fetch_page( + &self, + crate_name: &PackageName, + item: Option<&RustdocItem>, + ) -> Result> { + let version = "latest"; + let path = format!( + "{crate_name}/{version}/{crate_name}{item_path}", + item_path = item + .map(|item| format!("/{}", item.url_path())) + .unwrap_or_default() + ); + + let mut response = self + .http_client + .get( + &format!("https://docs.rs/{path}"), + AsyncBody::default(), + true, + ) + .await?; + + let mut body = Vec::new(); + response + .body_mut() + .read_to_end(&mut body) + .await + .context("error reading docs.rs response body")?; + + if response.status().is_client_error() { + let text = String::from_utf8_lossy(body.as_slice()); + bail!( + "status error {}, response: {text:?}", + response.status().as_u16() + ); + } + + Ok(Some(String::from_utf8(body)?)) + } +} diff --git a/crates/rustdoc/src/item.rs b/crates/indexed_docs/src/providers/rustdoc/item.rs similarity index 100% rename from crates/rustdoc/src/item.rs rename to crates/indexed_docs/src/providers/rustdoc/item.rs diff --git a/crates/rustdoc/src/to_markdown.rs b/crates/indexed_docs/src/providers/rustdoc/to_markdown.rs similarity index 100% rename from crates/rustdoc/src/to_markdown.rs rename to crates/indexed_docs/src/providers/rustdoc/to_markdown.rs diff --git a/crates/indexed_docs/src/registry.rs b/crates/indexed_docs/src/registry.rs new file mode 100644 index 0000000000..b47f46e151 --- /dev/null +++ b/crates/indexed_docs/src/registry.rs @@ -0,0 +1,47 @@ +use std::sync::Arc; + +use collections::HashMap; +use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal}; +use parking_lot::RwLock; + +use crate::{IndexedDocsStore, Provider, ProviderId}; + +struct GlobalIndexedDocsRegistry(Arc); + +impl Global for GlobalIndexedDocsRegistry {} + +pub struct IndexedDocsRegistry { + executor: BackgroundExecutor, + stores_by_provider: RwLock>>, +} + +impl IndexedDocsRegistry { + pub fn global(cx: &AppContext) -> Arc { + GlobalIndexedDocsRegistry::global(cx).0.clone() + } + + pub fn init_global(cx: &mut AppContext) { + GlobalIndexedDocsRegistry::set_global( + cx, + GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))), + ); + } + + pub fn new(executor: BackgroundExecutor) -> Self { + Self { + executor, + stores_by_provider: RwLock::new(HashMap::default()), + } + } + + pub fn register_provider(&self, provider: Provider) { + self.stores_by_provider.write().insert( + provider.id.clone(), + Arc::new(IndexedDocsStore::new(provider, self.executor.clone())), + ); + } + + pub fn get_provider_store(&self, provider_id: ProviderId) -> Option> { + self.stores_by_provider.read().get(&provider_id).cloned() + } +} diff --git a/crates/rustdoc/src/store.rs b/crates/indexed_docs/src/store.rs similarity index 63% rename from crates/rustdoc/src/store.rs rename to crates/indexed_docs/src/store.rs index 8a8a2fd9f7..8017d87819 100644 --- a/crates/rustdoc/src/store.rs +++ b/crates/indexed_docs/src/store.rs @@ -8,59 +8,71 @@ use derive_more::{Deref, Display}; use futures::future::{self, BoxFuture, Shared}; use futures::FutureExt; use fuzzy::StringMatchCandidate; -use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal}; +use gpui::{AppContext, BackgroundExecutor, Task}; use heed::types::SerdeBincode; use heed::Database; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use util::ResultExt; -use crate::indexer::{RustdocIndexer, RustdocProvider}; -use crate::{RustdocItem, RustdocItemKind}; +use crate::indexer::{DocsIndexer, IndexedDocsProvider}; +use crate::{IndexedDocsRegistry, RustdocItem}; -/// The name of a crate. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] -pub struct CrateName(Arc); +pub struct ProviderId(Arc); -impl From<&str> for CrateName { +impl ProviderId { + pub fn rustdoc() -> Self { + Self("rustdoc".into()) + } +} + +pub struct Provider { + pub id: ProviderId, + pub database_path: PathBuf, +} + +impl Provider { + pub fn rustdoc() -> Self { + Self { + id: ProviderId("rustdoc".into()), + database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"), + } + } +} + +/// The name of a package. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] +pub struct PackageName(Arc); + +impl From<&str> for PackageName { fn from(value: &str) -> Self { Self(value.into()) } } -struct GlobalRustdocStore(Arc); - -impl Global for GlobalRustdocStore {} - -pub struct RustdocStore { +/// A store for indexed docs. +pub struct IndexedDocsStore { executor: BackgroundExecutor, - database_future: Shared, Arc>>>, - indexing_tasks_by_crate: - RwLock>>>>>, + database_future: + Shared, Arc>>>, + indexing_tasks_by_package: + RwLock>>>>>, } -impl RustdocStore { - pub fn global(cx: &AppContext) -> Arc { - GlobalRustdocStore::global(cx).0.clone() +impl IndexedDocsStore { + pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result> { + let registry = IndexedDocsRegistry::global(cx); + registry + .get_provider_store(provider.clone()) + .ok_or_else(|| anyhow!("no indexed docs store found for {provider}")) } - pub fn init_global(cx: &mut AppContext) { - GlobalRustdocStore::set_global( - cx, - GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))), - ); - } - - pub fn new(executor: BackgroundExecutor) -> Self { + pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self { let database_future = executor .spawn({ let executor = executor.clone(); - async move { - RustdocDatabase::new( - paths::support_dir().join("docs/rust/rustdoc-db.0.mdb"), - executor, - ) - } + async move { IndexedDocsDatabase::new(provider.database_path, executor) } }) .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new))) .boxed() @@ -69,34 +81,34 @@ impl RustdocStore { Self { executor, database_future, - indexing_tasks_by_crate: RwLock::new(HashMap::default()), + indexing_tasks_by_package: RwLock::new(HashMap::default()), } } - /// Returns whether the crate with the given name is currently being indexed. - pub fn is_indexing(&self, crate_name: &CrateName) -> bool { - self.indexing_tasks_by_crate.read().contains_key(crate_name) + /// Returns whether the package with the given name is currently being indexed. + pub fn is_indexing(&self, package: &PackageName) -> bool { + self.indexing_tasks_by_package.read().contains_key(package) } pub async fn load( &self, - crate_name: CrateName, + package: PackageName, item_path: Option, - ) -> Result { + ) -> Result { self.database_future .clone() .await .map_err(|err| anyhow!(err))? - .load(crate_name, item_path) + .load(package, item_path) .await } pub fn index( self: Arc, - crate_name: CrateName, - provider: Box, + package: PackageName, + provider: Box, ) -> Shared>>> { - if let Some(existing_task) = self.indexing_tasks_by_crate.read().get(&crate_name) { + if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) { return existing_task.clone(); } @@ -104,13 +116,13 @@ impl RustdocStore { .executor .spawn({ let this = self.clone(); - let crate_name = crate_name.clone(); + let package = package.clone(); async move { let _finally = util::defer({ let this = this.clone(); - let crate_name = crate_name.clone(); + let package = package.clone(); move || { - this.indexing_tasks_by_crate.write().remove(&crate_name); + this.indexing_tasks_by_package.write().remove(&package); } }); @@ -120,9 +132,9 @@ impl RustdocStore { .clone() .await .map_err(|err| anyhow!(err))?; - let indexer = RustdocIndexer::new(database, provider); + let indexer = DocsIndexer::new(database, provider); - indexer.index(crate_name.clone()).await + indexer.index(package.clone()).await }; index_task.await.map_err(Arc::new) @@ -130,9 +142,9 @@ impl RustdocStore { }) .shared(); - self.indexing_tasks_by_crate + self.indexing_tasks_by_package .write() - .insert(crate_name, indexing_task.clone()); + .insert(package, indexing_task.clone()); indexing_task } @@ -177,27 +189,16 @@ impl RustdocStore { } } -#[derive(Serialize, Deserialize)] -pub enum RustdocDatabaseEntry { - Crate { docs: String }, - Item { kind: RustdocItemKind, docs: String }, -} +#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)] +pub struct MarkdownDocs(pub String); -impl RustdocDatabaseEntry { - pub fn docs(&self) -> &str { - match self { - Self::Crate { docs } | Self::Item { docs, .. } => &docs, - } - } -} - -pub(crate) struct RustdocDatabase { +pub(crate) struct IndexedDocsDatabase { executor: BackgroundExecutor, env: heed::Env, - entries: Database, SerdeBincode>, + entries: Database, SerdeBincode>, } -impl RustdocDatabase { +impl IndexedDocsDatabase { pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result { std::fs::create_dir_all(&path)?; @@ -238,15 +239,15 @@ impl RustdocDatabase { pub fn load( &self, - crate_name: CrateName, + package: PackageName, item_path: Option, - ) -> Task> { + ) -> Task> { let env = self.env.clone(); let entries = self.entries; let item_path = if let Some(item_path) = item_path { - format!("{crate_name}::{item_path}") + format!("{package}::{item_path}") } else { - crate_name.to_string() + package.to_string() }; self.executor.spawn(async move { @@ -259,22 +260,16 @@ impl RustdocDatabase { pub fn insert( &self, - crate_name: CrateName, + package: PackageName, item: Option<&RustdocItem>, docs: String, ) -> Task> { let env = self.env.clone(); let entries = self.entries; let (item_path, entry) = if let Some(item) = item { - ( - format!("{crate_name}::{}", item.display()), - RustdocDatabaseEntry::Item { - kind: item.kind, - docs, - }, - ) + (format!("{package}::{}", item.display()), MarkdownDocs(docs)) } else { - (crate_name.to_string(), RustdocDatabaseEntry::Crate { docs }) + (package.to_string(), MarkdownDocs(docs)) }; self.executor.spawn(async move { diff --git a/crates/rustdoc/src/indexer.rs b/crates/rustdoc/src/indexer.rs deleted file mode 100644 index 81253e1d14..0000000000 --- a/crates/rustdoc/src/indexer.rs +++ /dev/null @@ -1,226 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; - -use anyhow::{bail, Context, Result}; -use async_trait::async_trait; -use collections::{HashSet, VecDeque}; -use fs::Fs; -use futures::AsyncReadExt; -use http::{AsyncBody, HttpClient, HttpClientWithUrl}; - -use crate::{ - convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind, -}; - -#[derive(Debug, Clone, Copy)] -pub enum RustdocSource { - /// The docs were sourced from Zed's rustdoc index. - Index, - /// The docs were sourced from local `cargo doc` output. - Local, - /// The docs were sourced from `docs.rs`. - DocsDotRs, -} - -#[async_trait] -pub trait RustdocProvider { - async fn fetch_page( - &self, - crate_name: &CrateName, - item: Option<&RustdocItem>, - ) -> Result>; -} - -pub struct LocalProvider { - fs: Arc, - cargo_workspace_root: PathBuf, -} - -impl LocalProvider { - pub fn new(fs: Arc, cargo_workspace_root: PathBuf) -> Self { - Self { - fs, - cargo_workspace_root, - } - } -} - -#[async_trait] -impl RustdocProvider for LocalProvider { - async fn fetch_page( - &self, - crate_name: &CrateName, - item: Option<&RustdocItem>, - ) -> Result> { - let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc"); - local_cargo_doc_path.push(crate_name.as_ref()); - if let Some(item) = item { - local_cargo_doc_path.push(item.url_path()); - } else { - local_cargo_doc_path.push("index.html"); - } - - let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { - return Ok(None); - }; - - Ok(Some(contents)) - } -} - -pub struct DocsDotRsProvider { - http_client: Arc, -} - -impl DocsDotRsProvider { - pub fn new(http_client: Arc) -> Self { - Self { http_client } - } -} - -#[async_trait] -impl RustdocProvider for DocsDotRsProvider { - async fn fetch_page( - &self, - crate_name: &CrateName, - item: Option<&RustdocItem>, - ) -> Result> { - let version = "latest"; - let path = format!( - "{crate_name}/{version}/{crate_name}{item_path}", - item_path = item - .map(|item| format!("/{}", item.url_path())) - .unwrap_or_default() - ); - - let mut response = self - .http_client - .get( - &format!("https://docs.rs/{path}"), - AsyncBody::default(), - true, - ) - .await?; - - let mut body = Vec::new(); - response - .body_mut() - .read_to_end(&mut body) - .await - .context("error reading docs.rs response body")?; - - if response.status().is_client_error() { - let text = String::from_utf8_lossy(body.as_slice()); - bail!( - "status error {}, response: {text:?}", - response.status().as_u16() - ); - } - - Ok(Some(String::from_utf8(body)?)) - } -} - -#[derive(Debug)] -struct RustdocItemWithHistory { - pub item: RustdocItem, - #[cfg(debug_assertions)] - pub history: Vec, -} - -pub(crate) struct RustdocIndexer { - database: Arc, - provider: Box, -} - -impl RustdocIndexer { - pub fn new( - database: Arc, - provider: Box, - ) -> Self { - Self { database, provider } - } - - /// Indexes the crate with the given name. - pub async fn index(&self, crate_name: CrateName) -> Result<()> { - let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else { - return Ok(()); - }; - - let (crate_root_markdown, items) = - convert_rustdoc_to_markdown(crate_root_content.as_bytes())?; - - self.database - .insert(crate_name.clone(), None, crate_root_markdown) - .await?; - - let mut seen_items = HashSet::from_iter(items.clone()); - let mut items_to_visit: VecDeque = - VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { - item, - #[cfg(debug_assertions)] - history: Vec::new(), - })); - - while let Some(item_with_history) = items_to_visit.pop_front() { - let item = &item_with_history.item; - - let Some(result) = self - .provider - .fetch_page(&crate_name, Some(&item)) - .await - .with_context(|| { - #[cfg(debug_assertions)] - { - format!( - "failed to fetch {item:?}: {history:?}", - history = item_with_history.history - ) - } - - #[cfg(not(debug_assertions))] - { - format!("failed to fetch {item:?}") - } - })? - else { - continue; - }; - - let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - - self.database - .insert(crate_name.clone(), Some(item), markdown) - .await?; - - let parent_item = item; - for mut item in referenced_items { - if seen_items.contains(&item) { - continue; - } - - seen_items.insert(item.clone()); - - item.path.extend(parent_item.path.clone()); - match parent_item.kind { - RustdocItemKind::Mod => { - item.path.push(parent_item.name.clone()); - } - _ => {} - } - - items_to_visit.push_back(RustdocItemWithHistory { - #[cfg(debug_assertions)] - history: { - let mut history = item_with_history.history.clone(); - history.push(item.url_path()); - history - }, - item, - }); - } - } - - Ok(()) - } -} diff --git a/crates/rustdoc/src/rustdoc.rs b/crates/rustdoc/src/rustdoc.rs deleted file mode 100644 index 5bf300a5f4..0000000000 --- a/crates/rustdoc/src/rustdoc.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod indexer; -mod item; -mod store; -mod to_markdown; - -pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource}; -pub use crate::item::*; -pub use crate::store::*; -pub use crate::to_markdown::convert_rustdoc_to_markdown;