Start work on genericizing /rustdoc (#13745)

This PR begins the process of making the backing infrastructure for the
`/rustdoc` command more generic such that it can be applied to
additional documentation providers.

In this PR we:

- Rename the `rustdoc` crate to `indexed_docs`, a more general-purpose name
- Start moving rustdoc-specific functionality into
`indexed_docs::providers::rustdoc`
- Add an `IndexedDocsRegistry` to hold multiple `IndexedDocsStore`s (one
per provider)

We haven't yet removed the rustdoc-specific bits in the `DocsIndexer`.
That will follow soon.
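
To make the new shape concrete, here is a rough sketch of how the pieces fit together after this PR. The helper function and the example package name are illustrative only; in the actual change the equivalent wiring lives in `assistant::init` and the `/rustdoc` slash command:

```rust
use gpui::AppContext;
use indexed_docs::{IndexedDocsRegistry, IndexedDocsStore, PackageName, Provider, ProviderId};

// Illustrative helper, not part of this PR.
fn example_wiring(cx: &mut AppContext) -> anyhow::Result<()> {
    // One global registry holds an `IndexedDocsStore` per registered provider.
    IndexedDocsRegistry::init_global(cx);
    IndexedDocsRegistry::global(cx).register_provider(Provider::rustdoc());

    // Callers look up the store for a specific provider instead of using the
    // old rustdoc-only global.
    let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx)?;
    let _is_indexing = store.is_indexing(&PackageName::from("serde")); // example package
    Ok(())
}
```

The `/rustdoc` command behaves the same as before; it just resolves its store through the registry now.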

Release Notes:

- N/A
Marshall Bowers, 2024-07-02 13:14:56 -04:00 (committed via GitHub)
commit 7460381285, parent eab98eb9c9
18 changed files with 425 additions and 355 deletions

Cargo.lock

@@ -391,6 +391,7 @@ dependencies = [
 "heed",
 "html_to_markdown 0.1.0",
 "http 0.1.0",
+"indexed_docs",
 "indoc",
 "language",
 "log",
@@ -406,7 +407,6 @@ dependencies = [
 "rand 0.8.5",
 "regex",
 "rope",
-"rustdoc",
 "schemars",
 "search",
 "semantic_index",
@@ -5493,6 +5493,31 @@ version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"

+[[package]]
+name = "indexed_docs"
+version = "0.1.0"
+dependencies = [
+"anyhow",
+"async-trait",
+"collections",
+"derive_more",
+"fs",
+"futures 0.3.28",
+"fuzzy",
+"gpui",
+"heed",
+"html_to_markdown 0.1.0",
+"http 0.1.0",
+"indexmap 1.9.3",
+"indoc",
+"parking_lot",
+"paths",
+"pretty_assertions",
+"serde",
+"strum",
+"util",
+]
+
 [[package]]
 name = "indexmap"
 version = "1.9.3"
@@ -9019,31 +9044,6 @@ dependencies = [
 "semver",
 ]

-[[package]]
-name = "rustdoc"
-version = "0.1.0"
-dependencies = [
-"anyhow",
-"async-trait",
-"collections",
-"derive_more",
-"fs",
-"futures 0.3.28",
-"fuzzy",
-"gpui",
-"heed",
-"html_to_markdown 0.1.0",
-"http 0.1.0",
-"indexmap 1.9.3",
-"indoc",
-"parking_lot",
-"paths",
-"pretty_assertions",
-"serde",
-"strum",
-"util",
-]
-
 [[package]]
 name = "rustix"
 version = "0.37.23"


@@ -45,6 +45,7 @@ members = [
 "crates/html_to_markdown",
 "crates/http",
 "crates/image_viewer",
+"crates/indexed_docs",
 "crates/inline_completion_button",
 "crates/install_cli",
 "crates/journal",
@@ -82,7 +83,6 @@ members = [
 "crates/rich_text",
 "crates/rope",
 "crates/rpc",
-"crates/rustdoc",
 "crates/search",
 "crates/semantic_index",
 "crates/semantic_version",
@@ -198,6 +198,7 @@ headless = { path = "crates/headless" }
 html_to_markdown = { path = "crates/html_to_markdown" }
 http = { path = "crates/http" }
 image_viewer = { path = "crates/image_viewer" }
+indexed_docs = { path = "crates/indexed_docs" }
 inline_completion_button = { path = "crates/inline_completion_button" }
 install_cli = { path = "crates/install_cli" }
 journal = { path = "crates/journal" }
@@ -235,7 +236,6 @@ repl = { path = "crates/repl" }
 rich_text = { path = "crates/rich_text" }
 rope = { path = "crates/rope" }
 rpc = { path = "crates/rpc" }
-rustdoc = { path = "crates/rustdoc" }
 search = { path = "crates/search" }
 semantic_index = { path = "crates/semantic_index" }
 semantic_version = { path = "crates/semantic_version" }


@@ -13,8 +13,8 @@ path = "src/assistant.rs"
 doctest = false

 [dependencies]
-anyhow.workspace = true
 anthropic = { workspace = true, features = ["schemars"] }
+anyhow.workspace = true
 assistant_slash_command.workspace = true
 async-watch.workspace = true
 cargo_toml.workspace = true
@@ -32,6 +32,7 @@ gpui.workspace = true
 heed.workspace = true
 html_to_markdown.workspace = true
 http.workspace = true
+indexed_docs.workspace = true
 indoc.workspace = true
 language.workspace = true
 log.workspace = true
@@ -45,7 +46,6 @@ paths.workspace = true
 project.workspace = true
 regex.workspace = true
 rope.workspace = true
-rustdoc.workspace = true
 schemars.workspace = true
 search.workspace = true
 semantic_index.workspace = true


@@ -20,9 +20,9 @@ pub(crate) use completion_provider::*;
 pub(crate) use context_store::*;
 use fs::Fs;
 use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
+use indexed_docs::{IndexedDocsRegistry, Provider};
 pub(crate) use inline_assistant::*;
 pub(crate) use model_selector::*;
-use rustdoc::RustdocStore;
 use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -292,7 +292,8 @@ pub fn init(fs: Arc<dyn Fs>, client: Arc<Client>, cx: &mut AppContext) {
 assistant_panel::init(cx);
 inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
 terminal_inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
-RustdocStore::init_global(cx);
+IndexedDocsRegistry::init_global(cx);
+register_indexed_docs_providers(cx);

 CommandPaletteFilter::update_global(cx, |filter, _cx| {
 filter.hide_namespace(Assistant::NAMESPACE);
@@ -327,6 +328,12 @@ fn register_slash_commands(cx: &mut AppContext) {
 slash_command_registry.register_command(fetch_command::FetchSlashCommand, false);
 }

+fn register_indexed_docs_providers(cx: &mut AppContext) {
+let indexed_docs_registry = IndexedDocsRegistry::global(cx);
+indexed_docs_registry.register_provider(Provider::rustdoc());
+}
+
 pub fn humanize_token_count(count: usize) -> String {
 match count {
 0..=999 => count.to_string(),


@@ -39,6 +39,7 @@ use gpui::{
 Subscription, Task, Transformation, UpdateGlobal, View, ViewContext, VisualContext, WeakView,
 WindowContext,
 };
+use indexed_docs::{IndexedDocsStore, PackageName, ProviderId};
 use language::{
 language_settings::SoftWrap, AnchorRangeExt as _, AutoindentMode, Buffer, LanguageRegistry,
 LspAdapterDelegate, OffsetRangeExt as _, Point, ToOffset as _,
@@ -47,7 +48,6 @@ use multi_buffer::MultiBufferRow;
 use paths::contexts_dir;
 use picker::{Picker, PickerDelegate};
 use project::{Project, ProjectLspAdapterDelegate, ProjectTransaction};
-use rustdoc::{CrateName, RustdocStore};
 use search::{buffer_search::DivRegistrar, BufferSearchBar};
 use settings::Settings;
 use std::{
@@ -3410,7 +3410,9 @@ fn render_rustdoc_slash_command_trailer(
 command: PendingSlashCommand,
 cx: &mut WindowContext,
 ) -> AnyElement {
-let rustdoc_store = RustdocStore::global(cx);
+let Some(rustdoc_store) = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx).ok() else {
+return Empty.into_any();
+};

 let Some((crate_name, _)) = command
 .argument
@@ -3420,7 +3422,7 @@ fn render_rustdoc_slash_command_trailer(
 return Empty.into_any();
 };

-let crate_name = CrateName::from(crate_name);
+let crate_name = PackageName::from(crate_name);
 if !rustdoc_store.is_indexing(&crate_name) {
 return Empty.into_any();
 }


@@ -8,9 +8,12 @@ use fs::Fs;
 use futures::AsyncReadExt;
 use gpui::{AppContext, Model, Task, WeakView};
 use http::{AsyncBody, HttpClient, HttpClientWithUrl};
+use indexed_docs::{
+convert_rustdoc_to_markdown, IndexedDocsStore, LocalProvider, PackageName, ProviderId,
+RustdocSource,
+};
 use language::LspAdapterDelegate;
 use project::{Project, ProjectPath};
-use rustdoc::{convert_rustdoc_to_markdown, CrateName, LocalProvider, RustdocSource, RustdocStore};
 use ui::prelude::*;
 use util::{maybe, ResultExt};
 use workspace::Workspace;
@@ -21,7 +24,7 @@ impl RustdocSlashCommand {
 async fn build_message(
 fs: Arc<dyn Fs>,
 http_client: Arc<HttpClientWithUrl>,
-crate_name: CrateName,
+crate_name: PackageName,
 module_path: Vec<String>,
 path_to_cargo_toml: Option<&Path>,
 ) -> Result<(RustdocSource, String)> {
@@ -127,8 +130,10 @@ impl SlashCommand for RustdocSlashCommand {
 anyhow::Ok((fs, cargo_workspace_root))
 });

-let store = RustdocStore::global(cx);
+let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
 cx.background_executor().spawn(async move {
+let store = store?;
+
 if let Some((crate_name, rest)) = query.split_once(':') {
 if rest.is_empty() {
 if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
@@ -169,16 +174,17 @@ impl SlashCommand for RustdocSlashCommand {
 .next()
 .ok_or_else(|| anyhow!("missing crate name"))
 {
-Ok(crate_name) => CrateName::from(crate_name),
+Ok(crate_name) => PackageName::from(crate_name),
 Err(err) => return Task::ready(Err(err)),
 };

 let item_path = path_components.map(ToString::to_string).collect::<Vec<_>>();

 let text = cx.background_executor().spawn({
-let rustdoc_store = RustdocStore::global(cx);
+let rustdoc_store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
 let crate_name = crate_name.clone();
 let item_path = item_path.clone();
 async move {
+let rustdoc_store = rustdoc_store?;
 let item_docs = rustdoc_store
 .load(
 crate_name.clone(),
@@ -191,7 +197,7 @@ impl SlashCommand for RustdocSlashCommand {
 .await;

 if let Ok(item_docs) = item_docs {
-anyhow::Ok((RustdocSource::Index, item_docs.docs().to_owned()))
+anyhow::Ok((RustdocSource::Index, item_docs.to_string()))
 } else {
 Self::build_message(
 fs,


@@ -1,5 +1,5 @@
 [package]
-name = "rustdoc"
+name = "indexed_docs"
 version = "0.1.0"
 edition = "2021"
 publish = false
@@ -9,7 +9,7 @@ license = "GPL-3.0-or-later"
 workspace = true

 [lib]
-path = "src/rustdoc.rs"
+path = "src/indexed_docs.rs"

 [dependencies]
 anyhow.workspace = true


@@ -0,0 +1,8 @@
mod indexer;
mod providers;
mod registry;
mod store;
pub use crate::providers::rustdoc::*;
pub use crate::registry::*;
pub use crate::store::*;


@@ -0,0 +1,122 @@
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use collections::{HashSet, VecDeque};
use crate::{
convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
};
#[async_trait]
pub trait IndexedDocsProvider {
async fn fetch_page(
&self,
package: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>>;
}
#[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem,
#[cfg(debug_assertions)]
pub history: Vec<String>,
}
pub(crate) struct DocsIndexer {
database: Arc<IndexedDocsDatabase>,
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
}
impl DocsIndexer {
pub fn new(
database: Arc<IndexedDocsDatabase>,
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
) -> Self {
Self { database, provider }
}
/// Indexes the package with the given name.
pub async fn index(&self, package: PackageName) -> Result<()> {
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
self.database
.insert(package.clone(), None, crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = self
.provider
.fetch_page(&package, Some(&item))
.await
.with_context(|| {
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
self.database
.insert(package.clone(), Some(item), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}
}


@@ -0,0 +1 @@
pub mod rustdoc;


@@ -0,0 +1,117 @@
mod item;
mod to_markdown;
pub use item::*;
pub use to_markdown::convert_rustdoc_to_markdown;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use fs::Fs;
use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::indexer::IndexedDocsProvider;
use crate::PackageName;
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
/// The docs were sourced from Zed's rustdoc index.
Index,
/// The docs were sourced from local `cargo doc` output.
Local,
/// The docs were sourced from `docs.rs`.
DocsDotRs,
}
pub struct LocalProvider {
fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf,
}
impl LocalProvider {
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self {
fs,
cargo_workspace_root,
}
}
}
#[async_trait]
impl IndexedDocsProvider for LocalProvider {
async fn fetch_page(
&self,
crate_name: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
}
pub struct DocsDotRsProvider {
http_client: Arc<HttpClientWithUrl>,
}
impl DocsDotRsProvider {
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client }
}
}
#[async_trait]
impl IndexedDocsProvider for DocsDotRsProvider {
async fn fetch_page(
&self,
crate_name: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = self
.http_client
.get(
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
}


@@ -0,0 +1,47 @@
use std::sync::Arc;
use collections::HashMap;
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
use parking_lot::RwLock;
use crate::{IndexedDocsStore, Provider, ProviderId};
struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
impl Global for GlobalIndexedDocsRegistry {}
pub struct IndexedDocsRegistry {
executor: BackgroundExecutor,
stores_by_provider: RwLock<HashMap<ProviderId, Arc<IndexedDocsStore>>>,
}
impl IndexedDocsRegistry {
pub fn global(cx: &AppContext) -> Arc<Self> {
GlobalIndexedDocsRegistry::global(cx).0.clone()
}
pub fn init_global(cx: &mut AppContext) {
GlobalIndexedDocsRegistry::set_global(
cx,
GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))),
);
}
pub fn new(executor: BackgroundExecutor) -> Self {
Self {
executor,
stores_by_provider: RwLock::new(HashMap::default()),
}
}
pub fn register_provider(&self, provider: Provider) {
self.stores_by_provider.write().insert(
provider.id.clone(),
Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
);
}
pub fn get_provider_store(&self, provider_id: ProviderId) -> Option<Arc<IndexedDocsStore>> {
self.stores_by_provider.read().get(&provider_id).cloned()
}
}


@@ -8,59 +8,71 @@ use derive_more::{Deref, Display};
 use futures::future::{self, BoxFuture, Shared};
 use futures::FutureExt;
 use fuzzy::StringMatchCandidate;
-use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
+use gpui::{AppContext, BackgroundExecutor, Task};
 use heed::types::SerdeBincode;
 use heed::Database;
 use parking_lot::RwLock;
 use serde::{Deserialize, Serialize};
 use util::ResultExt;

-use crate::indexer::{RustdocIndexer, RustdocProvider};
-use crate::{RustdocItem, RustdocItemKind};
+use crate::indexer::{DocsIndexer, IndexedDocsProvider};
+use crate::{IndexedDocsRegistry, RustdocItem};

-/// The name of a crate.
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
-pub struct CrateName(Arc<str>);
+pub struct ProviderId(Arc<str>);

-impl From<&str> for CrateName {
+impl ProviderId {
+pub fn rustdoc() -> Self {
+Self("rustdoc".into())
+}
+}
+
+pub struct Provider {
+pub id: ProviderId,
+pub database_path: PathBuf,
+}
+
+impl Provider {
+pub fn rustdoc() -> Self {
+Self {
+id: ProviderId("rustdoc".into()),
+database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
+}
+}
+}
+
+/// The name of a package.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
+pub struct PackageName(Arc<str>);
+
+impl From<&str> for PackageName {
 fn from(value: &str) -> Self {
 Self(value.into())
 }
 }

-struct GlobalRustdocStore(Arc<RustdocStore>);
-
-impl Global for GlobalRustdocStore {}
-
-pub struct RustdocStore {
+/// A store for indexed docs.
+pub struct IndexedDocsStore {
 executor: BackgroundExecutor,
-database_future: Shared<BoxFuture<'static, Result<Arc<RustdocDatabase>, Arc<anyhow::Error>>>>,
-indexing_tasks_by_crate:
-RwLock<HashMap<CrateName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
+database_future:
+Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
+indexing_tasks_by_package:
+RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
 }

-impl RustdocStore {
-pub fn global(cx: &AppContext) -> Arc<Self> {
-GlobalRustdocStore::global(cx).0.clone()
+impl IndexedDocsStore {
+pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result<Arc<Self>> {
+let registry = IndexedDocsRegistry::global(cx);
+registry
+.get_provider_store(provider.clone())
+.ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
 }

-pub fn init_global(cx: &mut AppContext) {
-GlobalRustdocStore::set_global(
-cx,
-GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
-);
-}
-
-pub fn new(executor: BackgroundExecutor) -> Self {
+pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
 let database_future = executor
 .spawn({
 let executor = executor.clone();
-async move {
-RustdocDatabase::new(
-paths::support_dir().join("docs/rust/rustdoc-db.0.mdb"),
-executor,
-)
-}
+async move { IndexedDocsDatabase::new(provider.database_path, executor) }
 })
 .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
 .boxed()
@@ -69,34 +81,34 @@ impl RustdocStore {
 Self {
 executor,
 database_future,
-indexing_tasks_by_crate: RwLock::new(HashMap::default()),
+indexing_tasks_by_package: RwLock::new(HashMap::default()),
 }
 }

-/// Returns whether the crate with the given name is currently being indexed.
-pub fn is_indexing(&self, crate_name: &CrateName) -> bool {
-self.indexing_tasks_by_crate.read().contains_key(crate_name)
+/// Returns whether the package with the given name is currently being indexed.
+pub fn is_indexing(&self, package: &PackageName) -> bool {
+self.indexing_tasks_by_package.read().contains_key(package)
 }

 pub async fn load(
 &self,
-crate_name: CrateName,
+package: PackageName,
 item_path: Option<String>,
-) -> Result<RustdocDatabaseEntry> {
+) -> Result<MarkdownDocs> {
 self.database_future
 .clone()
 .await
 .map_err(|err| anyhow!(err))?
-.load(crate_name, item_path)
+.load(package, item_path)
 .await
 }

 pub fn index(
 self: Arc<Self>,
-crate_name: CrateName,
-provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
+package: PackageName,
+provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
 ) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
-if let Some(existing_task) = self.indexing_tasks_by_crate.read().get(&crate_name) {
+if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
 return existing_task.clone();
 }

@@ -104,13 +116,13 @@ impl RustdocStore {
 .executor
 .spawn({
 let this = self.clone();
-let crate_name = crate_name.clone();
+let package = package.clone();
 async move {
 let _finally = util::defer({
 let this = this.clone();
-let crate_name = crate_name.clone();
+let package = package.clone();
 move || {
-this.indexing_tasks_by_crate.write().remove(&crate_name);
+this.indexing_tasks_by_package.write().remove(&package);
 }
 });

@@ -120,9 +132,9 @@ impl RustdocStore {
 .clone()
 .await
 .map_err(|err| anyhow!(err))?;
-let indexer = RustdocIndexer::new(database, provider);
+let indexer = DocsIndexer::new(database, provider);

-indexer.index(crate_name.clone()).await
+indexer.index(package.clone()).await
 };

 index_task.await.map_err(Arc::new)
@@ -130,9 +142,9 @@ impl RustdocStore {
 })
 .shared();

-self.indexing_tasks_by_crate
+self.indexing_tasks_by_package
 .write()
-.insert(crate_name, indexing_task.clone());
+.insert(package, indexing_task.clone());

 indexing_task
 }
@@ -177,27 +189,16 @@ impl RustdocStore {
 }
 }

-#[derive(Serialize, Deserialize)]
-pub enum RustdocDatabaseEntry {
-Crate { docs: String },
-Item { kind: RustdocItemKind, docs: String },
-}
-
-impl RustdocDatabaseEntry {
-pub fn docs(&self) -> &str {
-match self {
-Self::Crate { docs } | Self::Item { docs, .. } => &docs,
-}
-}
-}
+#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
+pub struct MarkdownDocs(pub String);

-pub(crate) struct RustdocDatabase {
+pub(crate) struct IndexedDocsDatabase {
 executor: BackgroundExecutor,
 env: heed::Env,
-entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,
+entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
 }

-impl RustdocDatabase {
+impl IndexedDocsDatabase {
 pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
 std::fs::create_dir_all(&path)?;

@@ -238,15 +239,15 @@ impl RustdocDatabase {
 pub fn load(
 &self,
-crate_name: CrateName,
+package: PackageName,
 item_path: Option<String>,
-) -> Task<Result<RustdocDatabaseEntry>> {
+) -> Task<Result<MarkdownDocs>> {
 let env = self.env.clone();
 let entries = self.entries;
 let item_path = if let Some(item_path) = item_path {
-format!("{crate_name}::{item_path}")
+format!("{package}::{item_path}")
 } else {
-crate_name.to_string()
+package.to_string()
 };

 self.executor.spawn(async move {
@@ -259,22 +260,16 @@ impl RustdocDatabase {
 pub fn insert(
 &self,
-crate_name: CrateName,
+package: PackageName,
 item: Option<&RustdocItem>,
 docs: String,
 ) -> Task<Result<()>> {
 let env = self.env.clone();
 let entries = self.entries;
 let (item_path, entry) = if let Some(item) = item {
-(
-format!("{crate_name}::{}", item.display()),
-RustdocDatabaseEntry::Item {
-kind: item.kind,
-docs,
-},
-)
+(format!("{package}::{}", item.display()), MarkdownDocs(docs))
 } else {
-(crate_name.to_string(), RustdocDatabaseEntry::Crate { docs })
+(package.to_string(), MarkdownDocs(docs))
 };

 self.executor.spawn(async move {

@@ -1,226 +0,0 @@
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use collections::{HashSet, VecDeque};
use fs::Fs;
use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::{
convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind,
};
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
/// The docs were sourced from Zed's rustdoc index.
Index,
/// The docs were sourced from local `cargo doc` output.
Local,
/// The docs were sourced from `docs.rs`.
DocsDotRs,
}
#[async_trait]
pub trait RustdocProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>>;
}
pub struct LocalProvider {
fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf,
}
impl LocalProvider {
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self {
fs,
cargo_workspace_root,
}
}
}
#[async_trait]
impl RustdocProvider for LocalProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
}
pub struct DocsDotRsProvider {
http_client: Arc<HttpClientWithUrl>,
}
impl DocsDotRsProvider {
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client }
}
}
#[async_trait]
impl RustdocProvider for DocsDotRsProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = self
.http_client
.get(
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
}
#[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem,
#[cfg(debug_assertions)]
pub history: Vec<String>,
}
pub(crate) struct RustdocIndexer {
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
impl RustdocIndexer {
pub fn new(
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
) -> Self {
Self { database, provider }
}
/// Indexes the crate with the given name.
pub async fn index(&self, crate_name: CrateName) -> Result<()> {
let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
self.database
.insert(crate_name.clone(), None, crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = self
.provider
.fetch_page(&crate_name, Some(&item))
.await
.with_context(|| {
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
self.database
.insert(crate_name.clone(), Some(item), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}
}


@@ -1,9 +0,0 @@
mod indexer;
mod item;
mod store;
mod to_markdown;
pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource};
pub use crate::item::*;
pub use crate::store::*;
pub use crate::to_markdown::convert_rustdoc_to_markdown;