assistant: Add docs provider for docs.rs (#14356)

This PR adds an indexed docs provider for retrieving docs from `docs.rs`
using the `/docs` slash command.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-07-12 13:22:52 -04:00 committed by GitHub
parent 739038ddaf
commit ca80343486
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 211 additions and 185 deletions

View File

@ -8,7 +8,8 @@ use assistant_slash_command::{
}; };
use gpui::{AppContext, Model, Task, WeakView}; use gpui::{AppContext, Model, Task, WeakView};
use indexed_docs::{ use indexed_docs::{
IndexedDocsRegistry, IndexedDocsStore, LocalProvider, PackageName, ProviderId, RustdocIndexer, DocsDotRsProvider, IndexedDocsRegistry, IndexedDocsStore, LocalRustdocProvider, PackageName,
ProviderId,
}; };
use language::LspAdapterDelegate; use language::LspAdapterDelegate;
use project::{Project, ProjectPath}; use project::{Project, ProjectPath};
@ -34,22 +35,22 @@ impl DocsSlashCommand {
)) ))
} }
/// Ensures that the rustdoc provider is registered. /// Ensures that the indexed doc providers for Rust are registered.
/// ///
/// Ideally we would do this sooner, but we need to wait until we're able to /// Ideally we would do this sooner, but we need to wait until we're able to
/// access the workspace so we can read the project. /// access the workspace so we can read the project.
fn ensure_rustdoc_provider_is_registered( fn ensure_rust_doc_providers_are_registered(
&self, &self,
workspace: Option<WeakView<Workspace>>, workspace: Option<WeakView<Workspace>>,
cx: &mut AppContext, cx: &mut AppContext,
) { ) {
let indexed_docs_registry = IndexedDocsRegistry::global(cx); let indexed_docs_registry = IndexedDocsRegistry::global(cx);
if indexed_docs_registry if indexed_docs_registry
.get_provider_store(ProviderId::rustdoc()) .get_provider_store(LocalRustdocProvider::id())
.is_none() .is_none()
{ {
let index_provider_deps = maybe!({ let index_provider_deps = maybe!({
let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?; let workspace = workspace.clone().ok_or_else(|| anyhow!("no workspace"))?;
let workspace = workspace let workspace = workspace
.upgrade() .upgrade()
.ok_or_else(|| anyhow!("workspace was dropped"))?; .ok_or_else(|| anyhow!("workspace was dropped"))?;
@ -63,9 +64,29 @@ impl DocsSlashCommand {
}); });
if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() { if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
indexed_docs_registry.register_provider(Box::new(RustdocIndexer::new(Box::new( indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new(
LocalProvider::new(fs, cargo_workspace_root), fs,
)))); cargo_workspace_root,
)));
}
}
if indexed_docs_registry
.get_provider_store(DocsDotRsProvider::id())
.is_none()
{
let http_client = maybe!({
let workspace = workspace.ok_or_else(|| anyhow!("no workspace"))?;
let workspace = workspace
.upgrade()
.ok_or_else(|| anyhow!("workspace was dropped"))?;
let project = workspace.read(cx).project().clone();
anyhow::Ok(project.read(cx).client().http_client().clone())
});
if let Some(http_client) = http_client.log_err() {
indexed_docs_registry
.register_provider(Box::new(DocsDotRsProvider::new(http_client)));
} }
} }
} }
@ -95,7 +116,7 @@ impl SlashCommand for DocsSlashCommand {
workspace: Option<WeakView<Workspace>>, workspace: Option<WeakView<Workspace>>,
cx: &mut AppContext, cx: &mut AppContext,
) -> Task<Result<Vec<ArgumentCompletion>>> { ) -> Task<Result<Vec<ArgumentCompletion>>> {
self.ensure_rustdoc_provider_is_registered(workspace, cx); self.ensure_rust_doc_providers_are_registered(workspace, cx);
let indexed_docs_registry = IndexedDocsRegistry::global(cx); let indexed_docs_registry = IndexedDocsRegistry::global(cx);
let args = DocsSlashCommandArgs::parse(&query); let args = DocsSlashCommandArgs::parse(&query);

View File

@ -1,6 +1,7 @@
mod item; mod item;
mod to_markdown; mod to_markdown;
use futures::future::BoxFuture;
pub use item::*; pub use item::*;
pub use to_markdown::convert_rustdoc_to_markdown; pub use to_markdown::convert_rustdoc_to_markdown;
@ -11,7 +12,7 @@ use anyhow::{bail, Context, Result};
use async_trait::async_trait; use async_trait::async_trait;
use collections::{HashSet, VecDeque}; use collections::{HashSet, VecDeque};
use fs::Fs; use fs::Fs;
use futures::AsyncReadExt; use futures::{AsyncReadExt, FutureExt};
use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId}; use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
@ -23,124 +24,16 @@ struct RustdocItemWithHistory {
pub history: Vec<String>, pub history: Vec<String>,
} }
#[async_trait] pub struct LocalRustdocProvider {
/// A source of rustdoc pages for a package.
pub trait RustdocProvider {
/// Fetches one documentation page for `package`.
///
/// Passing `None` for `item` requests the package's root page; passing
/// `Some(item)` requests the page for that item.
///
/// The indexer in this file treats `Ok(None)` as "no page available": it
/// stops early for the root page and skips the item otherwise.
async fn fetch_page(
&self,
package: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>>;
}
/// Indexes rustdoc pages obtained through a boxed [`RustdocProvider`].
pub struct RustdocIndexer {
/// The page source queried while indexing.
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
impl RustdocIndexer {
/// Creates an indexer that fetches its pages from `provider`.
pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
Self { provider }
}
}
// Implements `IndexedDocsProvider` by crawling the rustdoc pages served by the
// wrapped `RustdocProvider` and storing each page as Markdown.
#[async_trait]
impl IndexedDocsProvider for RustdocIndexer {
/// Returns the `rustdoc` provider id.
fn id(&self) -> ProviderId {
ProviderId::rustdoc()
}
/// Returns the database path: `docs/rust/rustdoc-db.1.mdb` under `paths::support_dir()`.
fn database_path(&self) -> PathBuf {
paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
}
/// Indexes `package` into `database`.
///
/// The package root page is fetched first and stored under the package name.
/// Every item referenced from an indexed page is then visited in FIFO order
/// and stored under `"{package}::{item}"`. If the root page is unavailable
/// (`Ok(None)`), nothing is indexed and `Ok(())` is returned.
///
/// # Errors
///
/// Fails if a page fetch, the rustdoc-to-Markdown conversion, or a database
/// insert returns an error.
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
// No root page means there is nothing to index for this package.
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
database
.insert(package.to_string(), crate_root_markdown)
.await?;
// `seen_items` prevents the same referenced item from being queued twice.
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = self
.provider
.fetch_page(&package, Some(&item))
.await
.with_context(|| {
// Debug builds also report the chain of URL paths that led to this item.
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
// The provider has no page for this item; skip it.
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
database
.insert(format!("{package}::{}", item.display()), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
// The item is recorded as seen in the form returned by the converter,
// before its path is extended below.
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
// Items referenced from a module page also get the module's name
// appended to their path.
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
// `history` is initialized before `item` because it borrows `item`,
// which the `item` field then moves.
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}
}
pub struct LocalProvider {
fs: Arc<dyn Fs>, fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf, cargo_workspace_root: PathBuf,
} }
impl LocalProvider { impl LocalRustdocProvider {
pub fn id() -> ProviderId {
ProviderId("rustdoc".into())
}
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self { pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self { Self {
fs, fs,
@ -150,30 +43,46 @@ impl LocalProvider {
} }
#[async_trait] #[async_trait]
impl RustdocProvider for LocalProvider { impl IndexedDocsProvider for LocalRustdocProvider {
async fn fetch_page( fn id(&self) -> ProviderId {
&self, Self::id()
crate_name: &PackageName, }
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
if !self.fs.is_dir(&local_cargo_doc_path).await { fn database_path(&self) -> PathBuf {
bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`"); paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
} }
if let Some(item) = item { async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
local_cargo_doc_path.push(item.url_path()); index_rustdoc(package, database, {
} else { move |crate_name, item| {
local_cargo_doc_path.push("index.html"); let fs = self.fs.clone();
} let cargo_workspace_root = self.cargo_workspace_root.clone();
let crate_name = crate_name.clone();
let item = item.cloned();
async move {
let mut local_cargo_doc_path = cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { if !fs.is_dir(&local_cargo_doc_path).await {
return Ok(None); bail!("docs directory for '{crate_name}' does not exist. run `cargo doc`");
}; }
Ok(Some(contents)) if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
.boxed()
}
})
.await
} }
} }
@ -182,50 +91,152 @@ pub struct DocsDotRsProvider {
} }
impl DocsDotRsProvider { impl DocsDotRsProvider {
pub fn id() -> ProviderId {
ProviderId("docs-rs".into())
}
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self { pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client } Self { http_client }
} }
} }
#[async_trait] #[async_trait]
impl RustdocProvider for DocsDotRsProvider { impl IndexedDocsProvider for DocsDotRsProvider {
async fn fetch_page( fn id(&self) -> ProviderId {
&self, Self::id()
crate_name: &PackageName, }
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = self fn database_path(&self) -> PathBuf {
.http_client paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
.get( }
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new(); async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
response index_rustdoc(package, database, {
.body_mut() move |crate_name, item| {
.read_to_end(&mut body) let http_client = self.http_client.clone();
.await let crate_name = crate_name.clone();
.context("error reading docs.rs response body")?; let item = item.cloned();
async move {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
if response.status().is_client_error() { let mut response = http_client
let text = String::from_utf8_lossy(body.as_slice()); .get(
bail!( &format!("https://docs.rs/{path}"),
"status error {}, response: {text:?}", AsyncBody::default(),
response.status().as_u16() true,
); )
} .await?;
Ok(Some(String::from_utf8(body)?)) let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
.boxed()
}
})
.await
} }
} }
/// Crawls the rustdoc pages of `package` and stores each one in `database` as Markdown.
///
/// `fetch_page` is invoked with the package and an optional item: `None` asks for the
/// package root page, `Some(item)` asks for that item's page. When it yields `Ok(None)`
/// for the root page, nothing is indexed and `Ok(())` is returned; when it yields
/// `Ok(None)` for an item, that item is skipped.
///
/// The root page is stored under the package name. Items referenced from an indexed
/// page are visited in FIFO order and stored under `"{package}::{item}"`. An item that
/// has already been seen is never queued a second time.
///
/// # Errors
///
/// Returns an error if fetching a page, converting it to Markdown, or inserting into
/// the database fails.
async fn index_rustdoc(
    package: PackageName,
    database: Arc<IndexedDocsDatabase>,
    fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
        + Send
        + Sync,
) -> Result<()> {
    let root_html = match fetch_page(&package, None).await? {
        Some(html) => html,
        // Without a root page there is nothing to index.
        None => return Ok(()),
    };

    let (root_markdown, root_items) = convert_rustdoc_to_markdown(root_html.as_bytes())?;
    database.insert(package.to_string(), root_markdown).await?;

    let mut seen_items = HashSet::from_iter(root_items.clone());
    let mut queue: VecDeque<RustdocItemWithHistory> = root_items
        .into_iter()
        .map(|item| RustdocItemWithHistory {
            item,
            #[cfg(debug_assertions)]
            history: Vec::new(),
        })
        .collect();

    while let Some(current) = queue.pop_front() {
        let parent = &current.item;

        let fetched = fetch_page(&package, Some(parent)).await.with_context(|| {
            // Debug builds also report the chain of URL paths that led to this item.
            #[cfg(debug_assertions)]
            {
                format!(
                    "failed to fetch {item:?}: {history:?}",
                    item = parent,
                    history = current.history
                )
            }
            #[cfg(not(debug_assertions))]
            {
                format!("failed to fetch {item:?}", item = parent)
            }
        })?;
        let Some(page_html) = fetched else {
            // No page is available for this item; move on to the next one.
            continue;
        };

        let (markdown, referenced_items) = convert_rustdoc_to_markdown(page_html.as_bytes())?;
        database
            .insert(format!("{package}::{}", parent.display()), markdown)
            .await?;

        for mut child in referenced_items {
            if seen_items.contains(&child) {
                continue;
            }
            // The item is recorded as seen in the form returned by the converter,
            // before its path is extended below.
            seen_items.insert(child.clone());

            child.path.extend(parent.path.clone());
            // Items referenced from a module page also get the module's name
            // appended to their path.
            if matches!(parent.kind, RustdocItemKind::Mod) {
                child.path.push(parent.name.clone());
            }

            // Built before `child` is moved into the queue entry.
            #[cfg(debug_assertions)]
            let history = {
                let mut trail = current.history.clone();
                trail.push(child.url_path());
                trail
            };

            queue.push_back(RustdocItemWithHistory {
                item: child,
                #[cfg(debug_assertions)]
                history,
            });
        }
    }

    Ok(())
}

View File

@ -21,12 +21,6 @@ use crate::IndexedDocsRegistry;
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct ProviderId(pub Arc<str>); pub struct ProviderId(pub Arc<str>);
impl ProviderId {
/// Returns the provider id `"rustdoc"`.
pub fn rustdoc() -> Self {
Self("rustdoc".into())
}
}
/// The name of a package. /// The name of a package.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct PackageName(Arc<str>); pub struct PackageName(Arc<str>);