From 0ac9af94e0c5a83567ce4ad65d33df837cea3689 Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Wed, 12 Jun 2024 19:33:31 -0400 Subject: [PATCH] assistant: Add MVP for `/rustdoc` using indexed docs (#12952) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds an MVP of retrieving docs using the `/rustdoc` command from an indexed set of docs. To try this out: 1. Build local docs using `cargo doc` 2. Index the docs for the crate you want to search using `/rustdoc --index <CRATE_NAME>` - Note: This may take a while, depending on the size of the crate 3. Search for docs using `/rustdoc my_crate::path::to::item` - You should get completions for the available items Here are some screenshots of it in action: Screenshot 2024-06-12 at 6 19 20 PM Screenshot 2024-06-12 at 6 52 56 PM Release Notes: - N/A --- Cargo.lock | 3 + crates/assistant/src/assistant.rs | 2 + .../src/slash_command/rustdoc_command.rs | 140 +++++++++++++++--- crates/rustdoc/Cargo.toml | 3 + crates/rustdoc/src/crawler.rs | 64 +++++--- crates/rustdoc/src/item.rs | 7 + crates/rustdoc/src/rustdoc.rs | 2 + crates/rustdoc/src/store.rs | 116 +++++++++++++++ 8 files changed, 295 insertions(+), 42 deletions(-) create mode 100644 crates/rustdoc/src/store.rs diff --git a/Cargo.lock b/Cargo.lock index b1849fe63b..79e749b5d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8696,10 +8696,13 @@ dependencies = [ "collections", "fs", "futures 0.3.28", + "fuzzy", + "gpui", "html_to_markdown", "http 0.1.0", "indexmap 1.9.3", "indoc", + "parking_lot", "pretty_assertions", "strum", ] diff --git a/crates/assistant/src/assistant.rs b/crates/assistant/src/assistant.rs index 07488fdc5b..f8b36330ec 100644 --- a/crates/assistant/src/assistant.rs +++ b/crates/assistant/src/assistant.rs @@ -21,6 +21,7 @@ pub(crate) use context_store::*; use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal}; pub(crate) use inline_assistant::*; pub(crate) use model_selector::*; +use 
rustdoc::RustdocStore; use semantic_index::{CloudEmbeddingProvider, SemanticIndex}; use serde::{Deserialize, Serialize}; use settings::{Settings, SettingsStore}; @@ -286,6 +287,7 @@ pub fn init(client: Arc, cx: &mut AppContext) { register_slash_commands(cx); assistant_panel::init(cx); inline_assistant::init(client.telemetry().clone(), cx); + RustdocStore::init_global(cx); CommandPaletteFilter::update_global(cx, |filter, _cx| { filter.hide_namespace(Assistant::NAMESPACE); diff --git a/crates/assistant/src/slash_command/rustdoc_command.rs b/crates/assistant/src/slash_command/rustdoc_command.rs index 853665ddc9..baa6568d1c 100644 --- a/crates/assistant/src/slash_command/rustdoc_command.rs +++ b/crates/assistant/src/slash_command/rustdoc_command.rs @@ -10,7 +10,8 @@ use gpui::{AppContext, Model, Task, WeakView}; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use language::LspAdapterDelegate; use project::{Project, ProjectPath}; -use rustdoc::convert_rustdoc_to_markdown; +use rustdoc::crawler::LocalProvider; +use rustdoc::{convert_rustdoc_to_markdown, RustdocStore}; use ui::{prelude::*, ButtonLike, ElevationIndex}; use workspace::Workspace; @@ -115,12 +116,19 @@ impl SlashCommand for RustdocSlashCommand { fn complete_argument( &self, - _query: String, + query: String, _cancel: Arc, _workspace: Option>, - _cx: &mut AppContext, + cx: &mut AppContext, ) -> Task>> { - Task::ready(Ok(Vec::new())) + let store = RustdocStore::global(cx); + cx.background_executor().spawn(async move { + let items = store.search(query).await; + Ok(items + .into_iter() + .map(|(crate_name, item)| format!("{crate_name}::{}", item.display())) + .collect()) + }) } fn run( @@ -140,7 +148,67 @@ impl SlashCommand for RustdocSlashCommand { let project = workspace.read(cx).project().clone(); let fs = project.read(cx).fs().clone(); let http_client = workspace.read(cx).client().http_client(); - let mut path_components = argument.split("::"); + let path_to_cargo_toml = 
Self::path_to_cargo_toml(project, cx); + + let mut item_path = String::new(); + let mut crate_name_to_index = None; + + let mut args = argument.split(' ').map(|word| word.trim()); + while let Some(arg) = args.next() { + if arg == "--index" { + let Some(crate_name) = args.next() else { + return Task::ready(Err(anyhow!("no crate name provided to --index"))); + }; + crate_name_to_index = Some(crate_name.to_string()); + continue; + } + + item_path.push_str(arg); + } + + if let Some(crate_name_to_index) = crate_name_to_index { + let index_task = cx.background_executor().spawn({ + let rustdoc_store = RustdocStore::global(cx); + let fs = fs.clone(); + let crate_name_to_index = crate_name_to_index.clone(); + async move { + let cargo_workspace_root = path_to_cargo_toml + .and_then(|path| path.parent().map(|path| path.to_path_buf())) + .ok_or_else(|| anyhow!("no Cargo workspace root found"))?; + + let provider = Box::new(LocalProvider::new(fs, cargo_workspace_root)); + + rustdoc_store + .index(crate_name_to_index.clone(), provider) + .await?; + + anyhow::Ok(format!("Indexed {crate_name_to_index}")) + } + }); + + return cx.foreground_executor().spawn(async move { + let text = index_task.await?; + let range = 0..text.len(); + Ok(SlashCommandOutput { + text, + sections: vec![SlashCommandOutputSection { + range, + render_placeholder: Arc::new(move |id, unfold, _cx| { + RustdocIndexPlaceholder { + id, + unfold, + source: RustdocSource::Local, + crate_name: SharedString::from(crate_name_to_index.clone()), + } + .into_any_element() + }), + }], + run_commands_in_text: false, + }) + }); + } + + let mut path_components = item_path.split("::"); let crate_name = match path_components .next() .ok_or_else(|| anyhow!("missing crate name")) @@ -148,29 +216,37 @@ impl SlashCommand for RustdocSlashCommand { Ok(crate_name) => crate_name.to_string(), Err(err) => return Task::ready(Err(err)), }; - let module_path = path_components.map(ToString::to_string).collect::>(); - let path_to_cargo_toml = 
Self::path_to_cargo_toml(project, cx); + let item_path = path_components.map(ToString::to_string).collect::>(); let text = cx.background_executor().spawn({ + let rustdoc_store = RustdocStore::global(cx); let crate_name = crate_name.clone(); - let module_path = module_path.clone(); + let item_path = item_path.clone(); async move { - Self::build_message( - fs, - http_client, - crate_name, - module_path, - path_to_cargo_toml.as_deref(), - ) - .await + let item_docs = rustdoc_store + .load(crate_name.clone(), Some(item_path.join("::"))) + .await; + + if let Ok(item_docs) = item_docs { + anyhow::Ok((RustdocSource::Local, item_docs)) + } else { + Self::build_message( + fs, + http_client, + crate_name, + item_path, + path_to_cargo_toml.as_deref(), + ) + .await + } } }); let crate_name = SharedString::from(crate_name); - let module_path = if module_path.is_empty() { + let module_path = if item_path.is_empty() { None } else { - Some(SharedString::from(module_path.join("::"))) + Some(SharedString::from(item_path.join("::"))) }; cx.foreground_executor().spawn(async move { let (source, text) = text.await?; @@ -228,3 +304,31 @@ impl RenderOnce for RustdocPlaceholder { .on_click(move |_, cx| unfold(cx)) } } + +#[derive(IntoElement)] +struct RustdocIndexPlaceholder { + pub id: ElementId, + pub unfold: Arc, + pub source: RustdocSource, + pub crate_name: SharedString, +} + +impl RenderOnce for RustdocIndexPlaceholder { + fn render(self, _cx: &mut WindowContext) -> impl IntoElement { + let unfold = self.unfold; + + ButtonLike::new(self.id) + .style(ButtonStyle::Filled) + .layer(ElevationIndex::ElevatedSurface) + .child(Icon::new(IconName::FileRust)) + .child(Label::new(format!( + "rustdoc index ({source}): {crate_name}", + crate_name = self.crate_name, + source = match self.source { + RustdocSource::Local => "local", + RustdocSource::DocsDotRs => "docs.rs", + } + ))) + .on_click(move |_, cx| unfold(cx)) + } +} diff --git a/crates/rustdoc/Cargo.toml b/crates/rustdoc/Cargo.toml index 
15f0013233..6aa0f5bb4b 100644 --- a/crates/rustdoc/Cargo.toml +++ b/crates/rustdoc/Cargo.toml @@ -17,9 +17,12 @@ async-trait.workspace = true collections.workspace = true fs.workspace = true futures.workspace = true +fuzzy.workspace = true +gpui.workspace = true html_to_markdown.workspace = true http.workspace = true indexmap.workspace = true +parking_lot.workspace = true strum.workspace = true [dev-dependencies] diff --git a/crates/rustdoc/src/crawler.rs b/crates/rustdoc/src/crawler.rs index 8e536a3075..80f3e0fc4a 100644 --- a/crates/rustdoc/src/crawler.rs +++ b/crates/rustdoc/src/crawler.rs @@ -7,6 +7,7 @@ use collections::{HashSet, VecDeque}; use fs::Fs; use futures::AsyncReadExt; use http::{AsyncBody, HttpClient, HttpClientWithUrl}; +use indexmap::IndexMap; use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind}; @@ -51,11 +52,12 @@ impl RustdocProvider for LocalProvider { let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc"); local_cargo_doc_path.push(&crate_name); if let Some(item) = item { - if !item.path.is_empty() { - local_cargo_doc_path.push(item.path.join("/")); - } + local_cargo_doc_path.push(item.url_path()); + } else { + local_cargo_doc_path.push("index.html"); } - local_cargo_doc_path.push("index.html"); + + println!("Fetching {}", local_cargo_doc_path.display()); let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { return Ok(None); @@ -120,12 +122,18 @@ impl RustdocProvider for DocsDotRsProvider { } } -pub struct RustdocItemWithHistory { +#[derive(Debug)] +struct RustdocItemWithHistory { pub item: RustdocItem, #[cfg(debug_assertions)] pub history: Vec, } +pub struct CrateDocs { + pub crate_root_markdown: String, + pub items: IndexMap, +} + pub struct RustdocCrawler { provider: Box, } @@ -135,14 +143,16 @@ impl RustdocCrawler { Self { provider } } - pub async fn crawl(&self, crate_name: String) -> Result> { - let Some(crate_index_content) = self.provider.fetch_page(&crate_name, None).await? 
else { + pub async fn crawl(&self, crate_name: String) -> Result> { + let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else { return Ok(None); }; - let (_markdown, items) = convert_rustdoc_to_markdown(crate_index_content.as_bytes())?; + let (crate_root_markdown, items) = + convert_rustdoc_to_markdown(crate_root_content.as_bytes())?; - let mut seen_items = HashSet::default(); + let mut docs_by_item = IndexMap::new(); + let mut seen_items = HashSet::from_iter(items.clone()); let mut items_to_visit: VecDeque = VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { item, @@ -152,6 +162,7 @@ impl RustdocCrawler { while let Some(item_with_history) = items_to_visit.pop_front() { let item = &item_with_history.item; + println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name); let Some(result) = self @@ -176,23 +187,27 @@ impl RustdocCrawler { continue; }; - let (_markdown, mut items) = convert_rustdoc_to_markdown(result.as_bytes())?; + let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - seen_items.insert(item.clone()); + docs_by_item.insert(item.clone(), markdown); - for child in &mut items { - child.path.extend(item.path.clone()); - match item.kind { + let parent_item = item; + for mut item in referenced_items { + if seen_items.contains(&item) { + continue; + } + + seen_items.insert(item.clone()); + + item.path.extend(parent_item.path.clone()); + match parent_item.kind { RustdocItemKind::Mod => { - child.path.push(item.name.clone()); + item.path.push(parent_item.name.clone()); } _ => {} } - } - let unseen_items = items - .into_iter() - .map(|item| RustdocItemWithHistory { + items_to_visit.push_back(RustdocItemWithHistory { #[cfg(debug_assertions)] history: { let mut history = item_with_history.history.clone(); @@ -200,12 +215,13 @@ impl RustdocCrawler { history }, item, - }) - .filter(|item| !seen_items.contains(&item.item)); - - items_to_visit.extend(unseen_items); + }); + } 
} - Ok(Some(String::new())) + Ok(Some(CrateDocs { + crate_root_markdown, + items: docs_by_item, + })) } } diff --git a/crates/rustdoc/src/item.rs b/crates/rustdoc/src/item.rs index 69f10794c3..14d414113f 100644 --- a/crates/rustdoc/src/item.rs +++ b/crates/rustdoc/src/item.rs @@ -43,6 +43,13 @@ pub struct RustdocItem { } impl RustdocItem { + pub fn display(&self) -> String { + let mut path_segments = self.path.clone(); + path_segments.push(self.name.clone()); + + path_segments.join("::") + } + pub fn url_path(&self) -> String { let name = &self.name; let mut path_components = self.path.clone(); diff --git a/crates/rustdoc/src/rustdoc.rs b/crates/rustdoc/src/rustdoc.rs index a1ca949334..a35fdac62a 100644 --- a/crates/rustdoc/src/rustdoc.rs +++ b/crates/rustdoc/src/rustdoc.rs @@ -1,6 +1,8 @@ pub mod crawler; mod item; +mod store; mod to_markdown; pub use crate::item::*; +pub use crate::store::*; pub use crate::to_markdown::convert_rustdoc_to_markdown; diff --git a/crates/rustdoc/src/store.rs b/crates/rustdoc/src/store.rs new file mode 100644 index 0000000000..626a58e1c1 --- /dev/null +++ b/crates/rustdoc/src/store.rs @@ -0,0 +1,116 @@ +use std::sync::atomic::AtomicBool; +use std::sync::Arc; + +use anyhow::{anyhow, Result}; +use collections::HashMap; +use fuzzy::StringMatchCandidate; +use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal}; +use parking_lot::RwLock; + +use crate::crawler::{RustdocCrawler, RustdocProvider}; +use crate::RustdocItem; + +struct GlobalRustdocStore(Arc); + +impl Global for GlobalRustdocStore {} + +pub struct RustdocStore { + executor: BackgroundExecutor, + docs: Arc>>, +} + +impl RustdocStore { + pub fn global(cx: &AppContext) -> Arc { + GlobalRustdocStore::global(cx).0.clone() + } + + pub fn init_global(cx: &mut AppContext) { + GlobalRustdocStore::set_global( + cx, + GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))), + ); + } + + pub fn new(executor: BackgroundExecutor) -> Self { + Self { 
+ executor, + docs: Arc::new(RwLock::new(HashMap::default())), + } + } + + pub fn load(&self, crate_name: String, item_path: Option) -> Task> { + let item_docs = self + .docs + .read() + .iter() + .find_map(|((item_crate_name, item), item_docs)| { + if item_crate_name == &crate_name && item_path == Some(item.display()) { + Some(item_docs.clone()) + } else { + None + } + }); + + Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found"))) + } + + pub fn index( + &self, + crate_name: String, + provider: Box, + ) -> Task> { + let docs = self.docs.clone(); + self.executor.spawn(async move { + let crawler = RustdocCrawler::new(provider); + + println!("Indexing {crate_name}"); + + let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else { + return Ok(()); + }; + + let mut lock = docs.write(); + + for (item, item_docs) in crate_docs.items { + lock.insert((crate_name.clone(), item), item_docs); + } + + Ok(()) + }) + } + + pub fn search(&self, query: String) -> Task> { + let executor = self.executor.clone(); + let docs = self.docs.read().clone(); + self.executor.spawn(async move { + if query.is_empty() { + return Vec::new(); + } + + let items = docs.keys().collect::>(); + + let candidates = items + .iter() + .enumerate() + .map(|(ix, (crate_name, item))| { + StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display())) + }) + .collect::>(); + + let matches = fuzzy::match_strings( + &candidates, + &query, + false, + 100, + &AtomicBool::default(), + executor, + ) + .await; + + matches + .into_iter() + .map(|mat| items[mat.candidate_id].clone()) + .collect() + }) + } +}