assistant: Add MVP for /rustdoc using indexed docs (#12952)

This PR adds an MVP of the `/rustdoc` command that retrieves docs from
a previously indexed set of docs.

To try this out:

1. Build local docs using `cargo doc`
2. Index the docs for the crate you want to search using `/rustdoc
--index <CRATE_NAME>`
    - Note: This may take a while, depending on the size of the crate
3. Search for docs using `/rustdoc my_crate::path::to::item`
    - You should get completions for the available items

Here are some screenshots of it in action:

<img width="640" alt="Screenshot 2024-06-12 at 6 19 20 PM"
src="https://github.com/zed-industries/zed/assets/1486634/6c49bec9-d084-4dcb-a92c-1b4c557ee9ce">

<img width="636" alt="Screenshot 2024-06-12 at 6 52 56 PM"
src="https://github.com/zed-industries/zed/assets/1486634/636a651c-7d02-48dc-b05c-931f33c49f9c">

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-06-12 19:33:31 -04:00 committed by GitHub
parent ec086945fc
commit 0ac9af94e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 295 additions and 42 deletions

3
Cargo.lock generated
View File

@ -8696,10 +8696,13 @@ dependencies = [
"collections", "collections",
"fs", "fs",
"futures 0.3.28", "futures 0.3.28",
"fuzzy",
"gpui",
"html_to_markdown", "html_to_markdown",
"http 0.1.0", "http 0.1.0",
"indexmap 1.9.3", "indexmap 1.9.3",
"indoc", "indoc",
"parking_lot",
"pretty_assertions", "pretty_assertions",
"strum", "strum",
] ]

View File

@ -21,6 +21,7 @@ pub(crate) use context_store::*;
use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal}; use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
pub(crate) use inline_assistant::*; pub(crate) use inline_assistant::*;
pub(crate) use model_selector::*; pub(crate) use model_selector::*;
use rustdoc::RustdocStore;
use semantic_index::{CloudEmbeddingProvider, SemanticIndex}; use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore}; use settings::{Settings, SettingsStore};
@ -286,6 +287,7 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
register_slash_commands(cx); register_slash_commands(cx);
assistant_panel::init(cx); assistant_panel::init(cx);
inline_assistant::init(client.telemetry().clone(), cx); inline_assistant::init(client.telemetry().clone(), cx);
RustdocStore::init_global(cx);
CommandPaletteFilter::update_global(cx, |filter, _cx| { CommandPaletteFilter::update_global(cx, |filter, _cx| {
filter.hide_namespace(Assistant::NAMESPACE); filter.hide_namespace(Assistant::NAMESPACE);

View File

@ -10,7 +10,8 @@ use gpui::{AppContext, Model, Task, WeakView};
use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use language::LspAdapterDelegate; use language::LspAdapterDelegate;
use project::{Project, ProjectPath}; use project::{Project, ProjectPath};
use rustdoc::convert_rustdoc_to_markdown; use rustdoc::crawler::LocalProvider;
use rustdoc::{convert_rustdoc_to_markdown, RustdocStore};
use ui::{prelude::*, ButtonLike, ElevationIndex}; use ui::{prelude::*, ButtonLike, ElevationIndex};
use workspace::Workspace; use workspace::Workspace;
@ -115,12 +116,19 @@ impl SlashCommand for RustdocSlashCommand {
fn complete_argument( fn complete_argument(
&self, &self,
_query: String, query: String,
_cancel: Arc<AtomicBool>, _cancel: Arc<AtomicBool>,
_workspace: Option<WeakView<Workspace>>, _workspace: Option<WeakView<Workspace>>,
_cx: &mut AppContext, cx: &mut AppContext,
) -> Task<Result<Vec<String>>> { ) -> Task<Result<Vec<String>>> {
Task::ready(Ok(Vec::new())) let store = RustdocStore::global(cx);
cx.background_executor().spawn(async move {
let items = store.search(query).await;
Ok(items
.into_iter()
.map(|(crate_name, item)| format!("{crate_name}::{}", item.display()))
.collect())
})
} }
fn run( fn run(
@ -140,7 +148,67 @@ impl SlashCommand for RustdocSlashCommand {
let project = workspace.read(cx).project().clone(); let project = workspace.read(cx).project().clone();
let fs = project.read(cx).fs().clone(); let fs = project.read(cx).fs().clone();
let http_client = workspace.read(cx).client().http_client(); let http_client = workspace.read(cx).client().http_client();
let mut path_components = argument.split("::"); let path_to_cargo_toml = Self::path_to_cargo_toml(project, cx);
let mut item_path = String::new();
let mut crate_name_to_index = None;
let mut args = argument.split(' ').map(|word| word.trim());
while let Some(arg) = args.next() {
if arg == "--index" {
let Some(crate_name) = args.next() else {
return Task::ready(Err(anyhow!("no crate name provided to --index")));
};
crate_name_to_index = Some(crate_name.to_string());
continue;
}
item_path.push_str(arg);
}
if let Some(crate_name_to_index) = crate_name_to_index {
let index_task = cx.background_executor().spawn({
let rustdoc_store = RustdocStore::global(cx);
let fs = fs.clone();
let crate_name_to_index = crate_name_to_index.clone();
async move {
let cargo_workspace_root = path_to_cargo_toml
.and_then(|path| path.parent().map(|path| path.to_path_buf()))
.ok_or_else(|| anyhow!("no Cargo workspace root found"))?;
let provider = Box::new(LocalProvider::new(fs, cargo_workspace_root));
rustdoc_store
.index(crate_name_to_index.clone(), provider)
.await?;
anyhow::Ok(format!("Indexed {crate_name_to_index}"))
}
});
return cx.foreground_executor().spawn(async move {
let text = index_task.await?;
let range = 0..text.len();
Ok(SlashCommandOutput {
text,
sections: vec![SlashCommandOutputSection {
range,
render_placeholder: Arc::new(move |id, unfold, _cx| {
RustdocIndexPlaceholder {
id,
unfold,
source: RustdocSource::Local,
crate_name: SharedString::from(crate_name_to_index.clone()),
}
.into_any_element()
}),
}],
run_commands_in_text: false,
})
});
}
let mut path_components = item_path.split("::");
let crate_name = match path_components let crate_name = match path_components
.next() .next()
.ok_or_else(|| anyhow!("missing crate name")) .ok_or_else(|| anyhow!("missing crate name"))
@ -148,29 +216,37 @@ impl SlashCommand for RustdocSlashCommand {
Ok(crate_name) => crate_name.to_string(), Ok(crate_name) => crate_name.to_string(),
Err(err) => return Task::ready(Err(err)), Err(err) => return Task::ready(Err(err)),
}; };
let module_path = path_components.map(ToString::to_string).collect::<Vec<_>>(); let item_path = path_components.map(ToString::to_string).collect::<Vec<_>>();
let path_to_cargo_toml = Self::path_to_cargo_toml(project, cx);
let text = cx.background_executor().spawn({ let text = cx.background_executor().spawn({
let rustdoc_store = RustdocStore::global(cx);
let crate_name = crate_name.clone(); let crate_name = crate_name.clone();
let module_path = module_path.clone(); let item_path = item_path.clone();
async move { async move {
Self::build_message( let item_docs = rustdoc_store
fs, .load(crate_name.clone(), Some(item_path.join("::")))
http_client, .await;
crate_name,
module_path, if let Ok(item_docs) = item_docs {
path_to_cargo_toml.as_deref(), anyhow::Ok((RustdocSource::Local, item_docs))
) } else {
.await Self::build_message(
fs,
http_client,
crate_name,
item_path,
path_to_cargo_toml.as_deref(),
)
.await
}
} }
}); });
let crate_name = SharedString::from(crate_name); let crate_name = SharedString::from(crate_name);
let module_path = if module_path.is_empty() { let module_path = if item_path.is_empty() {
None None
} else { } else {
Some(SharedString::from(module_path.join("::"))) Some(SharedString::from(item_path.join("::")))
}; };
cx.foreground_executor().spawn(async move { cx.foreground_executor().spawn(async move {
let (source, text) = text.await?; let (source, text) = text.await?;
@ -228,3 +304,31 @@ impl RenderOnce for RustdocPlaceholder {
.on_click(move |_, cx| unfold(cx)) .on_click(move |_, cx| unfold(cx))
} }
} }
/// Placeholder element rendered for the output section produced by
/// `/rustdoc --index <crate>`.
#[derive(IntoElement)]
struct RustdocIndexPlaceholder {
/// Element id handed to the underlying `ButtonLike`.
pub id: ElementId,
/// Callback invoked when the placeholder is clicked.
pub unfold: Arc<dyn Fn(&mut WindowContext)>,
/// Where the docs came from; shown in the label as "local" or "docs.rs".
pub source: RustdocSource,
/// Name of the crate that was indexed; shown in the label.
pub crate_name: SharedString,
}
impl RenderOnce for RustdocIndexPlaceholder {
    /// Renders the placeholder as a filled button showing the Rust file icon
    /// and a `rustdoc index (<source>): <crate>` label; clicking it calls
    /// the `unfold` callback.
    fn render(self, _cx: &mut WindowContext) -> impl IntoElement {
        let Self {
            id,
            unfold,
            source,
            crate_name,
        } = self;

        // Human-readable name of the docs source for the label.
        let source = match source {
            RustdocSource::Local => "local",
            RustdocSource::DocsDotRs => "docs.rs",
        };
        let label = format!("rustdoc index ({source}): {crate_name}");

        ButtonLike::new(id)
            .style(ButtonStyle::Filled)
            .layer(ElevationIndex::ElevatedSurface)
            .child(Icon::new(IconName::FileRust))
            .child(Label::new(label))
            .on_click(move |_, cx| unfold(cx))
    }
}

View File

@ -17,9 +17,12 @@ async-trait.workspace = true
collections.workspace = true collections.workspace = true
fs.workspace = true fs.workspace = true
futures.workspace = true futures.workspace = true
fuzzy.workspace = true
gpui.workspace = true
html_to_markdown.workspace = true html_to_markdown.workspace = true
http.workspace = true http.workspace = true
indexmap.workspace = true indexmap.workspace = true
parking_lot.workspace = true
strum.workspace = true strum.workspace = true
[dev-dependencies] [dev-dependencies]

View File

@ -7,6 +7,7 @@ use collections::{HashSet, VecDeque};
use fs::Fs; use fs::Fs;
use futures::AsyncReadExt; use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl}; use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use indexmap::IndexMap;
use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind}; use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
@ -51,11 +52,12 @@ impl RustdocProvider for LocalProvider {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc"); let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(&crate_name); local_cargo_doc_path.push(&crate_name);
if let Some(item) = item { if let Some(item) = item {
if !item.path.is_empty() { local_cargo_doc_path.push(item.url_path());
local_cargo_doc_path.push(item.path.join("/")); } else {
} local_cargo_doc_path.push("index.html");
} }
local_cargo_doc_path.push("index.html");
println!("Fetching {}", local_cargo_doc_path.display());
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else { let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
return Ok(None); return Ok(None);
@ -120,12 +122,18 @@ impl RustdocProvider for DocsDotRsProvider {
} }
} }
pub struct RustdocItemWithHistory { #[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem, pub item: RustdocItem,
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
pub history: Vec<String>, pub history: Vec<String>,
} }
/// The result of crawling a single crate's rustdoc output.
pub struct CrateDocs {
/// Markdown converted from the crate's root page.
pub crate_root_markdown: String,
/// Markdown for each item page the crawler visited, keyed by the item.
pub items: IndexMap<RustdocItem, String>,
}
pub struct RustdocCrawler { pub struct RustdocCrawler {
provider: Box<dyn RustdocProvider + Send + Sync + 'static>, provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
} }
@ -135,14 +143,16 @@ impl RustdocCrawler {
Self { provider } Self { provider }
} }
pub async fn crawl(&self, crate_name: String) -> Result<Option<String>> { pub async fn crawl(&self, crate_name: String) -> Result<Option<CrateDocs>> {
let Some(crate_index_content) = self.provider.fetch_page(&crate_name, None).await? else { let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
return Ok(None); return Ok(None);
}; };
let (_markdown, items) = convert_rustdoc_to_markdown(crate_index_content.as_bytes())?; let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
let mut seen_items = HashSet::default(); let mut docs_by_item = IndexMap::new();
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> = let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item, item,
@ -152,6 +162,7 @@ impl RustdocCrawler {
while let Some(item_with_history) = items_to_visit.pop_front() { while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item; let item = &item_with_history.item;
println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name); println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
let Some(result) = self let Some(result) = self
@ -176,23 +187,27 @@ impl RustdocCrawler {
continue; continue;
}; };
let (_markdown, mut items) = convert_rustdoc_to_markdown(result.as_bytes())?; let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
seen_items.insert(item.clone()); docs_by_item.insert(item.clone(), markdown);
for child in &mut items { let parent_item = item;
child.path.extend(item.path.clone()); for mut item in referenced_items {
match item.kind { if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => { RustdocItemKind::Mod => {
child.path.push(item.name.clone()); item.path.push(parent_item.name.clone());
} }
_ => {} _ => {}
} }
}
let unseen_items = items items_to_visit.push_back(RustdocItemWithHistory {
.into_iter()
.map(|item| RustdocItemWithHistory {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
history: { history: {
let mut history = item_with_history.history.clone(); let mut history = item_with_history.history.clone();
@ -200,12 +215,13 @@ impl RustdocCrawler {
history history
}, },
item, item,
}) });
.filter(|item| !seen_items.contains(&item.item)); }
items_to_visit.extend(unseen_items);
} }
Ok(Some(String::new())) Ok(Some(CrateDocs {
crate_root_markdown,
items: docs_by_item,
}))
} }
} }

View File

@ -43,6 +43,13 @@ pub struct RustdocItem {
} }
impl RustdocItem { impl RustdocItem {
/// Returns the item's path segments followed by its name, joined with `::`.
pub fn display(&self) -> String {
    let segments: Vec<_> = self
        .path
        .iter()
        .chain(std::iter::once(&self.name))
        .cloned()
        .collect();
    segments.join("::")
}
pub fn url_path(&self) -> String { pub fn url_path(&self) -> String {
let name = &self.name; let name = &self.name;
let mut path_components = self.path.clone(); let mut path_components = self.path.clone();

View File

@ -1,6 +1,8 @@
pub mod crawler; pub mod crawler;
mod item; mod item;
mod store;
mod to_markdown; mod to_markdown;
pub use crate::item::*; pub use crate::item::*;
pub use crate::store::*;
pub use crate::to_markdown::convert_rustdoc_to_markdown; pub use crate::to_markdown::convert_rustdoc_to_markdown;

116
crates/rustdoc/src/store.rs Normal file
View File

@ -0,0 +1,116 @@
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use anyhow::{anyhow, Result};
use collections::HashMap;
use fuzzy::StringMatchCandidate;
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
use parking_lot::RwLock;
use crate::crawler::{RustdocCrawler, RustdocProvider};
use crate::RustdocItem;
/// Newtype wrapper that lets the shared [`RustdocStore`] be stored as a GPUI global.
struct GlobalRustdocStore(Arc<RustdocStore>);
// Marker impl: `Global` has no required items here.
impl Global for GlobalRustdocStore {}
/// In-memory store of indexed rustdoc documentation.
pub struct RustdocStore {
/// Executor used to spawn the indexing and search tasks.
executor: BackgroundExecutor,
/// Docs text keyed by `(crate name, item)`; shared with spawned tasks behind a lock.
docs: Arc<RwLock<HashMap<(String, RustdocItem), String>>>,
}
impl RustdocStore {
    /// Returns the store registered as a global on `cx`.
    ///
    /// NOTE(review): expects [`RustdocStore::init_global`] to have been called
    /// first — confirm what the global lookup does when it has not.
    pub fn global(cx: &AppContext) -> Arc<Self> {
        GlobalRustdocStore::global(cx).0.clone()
    }

    /// Creates a new, empty store backed by `cx`'s background executor and
    /// registers it as a global.
    pub fn init_global(cx: &mut AppContext) {
        GlobalRustdocStore::set_global(
            cx,
            GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
        );
    }

    /// Creates an empty store whose tasks run on `executor`.
    pub fn new(executor: BackgroundExecutor) -> Self {
        Self {
            executor,
            docs: Arc::new(RwLock::new(HashMap::default())),
        }
    }

    /// Looks up the stored docs for `item_path` within `crate_name`.
    ///
    /// The lookup is a linear scan comparing `item_path` against each stored
    /// item's `display()` string, so an `item_path` of `None` never matches.
    /// The returned task is already resolved; it yields an error when no
    /// entry matches.
    pub fn load(&self, crate_name: String, item_path: Option<String>) -> Task<Result<String>> {
        let item_docs = self
            .docs
            .read()
            .iter()
            .find_map(|((item_crate_name, item), item_docs)| {
                if item_crate_name == &crate_name && item_path == Some(item.display()) {
                    Some(item_docs.clone())
                } else {
                    None
                }
            });

        Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found")))
    }

    /// Crawls the docs for `crate_name` through `provider` and stores the
    /// markdown of every crawled item.
    ///
    /// If the crawler yields no docs the task resolves to `Ok(())` without
    /// storing anything. Only the per-item docs are stored; the crate root
    /// markdown is not.
    pub fn index(
        &self,
        crate_name: String,
        provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
    ) -> Task<Result<()>> {
        let docs = self.docs.clone();
        self.executor.spawn(async move {
            let crawler = RustdocCrawler::new(provider);

            println!("Indexing {crate_name}");

            let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
                return Ok(());
            };

            // The write lock is taken only after crawling has finished, so it
            // is never held across an `.await`.
            let mut lock = docs.write();

            for (item, item_docs) in crate_docs.items {
                lock.insert((crate_name.clone(), item), item_docs);
            }

            Ok(())
        })
    }

    /// Fuzzy-matches `query` against `crate::path::to::item` strings built
    /// from every stored entry and returns up to 100 matching
    /// `(crate name, item)` pairs.
    ///
    /// An empty query yields no results.
    pub fn search(&self, query: String) -> Task<Vec<(String, RustdocItem)>> {
        // Nothing to match: skip taking the lock and spawning a task.
        if query.is_empty() {
            return Task::ready(Vec::new());
        }

        let executor = self.executor.clone();
        // Snapshot only the keys while the read lock is held. Matching never
        // looks at the docs text, so cloning the whole map (including every
        // markdown body) on each query would be wasted work.
        let items = self.docs.read().keys().cloned().collect::<Vec<_>>();

        self.executor.spawn(async move {
            let candidates = items
                .iter()
                .enumerate()
                .map(|(ix, (crate_name, item))| {
                    StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display()))
                })
                .collect::<Vec<_>>();

            let matches = fuzzy::match_strings(
                &candidates,
                &query,
                false,
                100,
                &AtomicBool::default(),
                executor,
            )
            .await;

            // `candidate_id` is the index assigned above, so it maps straight
            // back into `items`.
            matches
                .into_iter()
                .map(|mat| items[mat.candidate_id].clone())
                .collect()
        })
    }
}