rustdoc: Index crates progressively (#13011)

This PR updates the rustdoc indexing to be more progressive.

Rather than waiting until we've crawled the entire crate before writing
to the database, we now write the docs as we go.

This lets you start getting completions while the indexing is still
running.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-06-13 16:40:06 -04:00 committed by GitHub
parent 0705fb9b97
commit 6181ac6bad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 40 deletions

View File

@ -10,7 +10,7 @@ use gpui::{AppContext, Model, Task, WeakView};
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use language::LspAdapterDelegate;
use project::{Project, ProjectPath};
use rustdoc::crawler::LocalProvider;
use rustdoc::LocalProvider;
use rustdoc::{convert_rustdoc_to_markdown, RustdocStore};
use ui::{prelude::*, ButtonLike, ElevationIndex};
use workspace::Workspace;

View File

@ -7,9 +7,8 @@ use collections::{HashSet, VecDeque};
use fs::Fs;
use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use indexmap::IndexMap;
use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
use crate::{convert_rustdoc_to_markdown, RustdocDatabase, RustdocItem, RustdocItemKind};
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
@ -129,29 +128,32 @@ struct RustdocItemWithHistory {
pub history: Vec<String>,
}
pub struct CrateDocs {
pub crate_root_markdown: String,
pub items: IndexMap<RustdocItem, String>,
}
pub struct RustdocCrawler {
pub(crate) struct RustdocIndexer {
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
impl RustdocCrawler {
pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
Self { provider }
impl RustdocIndexer {
pub fn new(
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
) -> Self {
Self { database, provider }
}
pub async fn crawl(&self, crate_name: String) -> Result<Option<CrateDocs>> {
/// Indexes the crate with the given name.
pub async fn index(&self, crate_name: String) -> Result<()> {
let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
return Ok(None);
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
let mut docs_by_item = IndexMap::new();
self.database
.insert(crate_name.clone(), None, crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
@ -189,7 +191,13 @@ impl RustdocCrawler {
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
docs_by_item.insert(item.clone(), markdown);
self.database
.insert(
format!("{crate_name}::{}", item.display()),
Some(item),
markdown,
)
.await?;
let parent_item = item;
for mut item in referenced_items {
@ -219,9 +227,6 @@ impl RustdocCrawler {
}
}
Ok(Some(CrateDocs {
crate_root_markdown,
items: docs_by_item,
}))
Ok(())
}
}

View File

@ -1,8 +1,9 @@
pub mod crawler;
mod indexer;
mod item;
mod store;
mod to_markdown;
pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource};
pub use crate::item::*;
pub use crate::store::*;
pub use crate::to_markdown::convert_rustdoc_to_markdown;

View File

@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize};
use util::paths::SUPPORT_DIR;
use util::ResultExt;
use crate::crawler::{RustdocCrawler, RustdocProvider};
use crate::indexer::{RustdocIndexer, RustdocProvider};
use crate::{RustdocItem, RustdocItemKind};
struct GlobalRustdocStore(Arc<RustdocStore>);
@ -75,25 +75,10 @@ impl RustdocStore {
) -> Task<Result<()>> {
let database_future = self.database_future.clone();
self.executor.spawn(async move {
let crawler = RustdocCrawler::new(provider);
let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
return Ok(());
};
let database = database_future.await.map_err(|err| anyhow!(err))?;
let indexer = RustdocIndexer::new(database, provider);
database
.insert(crate_name.clone(), None, crate_docs.crate_root_markdown)
.await?;
for (item, item_docs) in crate_docs.items {
database
.insert(crate_name.clone(), Some(&item), item_docs)
.await?;
}
Ok(())
indexer.index(crate_name.clone()).await
})
}
@ -151,7 +136,7 @@ impl RustdocDatabaseEntry {
}
}
struct RustdocDatabase {
pub(crate) struct RustdocDatabase {
executor: BackgroundExecutor,
env: heed::Env,
entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,