From 80a894b82999d4e562a18800568c8f712a705e6e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 21 Jun 2023 14:53:08 -0400 Subject: [PATCH] WIP: started work on vector store db, by walking project worktrees.\n\nCo-Authored-By: Max --- Cargo.lock | 15 +++ Cargo.toml | 1 + crates/vector_store/Cargo.toml | 25 +++++ crates/vector_store/README.md | 31 ++++++ crates/vector_store/src/vector_store.rs | 134 ++++++++++++++++++++++++ crates/zed/Cargo.toml | 1 + crates/zed/src/main.rs | 1 + 7 files changed, 208 insertions(+) create mode 100644 crates/vector_store/Cargo.toml create mode 100644 crates/vector_store/README.md create mode 100644 crates/vector_store/src/vector_store.rs diff --git a/Cargo.lock b/Cargo.lock index a4b12223e5..3bf0a568a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7877,6 +7877,20 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vector_store" +version = "0.1.0" +dependencies = [ + "anyhow", + "futures 0.3.28", + "gpui", + "language", + "project", + "smol", + "util", + "workspace", +] + [[package]] name = "version_check" version = "0.9.4" @@ -8917,6 +8931,7 @@ dependencies = [ "urlencoding", "util", "uuid 1.3.2", + "vector_store", "vim", "welcome", "workspace", diff --git a/Cargo.toml b/Cargo.toml index fca7355964..b1faf158df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,6 +63,7 @@ members = [ "crates/theme_selector", "crates/theme_testbench", "crates/util", + "crates/vector_store", "crates/vim", "crates/workspace", "crates/welcome", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml new file mode 100644 index 0000000000..c33a35bcad --- /dev/null +++ b/crates/vector_store/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "vector_store" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +path = "src/vector_store.rs" +doctest = false + +[dependencies] +gpui = { path = "../gpui" } 
+language = { path = "../language" } +project = { path = "../project" } +workspace = { path = "../workspace" } +util = { path = "../util" } +anyhow.workspace = true +futures.workspace = true +smol.workspace = true + +[dev-dependencies] +gpui = { path = "../gpui", features = ["test-support"] } +language = { path = "../language", features = ["test-support"] } +project = { path = "../project", features = ["test-support"] } +workspace = { path = "../workspace", features = ["test-support"] } diff --git a/crates/vector_store/README.md b/crates/vector_store/README.md new file mode 100644 index 0000000000..86e68dc414 --- /dev/null +++ b/crates/vector_store/README.md @@ -0,0 +1,31 @@ + +WIP: Sample SQL Queries +/* + +create table "files" ( +"id" INTEGER PRIMARY KEY, +"path" VARCHAR, +"sha1" VARCHAR +); + +create table symbols ( +"file_id" INTEGER REFERENCES "files"("id") ON DELETE CASCADE, +"offset" INTEGER, +"embedding" VECTOR +); + +insert into "files" ("path", "sha1") values ('src/main.rs', 'sha1') returning id; +insert into symbols ( +"file_id", +"start", +"end", +"embedding" +) values ( +(id,), +(id,), +(id,), +(id,), +) + + +*/ diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs new file mode 100644 index 0000000000..1556df7ebe --- /dev/null +++ b/crates/vector_store/src/vector_store.rs @@ -0,0 +1,134 @@ +use anyhow::{anyhow, Result}; +use gpui::{AppContext, Entity, ModelContext, ModelHandle}; +use language::LanguageRegistry; +use project::{Fs, Project}; +use smol::channel; +use std::{path::PathBuf, sync::Arc}; +use util::ResultExt; +use workspace::WorkspaceCreated; + +pub fn init(fs: Arc, language_registry: Arc, cx: &mut AppContext) { + let vector_store = cx.add_model(|cx| VectorStore::new(fs, language_registry)); + + cx.subscribe_global::({ + let vector_store = vector_store.clone(); + move |event, cx| { + let workspace = &event.0; + if let Some(workspace) = workspace.upgrade(cx) { + let project = 
workspace.read(cx).project().clone(); + if project.read(cx).is_local() { + vector_store.update(cx, |store, cx| { + store.add_project(project, cx); + }); + } + } + } + }) + .detach(); +} + +struct Document { + offset: usize, + name: String, + embedding: Vec, +} + +struct IndexedFile { + path: PathBuf, + sha1: String, + documents: Vec, +} + +struct SearchResult { + path: PathBuf, + offset: usize, + name: String, + distance: f32, +} + +struct VectorStore { + fs: Arc, + language_registry: Arc, +} + +impl VectorStore { + fn new(fs: Arc, language_registry: Arc) -> Self { + Self { + fs, + language_registry, + } + } + + async fn index_file( + fs: &Arc, + language_registry: &Arc, + file_path: PathBuf, + ) -> Result { + eprintln!("indexing file {file_path:?}"); + Err(anyhow!("not implemented")) + // todo!(); + } + + fn add_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { + let worktree_scans_complete = project + .read(cx) + .worktrees(cx) + .map(|worktree| worktree.read(cx).as_local().unwrap().scan_complete()) + .collect::>(); + + let fs = self.fs.clone(); + let language_registry = self.language_registry.clone(); + + cx.spawn(|this, cx| async move { + futures::future::join_all(worktree_scans_complete).await; + + let worktrees = project.read_with(&cx, |project, cx| { + project + .worktrees(cx) + .map(|worktree| worktree.read(cx).snapshot()) + .collect::>() + }); + + let (paths_tx, paths_rx) = channel::unbounded::(); + let (indexed_files_tx, indexed_files_rx) = channel::unbounded::(); + cx.background() + .spawn(async move { + for worktree in worktrees { + for file in worktree.files(false, 0) { + paths_tx.try_send(worktree.absolutize(&file.path)).unwrap(); + } + } + }) + .detach(); + cx.background() + .spawn(async move { + while let Ok(indexed_file) = indexed_files_rx.recv().await { + // write document to database + } + }) + .detach(); + cx.background() + .scoped(|scope| { + for _ in 0..cx.background().num_cpus() { + scope.spawn(async { + while let 
Ok(file_path) = paths_rx.recv().await { + if let Some(indexed_file) = + Self::index_file(&fs, &language_registry, file_path) + .await + .log_err() + { + indexed_files_tx.try_send(indexed_file).unwrap(); + } + } + }); + } + }) + .await; + }) + .detach(); + } +} + +impl Entity for VectorStore { + type Event = (); +} diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index d8e47d1c3e..26e27a9193 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -64,6 +64,7 @@ theme = { path = "../theme" } theme_selector = { path = "../theme_selector" } theme_testbench = { path = "../theme_testbench" } util = { path = "../util" } +vector_store = { path = "../vector_store" } vim = { path = "../vim" } workspace = { path = "../workspace" } welcome = { path = "../welcome" } diff --git a/crates/zed/src/main.rs b/crates/zed/src/main.rs index dcdf5c1ea5..76d02307f6 100644 --- a/crates/zed/src/main.rs +++ b/crates/zed/src/main.rs @@ -152,6 +152,7 @@ fn main() { project_panel::init(cx); diagnostics::init(cx); search::init(cx); + vector_store::init(fs.clone(), languages.clone(), cx); vim::init(cx); terminal_view::init(cx); theme_testbench::init(cx);