WIP: started work on vector store db, by walking project worktrees.

Co-Authored-By: Max <max@zed.dev>

This commit is contained in:
KCaverly 2023-06-21 14:53:08 -04:00
parent 6ed86781b2
commit 80a894b829
7 changed files with 208 additions and 0 deletions

15
Cargo.lock generated
View File

@ -7877,6 +7877,20 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vector_store"
version = "0.1.0"
dependencies = [
"anyhow",
"futures 0.3.28",
"gpui",
"language",
"project",
"smol",
"util",
"workspace",
]
[[package]]
name = "version_check"
version = "0.9.4"
@ -8917,6 +8931,7 @@ dependencies = [
"urlencoding",
"util",
"uuid 1.3.2",
"vector_store",
"vim",
"welcome",
"workspace",

View File

@ -63,6 +63,7 @@ members = [
"crates/theme_selector",
"crates/theme_testbench",
"crates/util",
"crates/vector_store",
"crates/vim",
"crates/workspace",
"crates/welcome",

View File

@ -0,0 +1,25 @@
[package]
name = "vector_store"
version = "0.1.0"
edition = "2021"
publish = false
[lib]
path = "src/vector_store.rs"
doctest = false
[dependencies]
gpui = { path = "../gpui" }
language = { path = "../language" }
project = { path = "../project" }
workspace = { path = "../workspace" }
util = { path = "../util" }
anyhow.workspace = true
futures.workspace = true
smol.workspace = true
[dev-dependencies]
gpui = { path = "../gpui", features = ["test-support"] }
language = { path = "../language", features = ["test-support"] }
project = { path = "../project", features = ["test-support"] }
workspace = { path = "../workspace", features = ["test-support"] }

View File

@ -0,0 +1,31 @@
WIP: Sample SQL Queries
/*
create table "files" (
"id" INTEGER PRIMARY KEY,
"path" VARCHAR,
"sha1" VARCHAR
);
create table symbols (
"file_id" INTEGER REFERENCES "files"("id") ON DELETE CASCADE,
"offset" INTEGER,
"embedding" VECTOR
);
insert into "files" ("path", "sha1") values ('src/main.rs', 'sha1') returning id;
insert into symbols (
"file_id",
"start",
"end",
"embedding"
) values (
(id,),
(id,),
(id,),
(id,),
)
*/

View File

@ -0,0 +1,134 @@
use anyhow::{anyhow, Result};
use gpui::{AppContext, Entity, ModelContext, ModelHandle};
use language::LanguageRegistry;
use project::{Fs, Project};
use smol::channel;
use std::{path::PathBuf, sync::Arc};
use util::ResultExt;
use workspace::WorkspaceCreated;
/// Sets up the vector store for the application.
///
/// A single `VectorStore` model is created from `fs` and `language_registry`,
/// and a global subscription to `WorkspaceCreated` is installed and detached.
/// Whenever that event fires, the project of the new workspace is passed to
/// `VectorStore::add_project`, but only if the workspace handle can still be
/// upgraded and the project reports itself as local.
pub fn init(fs: Arc<dyn Fs>, language_registry: Arc<LanguageRegistry>, cx: &mut AppContext) {
    let vector_store = cx.add_model(|_| VectorStore::new(fs, language_registry));

    cx.subscribe_global::<WorkspaceCreated, _>(move |event, cx| {
        // The event carries a handle that must be upgraded before use; if that
        // fails there is nothing to index.
        let Some(workspace) = event.0.upgrade(cx) else {
            return;
        };

        let project = workspace.read(cx).project().clone();
        if !project.read(cx).is_local() {
            return;
        }

        vector_store.update(cx, |store, cx| store.add_project(project, cx));
    })
    .detach();
}
/// One embedded item belonging to an indexed file; stored in `IndexedFile::documents`.
struct Document {
/// Location of the item within its file.
/// NOTE(review): presumably a byte offset into the file's text — confirm.
offset: usize,
/// Name associated with the item (presumably a symbol name — TODO confirm).
name: String,
/// Embedding for the item, as a list of `f32` components.
embedding: Vec<f32>,
}
/// The value produced by `VectorStore::index_file` for one file and sent over
/// the indexed-files channel in `VectorStore::add_project`.
struct IndexedFile {
/// Path of the file (presumably the path that was passed to `index_file` — TODO confirm).
path: PathBuf,
/// NOTE(review): presumably a SHA-1 digest of the file contents, encoded as text — confirm.
sha1: String,
/// The documents found in this file.
documents: Vec<Document>,
}
/// A search hit. Not constructed or referenced by any code visible in this file yet.
/// NOTE(review): presumably one match returned by a similarity query — confirm once used.
struct SearchResult {
/// Path of the file containing the hit.
path: PathBuf,
/// Location of the hit within the file (same caveat as `Document::offset`: units unconfirmed).
offset: usize,
/// Name associated with the hit.
name: String,
/// Distance score for the hit.
/// NOTE(review): the metric and whether smaller means closer are not shown here — confirm.
distance: f32,
}
/// Model created once by `init`; holds the handles that `add_project` clones
/// into its indexing tasks.
struct VectorStore {
/// Filesystem handle passed by reference to `index_file`.
fs: Arc<dyn Fs>,
/// Language registry passed by reference to `index_file`.
language_registry: Arc<LanguageRegistry>,
}
impl VectorStore {
    /// Creates a store holding the given filesystem and language registry handles.
    fn new(fs: Arc<dyn Fs>, language_registry: Arc<LanguageRegistry>) -> Self {
        Self {
            fs,
            language_registry,
        }
    }

    /// Indexes a single file.
    ///
    /// Currently a stub: it prints the path to stderr and always returns a
    /// "not implemented" error. `fs` and `language_registry` are not used yet.
    async fn index_file(
        fs: &Arc<dyn Fs>,
        language_registry: &Arc<LanguageRegistry>,
        file_path: PathBuf,
    ) -> Result<IndexedFile> {
        eprintln!("indexing file {file_path:?}");
        Err(anyhow!("not implemented"))
    }

    /// Starts indexing every file in `project`'s worktrees in a detached task.
    ///
    /// The task:
    /// 1. waits for every worktree's `scan_complete()` future,
    /// 2. takes a snapshot of each worktree,
    /// 3. spawns a background producer that sends the absolute path of each
    ///    file into an unbounded channel,
    /// 4. spawns a background consumer that drains indexed files (the database
    ///    write is not implemented yet),
    /// 5. runs one scoped worker per `num_cpus()` that pulls paths, calls
    ///    `index_file`, and forwards successful results; failures go through
    ///    `log_err` and are skipped.
    ///
    /// # Panics
    ///
    /// Panics if any worktree is not local (`as_local().unwrap()`); `init` only
    /// calls this for projects where `is_local()` is true. The `try_send(..).unwrap()`
    /// calls panic if the receiving side of the corresponding channel is closed.
    fn add_project(&mut self, project: ModelHandle<Project>, cx: &mut ModelContext<Self>) {
        let fs = self.fs.clone();
        let language_registry = self.language_registry.clone();

        let pending_scans: Vec<_> = project
            .read(cx)
            .worktrees(cx)
            .map(|worktree| worktree.read(cx).as_local().unwrap().scan_complete())
            .collect();

        cx.spawn(|_this, cx| async move {
            futures::future::join_all(pending_scans).await;

            let snapshots = project.read_with(&cx, |project, cx| {
                project
                    .worktrees(cx)
                    .map(|worktree| worktree.read(cx).snapshot())
                    .collect::<Vec<_>>()
            });

            let (path_sender, path_receiver) = channel::unbounded::<PathBuf>();
            let (indexed_sender, indexed_receiver) = channel::unbounded::<IndexedFile>();

            // Producer: enumerate files and queue their absolute paths. The
            // sender is dropped when this task ends, which lets the workers'
            // receive loops terminate.
            // NOTE(review): the meaning of the `files(false, 0)` arguments is
            // not visible here — confirm against the snapshot API.
            cx.background()
                .spawn(async move {
                    for snapshot in snapshots {
                        for entry in snapshot.files(false, 0) {
                            let absolute_path = snapshot.absolutize(&entry.path);
                            path_sender.try_send(absolute_path).unwrap();
                        }
                    }
                })
                .detach();

            // Consumer: receives indexed files until the channel closes.
            cx.background()
                .spawn(async move {
                    while let Ok(_indexed_file) = indexed_receiver.recv().await {
                        // write document to database
                    }
                })
                .detach();

            // Workers: the scoped futures borrow the channels and handles
            // owned by this task rather than taking ownership of them.
            let worker_count = cx.background().num_cpus();
            cx.background()
                .scoped(|scope| {
                    for _ in 0..worker_count {
                        scope.spawn(async {
                            while let Ok(file_path) = path_receiver.recv().await {
                                let outcome = Self::index_file(&fs, &language_registry, file_path)
                                    .await
                                    .log_err();
                                if let Some(indexed_file) = outcome {
                                    indexed_sender.try_send(indexed_file).unwrap();
                                }
                            }
                        });
                    }
                })
                .await;
        })
        .detach();
    }
}
/// Implements gpui's `Entity` trait for `VectorStore`.
/// NOTE(review): presumably required so that `cx.add_model` in `init` accepts it — confirm.
impl Entity for VectorStore {
// The unit type is used as the event type.
type Event = ();
}

View File

@ -64,6 +64,7 @@ theme = { path = "../theme" }
theme_selector = { path = "../theme_selector" }
theme_testbench = { path = "../theme_testbench" }
util = { path = "../util" }
vector_store = { path = "../vector_store" }
vim = { path = "../vim" }
workspace = { path = "../workspace" }
welcome = { path = "../welcome" }

View File

@ -152,6 +152,7 @@ fn main() {
project_panel::init(cx);
diagnostics::init(cx);
search::init(cx);
vector_store::init(fs.clone(), languages.clone(), cx);
vim::init(cx);
terminal_view::init(cx);
theme_testbench::init(cx);