mirror of
https://github.com/SilasMarvin/lsp-ai.git
synced 2024-10-26 11:53:46 +03:00
Made into a workspace
This commit is contained in:
parent
58192c4182
commit
f2b8c1eda3
103
Cargo.lock
generated
103
Cargo.lock
generated
@ -149,6 +149,18 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "auto_enums"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1899bfcfd9340ceea3533ea157360ba8fa864354eccbceab58e1006ecab35393"
|
||||
dependencies = [
|
||||
"derive_utils",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
@ -356,7 +368,7 @@ version = "4.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
@ -662,6 +674,17 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_utils"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61bb5a1014ce6dfc2a378578509abe775a5aa06bff584a547555d9efdb81b926"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "difflib"
|
||||
version = "0.4.0"
|
||||
@ -730,9 +753,9 @@ checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.10.0"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
|
||||
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
@ -1056,6 +1079,12 @@ dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.9"
|
||||
@ -1364,6 +1393,15 @@ dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.10"
|
||||
@ -1518,7 +1556,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lsp-ai"
|
||||
version = "0.3.0"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
@ -1541,6 +1579,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"splitter-tree-sitter",
|
||||
"text-splitter",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"tracing",
|
||||
@ -2419,6 +2458,12 @@ dependencies = [
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.17"
|
||||
@ -2479,7 +2524,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "878cf3d57f0e5bfacd425cdaccc58b4c06d68a7b71c63fc28710a20c88676808"
|
||||
dependencies = [
|
||||
"darling 0.14.4",
|
||||
"heck",
|
||||
"heck 0.4.1",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
@ -2502,7 +2547,7 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25a82fcb49253abcb45cdcb2adf92956060ec0928635eb21b4f7a6d8f25ab0bc"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
@ -2767,6 +2812,8 @@ dependencies = [
|
||||
"cc",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-zig",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2870,7 +2917,7 @@ checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"either",
|
||||
"heck",
|
||||
"heck 0.4.1",
|
||||
"hex",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
@ -3026,6 +3073,28 @@ version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.26.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.5.0"
|
||||
@ -3099,6 +3168,24 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
|
||||
|
||||
[[package]]
|
||||
name = "text-splitter"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab9dc04b7cf08eb01c07c272bf699fa55679a326ddf7dd075e14094efc80fb9"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"auto_enums",
|
||||
"either",
|
||||
"itertools 0.13.0",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"strum",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.61"
|
||||
@ -3385,7 +3472,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tree-sitter-zig"
|
||||
version = "0.0.1"
|
||||
source = "git+https://github.com/SilasMarvin/tree-sitter-zig?branch=silas-update-tree-sitter-version#2eedab3ff6dda88aedddf0bb32a14f81bb709a73"
|
||||
source = "git+https://github.com/maxxnino/tree-sitter-zig#7c5a29b721d409be8842017351bf007d7e384401"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
|
56
Cargo.toml
56
Cargo.toml
@ -1,52 +1,16 @@
|
||||
[package]
|
||||
name = "lsp-ai"
|
||||
version = "0.3.0"
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/*",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
description = "LSP-AI is an open-source language server that serves as a backend for AI-powered functionality, designed to assist and empower software engineers, not replace them."
|
||||
repository = "https://github.com/SilasMarvin/lsp-ai"
|
||||
readme = "README.md"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
lsp-server = "0.7.6"
|
||||
lsp-types = "0.95.0"
|
||||
ropey = "1.6.1"
|
||||
serde = "1.0.190"
|
||||
serde_json = "1.0.108"
|
||||
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
|
||||
rand = "0.8.5"
|
||||
tokenizers = "0.14.1"
|
||||
parking_lot = "0.12.1"
|
||||
once_cell = "1.19.0"
|
||||
directories = "5.0.1"
|
||||
llama-cpp-2 = { version = "0.1.55", optional = true }
|
||||
minijinja = { version = "1.0.12", features = ["loader"] }
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
tracing = "0.1.40"
|
||||
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
|
||||
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
|
||||
ignore = "0.4.22"
|
||||
pgml = "1.0.4"
|
||||
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
|
||||
indexmap = "2.2.5"
|
||||
async-trait = "0.1.78"
|
||||
tree-sitter = "0.22"
|
||||
# splitter-tree-sitter = { git = "https://github.com/SilasMarvin/splitter-tree-sitter" }
|
||||
splitter-tree-sitter = { path = "../../splitter-tree-sitter" }
|
||||
# utils-tree-sitter = { git = "https://github.com/SilasMarvin/utils-tree-sitter" }
|
||||
utils-tree-sitter = { path = "../../utils-tree-sitter", features = ["all"] }
|
||||
|
||||
[build-dependencies]
|
||||
cc="*"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
llama_cpp = ["dep:llama-cpp-2"]
|
||||
metal = ["llama-cpp-2/metal"]
|
||||
cuda = ["llama-cpp-2/cuda"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2.0.14"
|
||||
[workspace.dependencies]
|
||||
utils-tree-sitter = { path = "./crates/utils-tree-sitter" }
|
||||
splitter-tree-sitter = { path = "./crates/splitter-tree-sitter" }
|
||||
|
50
crates/lsp-ai/Cargo.toml
Normal file
50
crates/lsp-ai/Cargo.toml
Normal file
@ -0,0 +1,50 @@
|
||||
[package]
|
||||
name = "lsp-ai"
|
||||
version = "0.2.0"
|
||||
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
readme.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
lsp-server = "0.7.6"
|
||||
lsp-types = "0.95.0"
|
||||
ropey = "1.6.1"
|
||||
serde = "1.0.190"
|
||||
serde_json = "1.0.108"
|
||||
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
|
||||
rand = "0.8.5"
|
||||
tokenizers = "0.14.1"
|
||||
parking_lot = "0.12.1"
|
||||
once_cell = "1.19.0"
|
||||
directories = "5.0.1"
|
||||
llama-cpp-2 = { version = "0.1.55", optional = true }
|
||||
minijinja = { version = "1.0.12", features = ["loader"] }
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
tracing = "0.1.40"
|
||||
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
|
||||
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
|
||||
ignore = "0.4.22"
|
||||
pgml = "1.0.4"
|
||||
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
|
||||
indexmap = "2.2.5"
|
||||
async-trait = "0.1.78"
|
||||
tree-sitter = "0.22"
|
||||
utils-tree-sitter = { workspace = true, features = ["all"] }
|
||||
splitter-tree-sitter = { workspace = true }
|
||||
text-splitter = { version = "0.13.3", features = ["code"] }
|
||||
|
||||
[build-dependencies]
|
||||
cc="*"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
llama_cpp = ["dep:llama-cpp-2"]
|
||||
metal = ["llama-cpp-2/metal"]
|
||||
cuda = ["llama-cpp-2/cuda"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2.0.14"
|
@ -1,5 +1,6 @@
|
||||
use ignore::WalkBuilder;
|
||||
use std::collections::HashSet;
|
||||
use tracing::{error, instrument};
|
||||
|
||||
use crate::config::{self, Config};
|
||||
|
||||
@ -18,14 +19,11 @@ impl Crawl {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn crawl_config(&self) -> &config::Crawl {
|
||||
&self.crawl_config
|
||||
}
|
||||
|
||||
#[instrument(skip(self, f))]
|
||||
pub fn maybe_do_crawl(
|
||||
&mut self,
|
||||
triggered_file: Option<String>,
|
||||
mut f: impl FnMut(&config::Crawl, &str) -> anyhow::Result<()>,
|
||||
mut f: impl FnMut(&config::Crawl, &str) -> anyhow::Result<bool>,
|
||||
) -> anyhow::Result<()> {
|
||||
if let Some(root_uri) = &self.config.client_params.root_uri {
|
||||
if !root_uri.starts_with("file://") {
|
||||
@ -56,7 +54,14 @@ impl Crawl {
|
||||
if !path.is_dir() {
|
||||
if let Some(path_str) = path.to_str() {
|
||||
if self.crawl_config.all_files {
|
||||
f(&self.crawl_config, path_str)?;
|
||||
match f(&self.crawl_config, path_str) {
|
||||
Ok(c) => {
|
||||
if !c {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => error!("{e:?}"),
|
||||
}
|
||||
} else {
|
||||
match (
|
||||
path.extension().map(|pe| pe.to_str()).flatten(),
|
||||
@ -64,7 +69,14 @@ impl Crawl {
|
||||
) {
|
||||
(Some(path_extension), Some(extension_to_match)) => {
|
||||
if path_extension == extension_to_match {
|
||||
f(&self.crawl_config, path_str)?;
|
||||
match f(&self.crawl_config, path_str) {
|
||||
Ok(c) => {
|
||||
if !c {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => error!("{e:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => continue,
|
@ -4,8 +4,8 @@ use lsp_types::TextDocumentPositionParams;
|
||||
use parking_lot::Mutex;
|
||||
use ropey::Rope;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use tracing::{error, instrument};
|
||||
use std::{collections::HashMap, io::Read};
|
||||
use tracing::{error, instrument, warn};
|
||||
use tree_sitter::{InputEdit, Point, Tree};
|
||||
|
||||
use crate::{
|
||||
@ -114,18 +114,37 @@ impl FileStore {
|
||||
}
|
||||
|
||||
fn maybe_do_crawl(&self, triggered_file: Option<String>) -> anyhow::Result<()> {
|
||||
let mut total_bytes = 0;
|
||||
let mut current_bytes = 0;
|
||||
if let Some(crawl) = &self.crawl {
|
||||
crawl
|
||||
.lock()
|
||||
.maybe_do_crawl(triggered_file, |config, path| {
|
||||
// Break if total bytes is over the max crawl memory
|
||||
if total_bytes as u64 >= config.max_crawl_memory {
|
||||
warn!("Ending crawl early due to `max_crawl_memory` resetraint");
|
||||
return Ok(false);
|
||||
}
|
||||
// This means it has been opened before
|
||||
let insert_uri = format!("file://{path}");
|
||||
if self.file_map.lock().contains_key(&insert_uri) {
|
||||
return Ok(());
|
||||
return Ok(true);
|
||||
}
|
||||
// TODO: actually limit files based on config
|
||||
let contents = std::fs::read_to_string(path)?;
|
||||
// Open the file and see if it is small enough to read
|
||||
let mut f = std::fs::File::open(path)?;
|
||||
let metadata = f.metadata()?;
|
||||
if metadata.len() > config.max_file_size {
|
||||
warn!("Skipping file: {path} because it is too large");
|
||||
return Ok(true);
|
||||
}
|
||||
// Read the file contents
|
||||
let mut contents = vec![];
|
||||
f.read_to_end(&mut contents)?;
|
||||
let contents = String::from_utf8(contents)?;
|
||||
current_bytes += contents.len();
|
||||
total_bytes += contents.len();
|
||||
self.add_new_file(&insert_uri, contents);
|
||||
Ok(())
|
||||
Ok(true)
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
@ -251,29 +251,31 @@ impl PostgresML {
|
||||
crawl
|
||||
.lock()
|
||||
.maybe_do_crawl(triggered_file, |config, path| {
|
||||
let uri = format!("file://{path}");
|
||||
// Break if total bytes is over the max crawl memory
|
||||
if total_bytes as u64 >= config.max_crawl_memory {
|
||||
warn!("Ending crawl early due to `max_crawl_memory` resetraint");
|
||||
return Ok(false);
|
||||
}
|
||||
// This means it has been opened before
|
||||
let uri = format!("file://{path}");
|
||||
if self.file_store.contains_file(&uri) {
|
||||
return Ok(());
|
||||
return Ok(true);
|
||||
}
|
||||
// Open the file and see if it is small enough to read
|
||||
let mut f = std::fs::File::open(path)?;
|
||||
if f.metadata()
|
||||
.map(|m| m.len() > config.max_file_size)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
warn!("Skipping file because it is too large: {path}");
|
||||
return Ok(());
|
||||
let metadata = f.metadata()?;
|
||||
if metadata.len() > config.max_file_size {
|
||||
warn!("Skipping file: {path} because it is too large");
|
||||
return Ok(true);
|
||||
}
|
||||
// Read the file contents
|
||||
let mut contents = vec![];
|
||||
f.read_to_end(&mut contents);
|
||||
if let Ok(contents) = String::from_utf8(contents) {
|
||||
current_bytes += contents.len();
|
||||
total_bytes += contents.len();
|
||||
let chunks = self.splitter.split_file_contents(&uri, &contents);
|
||||
documents.push((uri, chunks));
|
||||
}
|
||||
f.read_to_end(&mut contents)?;
|
||||
let contents = String::from_utf8(contents)?;
|
||||
current_bytes += contents.len();
|
||||
total_bytes += contents.len();
|
||||
let chunks = self.splitter.split_file_contents(&uri, &contents);
|
||||
documents.push((uri, chunks));
|
||||
// If we have over 100 mega bytes of data do the upsert
|
||||
if current_bytes >= 100_000_000 || total_bytes as u64 >= config.max_crawl_memory
|
||||
{
|
||||
@ -305,12 +307,7 @@ impl PostgresML {
|
||||
current_bytes = 0;
|
||||
documents = vec![];
|
||||
}
|
||||
// Break if total bytes is over the max crawl memory
|
||||
if total_bytes as u64 >= config.max_crawl_memory {
|
||||
warn!("Ending crawl eraly do to max_crawl_memory");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(())
|
||||
Ok(true)
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
@ -41,7 +41,7 @@ impl Splitter for TreeSitter {
|
||||
Ok(chunks) => chunks,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to parse tree for file with error {e:?}. Falling back to default splitter.",
|
||||
"Failed to parse tree for file with error: {e:?}. Falling back to default splitter.",
|
||||
);
|
||||
todo!()
|
||||
}
|
||||
@ -57,14 +57,14 @@ impl Splitter for TreeSitter {
|
||||
Ok(chunks) => chunks,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to parse tree for file: {uri} with error {e:?}. Falling back to default splitter.",
|
||||
"Failed to parse tree for file: {uri} with error: {e:?}. Falling back to default splitter.",
|
||||
);
|
||||
todo!()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to parse tree for file {uri} with error {e:?}. Falling back to default splitter.",
|
||||
"Failed to parse tree for file {uri} with error: {e:?}. Falling back to default splitter.",
|
||||
);
|
||||
todo!()
|
||||
}
|
1
crates/splitter-tree-sitter
Submodule
1
crates/splitter-tree-sitter
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 37a2e98cce5a1b39f07aec7e5b3bc75eebb41ac2
|
1
crates/utils-tree-sitter
Submodule
1
crates/utils-tree-sitter
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit a38e7143bcab2412348fd92904cc5105117896a1
|
Loading…
Reference in New Issue
Block a user