Some more tests and vscode plugin cleanup

Silas Marvin 2024-05-27 09:49:43 -07:00
parent dbb01d67bb
commit ffbafe39c8
13 changed files with 2520 additions and 62 deletions

1
.gitignore vendored
View File

@@ -2,4 +2,5 @@
/models
node_modules
out
dist
lsp-ai.log

2
.vscode/task.json vendored
View File

@@ -5,7 +5,7 @@
"label": "Build Extension",
"group": "build",
"type": "npm",
"script": "build",
"script": "esbuild",
"path": "editors/vscode/",
"problemMatcher": {
"base": "$tsc-watch",

4
.vscode/tasks.json vendored
View File

@@ -5,7 +5,7 @@
"label": "Build Extension",
"group": "build",
"type": "npm",
"script": "build",
"script": "esbuild",
"path": "editors/vscode/",
"problemMatcher": {
"base": "$tsc-watch",
@@ -14,4 +14,4 @@
"isBackground": true
}
]
}
}

17
Cargo.lock generated
View File

@@ -266,12 +266,13 @@ checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "cc"
version = "1.0.90"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
dependencies = [
"jobserver",
"libc",
"once_cell",
]
[[package]]
@@ -1310,9 +1311,9 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "jobserver"
version = "0.1.28"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
dependencies = [
"libc",
]
@@ -1409,9 +1410,7 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
[[package]]
name = "llama-cpp-2"
version = "0.1.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f391a790923a78bbe6100824124492c7df3d17b26340424eb813b88a521707a3"
version = "0.1.52"
dependencies = [
"llama-cpp-sys-2",
"thiserror",
@@ -1420,9 +1419,7 @@ dependencies = [
[[package]]
name = "llama-cpp-sys-2"
version = "0.1.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1f26aac755fc36d5cc19f0853c4d359db8c4e4e5944705a27c7cce5e2cb9c36"
version = "0.1.52"
dependencies = [
"bindgen",
"cc",

Cargo.toml
View File

@@ -18,7 +18,8 @@ tokenizers = "0.14.1"
parking_lot = "0.12.1"
once_cell = "1.19.0"
directories = "5.0.1"
llama-cpp-2 = { version = "0.1.47", optional = true }
# llama-cpp-2 = { version = "0.1.47", optional = true }
llama-cpp-2 = { path = "../llama-cpp-rs/llama-cpp-2", optional = true }
minijinja = { version = "1.0.12", features = ["loader"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing = "0.1.40"
@@ -31,7 +32,7 @@ indexmap = "2.2.5"
async-trait = "0.1.78"
[features]
default = []
default = ["llamacpp"]
llamacpp = ["dep:llama-cpp-2"]
cublas = ["llama-cpp-2/cublas"]
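With llamacpp now in the default feature set, the optional llama-cpp-2 dependency is compiled in unless default features are disabled (cargo build --no-default-features). A minimal sketch of how code that touches an optional dependency is typically gated behind such a feature; the function names below are illustrative and not taken from this commit:

// Only compiled when the "llamacpp" feature is enabled (now a default feature),
// so it is safe to reference the optional llama-cpp-2 crate from here.
#[cfg(feature = "llamacpp")]
fn completion_backend_name() -> &'static str {
    "llama.cpp"
}

// Built with --no-default-features: the optional crate is absent, so this
// fallback is used instead.
#[cfg(not(feature = "llamacpp"))]
fn completion_backend_name() -> &'static str {
    "none"
}

fn main() {
    println!("backend: {}", completion_backend_name());
}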

README.md
View File

@@ -10,4 +10,3 @@ A short list of a few of the editors it works with:
- Sublime
It works with many many many more editors.

editors/vscode/.vscodeignore
View File

@@ -0,0 +1,6 @@
.vscode
node_modules
out/
src/
tsconfig.json
webpack.config.js

File diff suppressed because it is too large

editors/vscode/package.json
View File

@@ -2,9 +2,14 @@
"name": "lsp-ai",
"version": "0.1.0",
"description": "",
"main": "/out/index.js",
"repository": "https://github.com/SilasMarvin/lsp-ai",
"main": "/dist/index.js",
"scripts": {
"build": "npx tsc"
"vscode:prepublish": "npm run esbuild-base -- --minify",
"esbuild-base": "esbuild ./src/index.ts --bundle --outfile=dist/index.js --external:vscode --format=cjs --platform=node",
"esbuild": "npm run esbuild-base -- --sourcemap",
"esbuild-watch": "npm run esbuild-base -- --sourcemap --watch",
"test-compile": "tsc -p ./"
},
"author": "",
"license": "MIT",
@@ -51,30 +56,85 @@
"messages": [
{
"role": "system",
"content": "You are a code completion tool. Use the [CONTEXT] and [CURRENT_CODE] provided to replace the <CURSOR> with the correct code. Do not reply with anything but valid code"
"content": "Instructions:\n- You are an AI programming assistant.\n- Given a piece of code with the cursor location marked by \"<CURSOR>\", replace \"<CURSOR>\" with the correct code or comment.\n- First, think step-by-step.\n- Describe your plan for what to build in pseudocode, written out in great detail.\n- Then output the code replacing the \"<CURSOR>\".\n- Ensure that your completion fits within the language context of the provided code snippet.\n\nRules:\n- Only respond with code or comments.\n- Only replace \"<CURSOR>\"; do not include any previously written code.\n- Never include \"<CURSOR>\" in your response.\n- If the cursor is within a comment, complete the comment meaningfully.\n- Handle ambiguous cases by providing the most contextually appropriate completion.\n- Be consistent with your responses."
},
{
"role": "user",
"content": "[CONTEXT]\nprint(\"hello\")\n\n[CURRENT_CODE]\ndef print_to_screen(a): pri<CURSOR>\n\nprint_to_screen(\"test\")"
"content": "def greet(name):\n print(f\"Hello, {<CURSOR>}\")"
},
{
"role": "system",
"content": "nt_to_screen(a)"
"role": "assistant",
"content": "name"
},
{
"role": "user",
"content": "[CONTEXT]\ndef mul_two_nums(a, b):\n return a * b\n\n[CURRENT_CODE]\n# Test 5 * 25\nass<CURSOR>"
"content": "function sum(a, b) {\n return a + <CURSOR>;\n}"
},
{
"role": "system",
"content": "ert mul_two_nums(5, 25) == 125"
"role": "assistant",
"content": "b"
},
{
"role": "user",
"content": "[CONTENT]\n{CONTENT}\n\n[CURRENT_CODE]\n{CODE}"
"content": "fn multiply(a: i32, b: i32) -> i32 {\n a * <CURSOR>\n}"
},
{
"role": "assistant",
"content": "b"
},
{
"role": "user",
"content": "# <CURSOR>\ndef add(a, b):\n return a + b"
},
{
"role": "assistant",
"content": "Adds two numbers"
},
{
"role": "user",
"content": "# This function checks if a number is even\n<CURSOR>"
},
{
"role": "assistant",
"content": "def is_even(n):\n return n % 2 == 0"
},
{
"role": "user",
"content": "public class HelloWorld {\n public static void main(String[] args) {\n System.out.println(\"Hello, <CURSOR>\");\n }\n}"
},
{
"role": "assistant",
"content": "world"
},
{
"role": "user",
"content": "try:\n # Trying to open a file\n file = open(\"example.txt\", \"r\")\n # <CURSOR>\nfinally:\n file.close()"
},
{
"role": "assistant",
"content": "content = file.read()"
},
{
"role": "user",
"content": "#include <iostream>\nusing namespace std;\n\nint main() {\n int a = 5, b = 10;\n cout << \"Sum: \" << (a + <CURSOR>) << endl;\n return 0;\n}"
},
{
"role": "assistant",
"content": "b"
},
{
"role": "user",
"content": "<!DOCTYPE html>\n<html>\n<head>\n <title>My Page</title>\n</head>\n<body>\n <h1>Welcome to My Page</h1>\n <p>This is a sample page with a list of items:</p>\n <ul>\n <li>Item 1</li>\n <li>Item 2</li>\n <li><CURSOR></li>\n </ul>\n</body>\n</html>"
},
{
"role": "assistant",
"content": "Item 3"
},
{
"role": "user",
"content": "{CODE}"
}
],
"max_new_tokens": 32
]
}
},
"description": "JSON configuration for LSP-AI generation"
@@ -85,6 +145,8 @@
"devDependencies": {
"@types/node": "^20.11.0",
"@types/uuid": "^9.0.8",
"@vscode/vsce": "^2.26.1",
"esbuild": "^0.21.4",
"typescript": "^5.3.3"
},
"dependencies": {

editors/vscode/src/index.ts
View File

@@ -19,7 +19,7 @@ export function activate(context: vscode.ExtensionContext) {
// Options to control the language client
const config = vscode.workspace.getConfiguration("lsp-ai");
const clientOptions: LanguageClientOptions = {
documentSelector: [{ pattern: "**" }],
documentSelector: [{ scheme: "file" }],
initializationOptions: config.serverConfiguration
};

src/memory_backends/file_store.rs
View File

@@ -111,36 +111,43 @@ impl FileStore {
let (mut rope, cursor_index) =
self.get_rope_for_position(position, params.max_context_length)?;
// Prioritize doing chat
// If FIM is enabled, make sure the cursor is not at the end of the file as that is just completion
// If not chat and not FIM do completion
Ok(match (params.messages.is_some(), params.fim) {
r @ (true, _) | r @ (false, Some(_)) if rope.len_chars() != cursor_index => {
(true, _) => {
let max_length = tokens_to_estimated_characters(params.max_context_length);
let start = cursor_index.saturating_sub(max_length / 2);
let end = rope
.len_chars()
.min(cursor_index + (max_length - (cursor_index - start)));
if r.0 {
rope.insert(cursor_index, "<CURSOR>");
let rope_slice = rope
.get_slice(start..end + "<CURSOR>".chars().count())
.context("Error getting rope slice")?;
rope_slice.to_string()
} else {
let fim = r.1.unwrap(); // We can unwrap as we know it is some from the match
rope.insert(end, &fim.end);
rope.insert(cursor_index, &fim.middle);
rope.insert(start, &fim.start);
let rope_slice = rope
.get_slice(
start
..end
+ fim.start.chars().count()
+ fim.middle.chars().count()
+ fim.end.chars().count(),
)
.context("Error getting rope slice")?;
rope_slice.to_string()
}
rope.insert(cursor_index, "<CURSOR>");
let rope_slice = rope
.get_slice(start..end + "<CURSOR>".chars().count())
.context("Error getting rope slice")?;
rope_slice.to_string()
}
(false, Some(fim)) if rope.len_chars() != cursor_index => {
let max_length = tokens_to_estimated_characters(params.max_context_length);
let start = cursor_index.saturating_sub(max_length / 2);
let end = rope
.len_chars()
.min(cursor_index + (max_length - (cursor_index - start)));
rope.insert(end, &fim.end);
rope.insert(cursor_index, &fim.middle);
rope.insert(start, &fim.start);
let rope_slice = rope
.get_slice(
start
..end
+ fim.start.chars().count()
+ fim.middle.chars().count()
+ fim.end.chars().count(),
)
.context("Error getting rope slice")?;
rope_slice.to_string()
}
_ => {
let start = cursor_index
@@ -515,4 +522,80 @@ The end with a trailing new line
Ok(())
}
#[tokio::test]
async fn test_document_cursor_placement_corner_cases() -> anyhow::Result<()> {
let text_document = generate_filler_text_document(None, Some("test\n"));
let params = lsp_types::DidOpenTextDocumentParams {
text_document: text_document.clone(),
};
let file_store = generate_base_file_store()?;
file_store.opened_text_document(params).await?;
// Test chat
let params = json!({
"messages": []
});
let prompt = file_store
.build_prompt(
&TextDocumentPositionParams {
text_document: TextDocumentIdentifier {
uri: text_document.uri.clone(),
},
position: Position {
line: 1,
character: 0,
},
},
params,
)
.await?;
assert_eq!(prompt.context, "");
let text = r#"test
<CURSOR>"#
.to_string();
assert_eq!(text, prompt.code);
Ok(())
}
#[tokio::test]
async fn test_fim_placement_corner_cases() -> anyhow::Result<()> {
let text_document = generate_filler_text_document(None, Some("test\n"));
let params = lsp_types::DidOpenTextDocumentParams {
text_document: text_document.clone(),
};
let file_store = generate_base_file_store()?;
file_store.opened_text_document(params).await?;
// Test FIM
let params = json!({
"fim": {
"start": "SS",
"middle": "MM",
"end": "EE"
}
});
let prompt = file_store
.build_prompt(
&TextDocumentPositionParams {
text_document: TextDocumentIdentifier {
uri: text_document.uri.clone(),
},
position: Position {
line: 1,
character: 0,
},
},
params,
)
.await?;
assert_eq!(prompt.context, "");
let text = r#"test
"#
.to_string();
assert_eq!(text, prompt.code);
Ok(())
}
}
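The refactor above splits the old combined match arm into a dedicated chat branch, which inserts a "<CURSOR>" marker into the rope and slices a character window around the cursor, and a separate FIM branch that wraps the window in the configured start/middle/end strings. A self-contained sketch of the chat-branch windowing using ropey; the helper name and the values in main are illustrative:

use ropey::Rope;

// Insert a "<CURSOR>" marker at the cursor's character index, then slice a
// window of roughly max_length characters centred on the cursor.
fn window_with_cursor(text: &str, cursor_index: usize, max_length: usize) -> Option<String> {
    let mut rope = Rope::from_str(text);
    let start = cursor_index.saturating_sub(max_length / 2);
    let end = rope
        .len_chars()
        .min(cursor_index + (max_length - (cursor_index - start)));
    rope.insert(cursor_index, "<CURSOR>");
    // The slice end grows by the marker's length because the insert shifted
    // everything after the cursor to the right.
    let slice = rope.get_slice(start..end + "<CURSOR>".chars().count())?;
    Some(slice.to_string())
}

fn main() {
    let prompt = window_with_cursor("def add(a, b):\n    return a + b\n", 15, 64).unwrap();
    assert!(prompt.contains("<CURSOR>"));
    println!("{prompt}");
}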

src/transformer_backends/llama_cpp/mod.rs
View File

@@ -32,7 +32,7 @@ pub struct LLaMACPPRunParams {
chat_template: Option<String>, // A Jinja template
chat_format: Option<String>, // The name of a template in llamacpp
#[serde(default = "max_new_tokens_default")]
pub max_new_tokens: usize,
pub max_tokens: usize,
// TODO: Explore other arguments
}
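The run-parameter field is renamed from max_new_tokens to max_tokens, which is what the tests below now send. A trimmed sketch of how such run params deserialize with serde; the struct is reduced to the fields relevant here and is not the full LLaMACPPRunParams definition, and default_max_tokens is an illustrative stand-in for the real default function:

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct RunParams {
    // Optional llama.cpp chat template name, as in the test payloads below.
    #[serde(default)]
    chat_format: Option<String>,
    // Renamed field: clients now send "max_tokens".
    #[serde(default = "default_max_tokens")]
    max_tokens: usize,
}

fn default_max_tokens() -> usize {
    32
}

fn main() -> anyhow::Result<()> {
    let params: RunParams = serde_json::from_value(serde_json::json!({
        "chat_format": "llama2",
        "max_tokens": 64
    }))?;
    assert_eq!(params.max_tokens, 64);
    println!("{:?}", params);
    Ok(())
}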
@@ -122,7 +122,36 @@ mod test {
use serde_json::json;
#[tokio::test]
async fn llama_cpp_do_completion() -> anyhow::Result<()> {
async fn llama_cpp_do_completion_chat() -> anyhow::Result<()> {
let configuration: config::LLaMACPP = serde_json::from_value(json!({
"repository": "QuantFactory/Meta-Llama-3-8B-GGUF",
"name": "Meta-Llama-3-8B.Q5_K_M.gguf",
"n_ctx": 2048,
"n_gpu_layers": 1000,
}))?;
let llama_cpp = LLaMACPP::new(configuration).unwrap();
let prompt = Prompt::default_with_cursor();
let run_params = json!({
"messages": [
{
"role": "system",
"content": "Test"
},
{
"role": "user",
"content": "Test {CONTEXT} - {CODE}"
}
],
"chat_format": "llama2",
"max_tokens": 64
});
let response = llama_cpp.do_completion(&prompt, run_params).await?;
assert!(!response.insert_text.is_empty());
Ok(())
}
#[tokio::test]
async fn llama_cpp_do_completion_fim() -> anyhow::Result<()> {
let configuration: config::LLaMACPP = serde_json::from_value(json!({
"repository": "stabilityai/stable-code-3b",
"name": "stable-code-3b-Q5_K_M.gguf",
@@ -145,7 +174,7 @@ mod test {
}
#[tokio::test]
async fn llama_cpp_do_generate() -> anyhow::Result<()> {
async fn llama_cpp_do_generate_fim() -> anyhow::Result<()> {
let configuration: config::LLaMACPP = serde_json::from_value(json!({
"repository": "stabilityai/stable-code-3b",
"name": "stable-code-3b-Q5_K_M.gguf",

src/transformer_backends/llama_cpp/model.rs
View File

@@ -4,7 +4,7 @@ use llama_cpp_2::{
ggml_time_us,
llama_backend::LlamaBackend,
llama_batch::LlamaBatch,
model::{params::LlamaModelParams, AddBos, LlamaChatMessage, LlamaModel},
model::{params::LlamaModelParams, AddBos, LlamaChatMessage, LlamaModel, Special},
token::data_array::LlamaTokenDataArray,
};
use once_cell::sync::Lazy;
@@ -56,11 +56,11 @@ impl Model {
.with_context(|| format!("failed to tokenize {}", prompt))?;
let n_cxt = ctx.n_ctx() as usize;
let n_kv_req = tokens_list.len() + params.max_new_tokens;
let n_kv_req = tokens_list.len() + params.max_tokens;
info!(
"n_len / max_new_tokens = {}, n_ctx = {n_cxt}, k_kv_req = {n_kv_req}",
params.max_new_tokens
params.max_tokens
);
// make sure the KV cache is big enough to hold all the prompt and generated tokens
@@ -89,7 +89,7 @@ impl Model {
let mut n_cur = n_start;
let mut n_decode = 0;
let t_main_start = ggml_time_us();
while (n_cur as usize) <= (n_start as usize + params.max_new_tokens) {
while (n_cur as usize) <= (n_start as usize + params.max_tokens) {
// sample the next token
{
let candidates = ctx.candidates_ith(batch.n_tokens() - 1);
@@ -103,7 +103,7 @@ impl Model {
break;
}
output.push(self.model.token_to_str(new_token_id)?);
output.push(self.model.token_to_str(new_token_id, Special::Tokenize)?);
batch.clear();
batch.add(new_token_id, n_cur, &[0], true)?;
}
@@ -143,12 +143,12 @@ impl Model {
#[instrument(skip(self))]
pub fn get_eos_token(&self) -> anyhow::Result<String> {
let token = self.model.token_eos();
Ok(self.model.token_to_str(token)?)
Ok(self.model.token_to_str(token, Special::Tokenize)?)
}
#[instrument(skip(self))]
pub fn get_bos_token(&self) -> anyhow::Result<String> {
let token = self.model.token_bos();
Ok(self.model.token_to_str(token)?)
Ok(self.model.token_to_str(token, Special::Tokenize)?)
}
}
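The detokenization calls above now pass a Special argument, matching the newer llama-cpp-2 API this commit switches to. A small sketch of the updated call shape, assuming an already loaded LlamaModel:

use llama_cpp_2::model::{LlamaModel, Special};

// Render the model's EOS token as a string with the newer two-argument
// token_to_str, which takes a Special mode alongside the token.
fn eos_as_string(model: &LlamaModel) -> anyhow::Result<String> {
    let token = model.token_eos();
    Ok(model.token_to_str(token, Special::Tokenize)?)
}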