Rate limit vscode inlineCompletions

Silas Marvin 2024-06-01 08:52:01 -07:00
parent 4ad13253b1
commit c9479b21cf
4 changed files with 41 additions and 10 deletions

View File

@@ -40,6 +40,13 @@
"type": "object",
"default": {},
"description": "JSON configuration for LSP-AI generation"
},
"lsp-ai.inlineCompletionConfiguration": {
"type": "object",
"default": {
"maxCompletionsPerSecond": 1
},
"description": "JSON configuration for LSP-AI generation"
}
}
}
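For reference, the new lsp-ai.inlineCompletionConfiguration setting caps how often the extension will ask the server for an inline completion. A minimal sketch of the arithmetic, assuming the setting is read as shown in the extension code below (the values here are illustrative, not defaults beyond the documented 1):

const maxCompletionsPerSecond = 1;                              // from lsp-ai.inlineCompletionConfiguration
const minSecondsBetweenRequests = 1 / maxCompletionsPerSecond;  // 1 -> 1s apart, 0.5 -> 2s apart, 4 -> 0.25s apart
console.log(`at most one inline completion every ${minSecondsBetweenRequests}s`);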

View File

@@ -139,6 +139,9 @@ export function activate(context: vscode.ExtensionContext) {
generationConfiguration = defaultGenerationConfiguration;
}
// Set the inlineCompletionConfiguration
const inlineCompletionConfiguration = vscode.workspace.getConfiguration("lsp-ai").inlineCompletionConfiguration;
const clientOptions: LanguageClientOptions = {
documentSelector: [{ scheme: "file" }],
initializationOptions: serverConfiguration
@@ -177,6 +180,8 @@ export function activate(context: vscode.ExtensionContext) {
context.subscriptions.push(vscode.commands.registerTextEditorCommand(generateCommand, generateCommandHandler));
// Register as an inline completion provider
let lastInlineCompletion = Date.now();
let inlineCompletionRequestCounter = 0;
vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' },
{
provideInlineCompletionItems: async (document: vscode.TextDocument, position: vscode.Position) => {
@@ -188,8 +193,24 @@
model: generationConfiguration.model,
parameters: generationConfiguration.parameters
};
const result = await client.sendRequest("textDocument/generation", params);
return [new vscode.InlineCompletionItem(result["generatedText"])];
inlineCompletionRequestCounter += 1;
let localInlineCompletionRequestCounter = inlineCompletionRequestCounter;
if ((Date.now() - lastInlineCompletion) / 1000 < 1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) {
await new Promise(r => setTimeout(r, ((1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) - ((Date.now() - lastInlineCompletion) / 1000 )) * 1000));
if (inlineCompletionRequestCounter == localInlineCompletionRequestCounter) {
lastInlineCompletion = Date.now();
const result = await client.sendRequest("textDocument/generation", params);
return [new vscode.InlineCompletionItem(result["generatedText"])];
} else {
return [];
}
} else {
lastInlineCompletion = Date.now();
const result = await client.sendRequest("textDocument/generation", params);
return [new vscode.InlineCompletionItem(result["generatedText"])];
}
}
}
);
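The branch above is a throttle with a stale-request check: each call takes a ticket from a shared counter, sleeps out the remaining interval if the last request was too recent, and only proceeds if no newer call arrived while it was waiting. A self-contained sketch of the same pattern, using hypothetical names (RateLimiter, runLatest) that are not part of the extension:

class RateLimiter {
    private lastRun = 0;   // timestamp (ms) of the last request that went through
    private counter = 0;   // monotonically increasing ticket number

    constructor(private maxPerSecond: number) {}

    // Run `fn` only if no newer call supersedes this one while waiting out the interval.
    async runLatest<T>(fn: () => Promise<T>): Promise<T | undefined> {
        this.counter += 1;
        const myTicket = this.counter;
        const minIntervalMs = 1000 / this.maxPerSecond;
        const elapsedMs = Date.now() - this.lastRun;
        if (elapsedMs < minIntervalMs) {
            await new Promise(resolve => setTimeout(resolve, minIntervalMs - elapsedMs));
            if (this.counter !== myTicket) {
                return undefined;   // a newer completion request arrived; drop this one
            }
        }
        this.lastRun = Date.now();
        return fn();
    }
}

Wrapping the client.sendRequest call in something like this keeps only the most recent completion request once typing pauses, which is the effect the inline branches above achieve.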

View File

@@ -5,6 +5,10 @@ use std::collections::HashMap;
pub type Kwargs = HashMap<String, Value>;
const fn max_requests_per_second_default() -> f32 {
1.
}
#[derive(Debug, Clone, Deserialize)]
pub enum ValidMemoryBackend {
#[serde(rename = "file_store")]
@@ -103,7 +107,7 @@ pub struct MistralFIM {
pub fim_endpoint: Option<String>,
// The model name
pub model: String,
#[serde(default = "api_max_requests_per_second_default")]
#[serde(default = "max_requests_per_second_default")]
pub max_requests_per_second: f32,
}
@@ -117,10 +121,8 @@ pub struct LLaMACPP {
pub n_gpu_layers: u32,
#[serde(default = "n_ctx_default")]
pub n_ctx: u32,
}
const fn api_max_requests_per_second_default() -> f32 {
0.5
#[serde(default = "max_requests_per_second_default")]
pub max_requests_per_second: f32,
}
#[derive(Clone, Debug, Deserialize)]
@@ -134,7 +136,7 @@ pub struct OpenAI {
// The chat endpoint
pub chat_endpoint: Option<String>,
// The maximum requests per second
#[serde(default = "api_max_requests_per_second_default")]
#[serde(default = "max_requests_per_second_default")]
pub max_requests_per_second: f32,
// The model name
pub model: String,
@@ -151,7 +153,7 @@ pub struct Anthropic {
// The chat endpoint
pub chat_endpoint: Option<String>,
// The maximum requests per second
#[serde(default = "api_max_requests_per_second_default")]
#[serde(default = "max_requests_per_second_default")]
pub max_requests_per_second: f32,
// The model name
pub model: String,
@@ -233,7 +235,7 @@ impl Config {
)
})? {
#[cfg(feature = "llama_cpp")]
ValidModel::LLaMACPP(_) => Ok(1.),
ValidModel::LLaMACPP(llama_cpp) => Ok(llama_cpp.max_requests_per_second),
ValidModel::OpenAI(open_ai) => Ok(open_ai.max_requests_per_second),
ValidModel::Anthropic(anthropic) => Ok(anthropic.max_requests_per_second),
ValidModel::MistralFIM(mistral_fim) => Ok(mistral_fim.max_requests_per_second),

View File

@@ -124,6 +124,7 @@ fn do_run(
let mut last_completion_request = None;
let run_dispatch_request = |request| {
eprintln!("DISPATCHING REQUEST");
let task_connection = connection.clone();
let task_transformer_backends = transformer_backends.clone();
let task_memory_backend_tx = memory_backend_tx.clone();