ollama: Support model context_size (num_ctx) >2048 (#16877)

Author: Peter Tripp
Date: 2024-08-26 11:09:47 -04:00 (committed by GitHub)
commit 7936fe40ae
parent 2a03dde538
2 changed files with 28 additions and 2 deletions


@@ -895,7 +895,8 @@
         "api_url": "https://generativelanguage.googleapis.com"
     },
     "ollama": {
-        "api_url": "http://localhost:11434"
+        "api_url": "http://localhost:11434",
+        "low_speed_timeout_in_seconds": 60
     },
     "openai": {
         "version": "1",


@@ -70,11 +70,36 @@ pub struct Model {
     pub keep_alive: Option<KeepAlive>,
 }
 
+// This could be dynamically retrieved via the API (1 call per model)
+// curl -s http://localhost:11434/api/show -d '{"model": "llama3.1:latest"}' | jq '.model_info."llama.context_length"'
+fn get_max_tokens(name: &str) -> usize {
+    match name {
+        "dolphin-llama3:8b-256k" => 262144, // 256K
+        _ => match name.split(':').next().unwrap() {
+            "mistral-nemo" => 1024000, // 1M
+            "deepseek-coder-v2" => 163840, // 160K
+            "llama3.1" | "phi3" | "command-r" | "command-r-plus" => 131072, // 128K
+            "codeqwen" => 65536, // 64K
+            "mistral" | "mistral-large" | "dolphin-mistral" | "codestral" // 32K
+            | "mistral-openorca" | "dolphin-mixtral" | "mixtral" | "llava"
+            | "qwen" | "qwen2" | "wizardlm2" | "wizard-math" => 32768,
+            "codellama" | "stable-code" | "deepseek-coder" | "starcoder2" // 16K
+            | "wizardcoder" => 16384,
+            "llama3" | "gemma2" | "gemma" | "codegemma" | "dolphin-llama3" // 8K
+            | "llava-llama3" | "starcoder" | "openchat" | "aya" => 8192,
+            "llama2" | "yi" | "llama2-chinese" | "vicuna" | "nous-hermes2" // 4K
+            | "stablelm2" => 4096,
+            "phi" | "orca-mini" | "tinyllama" | "granite-code" => 2048, // 2K
+            _ => 2048, // 2K (default)
+        },
+    }
+}
+
 impl Model {
     pub fn new(name: &str) -> Self {
         Self {
             name: name.to_owned(),
-            max_tokens: 2048,
+            max_tokens: get_max_tokens(name),
             keep_alive: Some(KeepAlive::indefinite()),
         }
     }
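For reference, the lookup tries the exact model name first, then falls back to the base name before the ':' tag, and defaults to 2048 tokens for unknown models. The following is a minimal, hypothetical test sketch (not part of the commit) that exercises that behavior, assuming it lives in the same module as get_max_tokens:

#[cfg(test)]
mod tests {
    use super::get_max_tokens;

    #[test]
    fn context_length_lookup() {
        // Exact-name match takes priority over the base-name fallback.
        assert_eq!(get_max_tokens("dolphin-llama3:8b-256k"), 262144);
        // The tag after ':' is stripped before matching the base name.
        assert_eq!(get_max_tokens("llama3.1:latest"), 131072);
        assert_eq!(get_max_tokens("mistral:7b-instruct"), 32768);
        // Unknown models keep the previous 2048-token default.
        assert_eq!(get_max_tokens("some-unknown-model"), 2048);
    }
}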