diff --git a/src/llm/README.md b/src/llm/README.md
new file mode 100644
index 00000000..8d75d02e
--- /dev/null
+++ b/src/llm/README.md
@@ -0,0 +1,7 @@
+# Local LLM Integration
+1. Clone and build [llama.cpp](https://github.com/ggerganov/llama.cpp) on the same machine where you will run your Uqbar node
+   - follow their README for build details; in most cases simply running `make` works
+   - make sure your model is a `.gguf` file
+2. Within the llama.cpp directory, start the server on the same machine that will run your Uqbar node: `./server --port <port>`
+   - Note: you can pass any other command-line arguments to the llama.cpp server depending on your preferences/hardware/model/etc.
+3. Run your Uqbar node with `--features llm` and `--llm http://localhost:<port>`. For example: `cargo +nightly run --features llm --release home --rpc wss://eth-sepolia.g.alchemy.com/v2/<api-key> --llm http://localhost:<port>`
diff --git a/src/llm.rs b/src/llm/mod.rs
similarity index 99%
rename from src/llm.rs
rename to src/llm/mod.rs
index 7417164e..ce53d802 100644
--- a/src/llm.rs
+++ b/src/llm/mod.rs
@@ -1,7 +1,10 @@
 use crate::types::*;
+use crate::llm::types::*;
 use anyhow::Result;
 use reqwest::Response as ReqwestResponse;
 
+mod types;
+
 pub async fn llm(
     our_name: String,
     send_to_loop: MessageSender,
diff --git a/src/llm/types.rs b/src/llm/types.rs
new file mode 100644
index 00000000..3fd2be01
--- /dev/null
+++ b/src/llm/types.rs
@@ -0,0 +1,86 @@
+use serde::{Deserialize, Serialize};
+use thiserror::Error;
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct LlmPrompt {
+    pub prompt: String,
+    pub n_predict: u64,
+}
+
+#[derive(Clone, Serialize, Deserialize, Debug)]
+pub struct LlmResponse {
+    pub content: String,
+    pub generation_settings: GenerationSettings,
+    pub model: String,
+    pub prompt: String,
+    pub slot_id: u64,
+    pub stop: bool,
+    pub stopped_eos: bool,
+    pub stopped_limit: bool,
+    pub stopped_word: bool,
+    pub stopping_word: String,
+    pub timings: Timings,
+    pub tokens_cached: u64,
+    pub tokens_evaluated: u64,
+    pub tokens_predicted: u64,
+    pub truncated: bool,
+}
+
+#[derive(Clone, Serialize, Deserialize, Debug)]
+pub struct GenerationSettings {
+    pub frequency_penalty: f64,
+    pub grammar: String,
+    pub ignore_eos: bool,
+    pub logit_bias: Vec<serde_json::Value>, // This should be changed to the appropriate type
+    pub mirostat: u64,
+    pub mirostat_eta: f64,
+    pub mirostat_tau: f64,
+    pub model: String,
+    pub n_ctx: u64,
+    pub n_keep: u64,
+    pub n_predict: u64,
+    pub n_probs: u64,
+    pub penalize_nl: bool,
+    pub presence_penalty: f64,
+    pub repeat_last_n: u64,
+    pub repeat_penalty: f64,
+    pub seed: u64,
+    pub stop: Vec<serde_json::Value>, // This should be changed to the appropriate type
+    pub stream: bool,
+    pub temp: f64,
+    pub tfs_z: f64,
+    pub top_k: u64,
+    pub top_p: f64,
+    pub typical_p: f64,
+}
+
+#[derive(Clone, Serialize, Deserialize, Debug)]
+pub struct Timings {
+    pub predicted_ms: f64,
+    pub predicted_n: u64,
+    pub predicted_per_second: f64,
+    pub predicted_per_token_ms: f64,
+    pub prompt_ms: f64,
+    pub prompt_n: u64,
+    pub prompt_per_second: f64,
+    pub prompt_per_token_ms: f64,
+}
+
+#[derive(Error, Debug, Serialize, Deserialize)]
+pub enum LlmError {
+    #[error("llm: rsvp is None but message is expecting response")]
+    BadRsvp,
+    #[error("llm: no json in request")]
+    NoJson,
+    #[error(
+        "llm: JSON payload could not be parsed to LlmPrompt: {error}. Got {:?}.",
+        json
+    )]
+    BadJson { json: String, error: String },
+    #[error("llm: http method not supported: {:?}", method)]
+    BadMethod { method: String },
+    #[error("llm: failed to execute request {:?}", error)]
+    RequestFailed { error: String },
+    #[error("llm: failed to deserialize response {:?}", error)]
+    DeserializationToLlmResponseFailed { error: String },
+}
diff --git a/src/types.rs b/src/types.rs
index 8a6d2b86..7fe2018e 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1060,90 +1060,3 @@ pub enum EncryptorMessage {
     Encrypt(EncryptAction),
 }
 // encryptor End
-
-// llm.rs Start
-#[derive(Debug, Serialize, Deserialize)]
-pub struct LlmPrompt {
-    pub prompt: String,
-    pub n_predict: u64,
-}
-
-#[derive(Clone, Serialize, Deserialize, Debug)]
-pub struct LlmResponse {
-    pub content: String,
-    pub generation_settings: GenerationSettings,
-    pub model: String,
-    pub prompt: String,
-    pub slot_id: u64,
-    pub stop: bool,
-    pub stopped_eos: bool,
-    pub stopped_limit: bool,
-    pub stopped_word: bool,
-    pub stopping_word: String,
-    pub timings: Timings,
-    pub tokens_cached: u64,
-    pub tokens_evaluated: u64,
-    pub tokens_predicted: u64,
-    pub truncated: bool,
-}
-
-#[derive(Clone, Serialize, Deserialize, Debug)]
-pub struct GenerationSettings {
-    pub frequency_penalty: f64,
-    pub grammar: String,
-    pub ignore_eos: bool,
-    pub logit_bias: Vec<serde_json::Value>, // This should be changed to the appropriate type
-    pub mirostat: u64,
-    pub mirostat_eta: f64,
-    pub mirostat_tau: f64,
-    pub model: String,
-    pub n_ctx: u64,
-    pub n_keep: u64,
-    pub n_predict: u64,
-    pub n_probs: u64,
-    pub penalize_nl: bool,
-    pub presence_penalty: f64,
-    pub repeat_last_n: u64,
-    pub repeat_penalty: f64,
-    pub seed: u64,
-    pub stop: Vec<serde_json::Value>, // This should be changed to the appropriate type
-    pub stream: bool,
-    pub temp: f64,
-    pub tfs_z: f64,
-    pub top_k: u64,
-    pub top_p: f64,
-    pub typical_p: f64,
-}
-
-#[derive(Clone, Serialize, Deserialize, Debug)]
-pub struct Timings {
-    pub predicted_ms: f64,
-    pub predicted_n: u64,
-    pub predicted_per_second: f64,
-    pub predicted_per_token_ms: f64,
-    pub prompt_ms: f64,
-    pub prompt_n: u64,
-    pub prompt_per_second: f64,
-    pub prompt_per_token_ms: f64,
-}
-
-#[derive(Error, Debug, Serialize, Deserialize)]
-pub enum LlmError {
-    #[error("llm: rsvp is None but message is expecting response")]
-    BadRsvp,
-    #[error("llm: no json in request")]
-    NoJson,
-    #[error(
-        "llm: JSON payload could not be parsed to LlmPrompt: {error}. Got {:?}.",
-        json
-    )]
-    BadJson { json: String, error: String },
-    #[error("llm: http method not supported: {:?}", method)]
-    BadMethod { method: String },
-    #[error("llm: failed to execute request {:?}", error)]
-    RequestFailed { error: String },
-    #[error("llm: failed to deserialize response {:?}", error)]
-    DeserializationToLlmResponseFailed { error: String },
-}
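
For anyone who wants to exercise the new types outside the node, the sketch below shows roughly how `LlmPrompt`/`LlmResponse` line up with the llama.cpp server's `/completion` endpoint described in the README. It is not part of the diff: the port (8080), the prompt text, and the use of `serde_json::Value` for the reply are illustrative assumptions, and it presumes `reqwest` (with the `json` feature), `tokio`, `serde`, `serde_json`, and `anyhow` are available, as they appear to be elsewhere in the crate.

```rust
// Hypothetical standalone check, not part of this diff: POST an LlmPrompt-shaped
// JSON body to a llama.cpp `./server` started as in the README, and print the
// generated text. Port 8080 and the prompt text are placeholders.
use serde::Serialize;

#[derive(Debug, Serialize)]
struct LlmPrompt {
    prompt: String,
    n_predict: u64,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let prompt = LlmPrompt {
        prompt: "Building a website can be done in 10 simple steps:".into(),
        n_predict: 64,
    };

    // The reply has the shape of LlmResponse from src/llm/types.rs; deserializing
    // into serde_json::Value keeps this sketch self-contained.
    let reply: serde_json::Value = reqwest::Client::new()
        .post("http://localhost:8080/completion")
        .json(&prompt)
        .send()
        .await?
        .json()
        .await?;

    println!("{}", reply["content"]);
    Ok(())
}
```

If the server is running, the printed `content` field is the same field that `LlmResponse` captures when the node itself talks to the `--llm` URL.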