mirror of https://github.com/leon-ai/leon.git

feat(server): final LLM setup

commit cf02f0f91d
parent 2de95c4ef9
@@ -52,6 +52,64 @@
     {
       "route": "/api/action/games/rochambeau/rematch",
       "params": []
     },
+    {
+      "method": "POST",
+      "route": "/api/action/news/github_trends/run",
+      "params": ["number", "daterange"],
+      "entitiesType": "builtIn"
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/news/product_hunt_trends/run",
+      "params": []
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/create_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/productivity/todo_list/view_lists",
+      "params": []
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/view_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/rename_list",
+      "params": ["old_list", "new_list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/delete_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/add_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/complete_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/uncheck_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
     {
       "method": "GET",
       "route": "/api/action/leon/age/run",
@@ -127,64 +185,6 @@
     {
       "route": "/api/action/leon/welcome/run",
       "params": []
     },
-    {
-      "method": "POST",
-      "route": "/api/action/news/github_trends/run",
-      "params": ["number", "daterange"],
-      "entitiesType": "builtIn"
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/news/product_hunt_trends/run",
-      "params": []
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/create_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/productivity/todo_list/view_lists",
-      "params": []
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/view_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/rename_list",
-      "params": ["old_list", "new_list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/delete_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/add_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/complete_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/uncheck_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
     {
       "method": "GET",
       "route": "/api/action/social_communication/mbti/setup",
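Read together, these first two hunks move the news and productivity endpoint entries to an earlier position in the generated endpoints manifest; the entries themselves are byte-for-byte identical. Every entry follows the same shape, so a minimal TypeScript typing might look like the sketch below. The type names (SkillEndpoint, EntitiesType) are hypothetical, inferred from the JSON above, and are not part of this commit:

// Sketch of the shape each endpoints-manifest entry appears to have.
type HTTPMethod = 'GET' | 'POST'

// 'builtIn' seems to map to built-in entities (numbers, date ranges),
// 'trim' to entities extracted by trimming text around keywords.
type EntitiesType = 'builtIn' | 'trim'

interface SkillEndpoint {
  method: HTTPMethod
  route: string // e.g. '/api/action/productivity/todo_list/create_list'
  params: string[] // named params the action expects; [] if none
  entitiesType?: EntitiesType // appears only when params is non-empty
}

const example: SkillEndpoint = {
  method: 'POST',
  route: '/api/action/productivity/todo_list/add_todos',
  params: ['todos', 'list'],
  entitiesType: 'trim'
}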
@@ -95,7 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
-    "node-llama-cpp": "3.0.0-beta.15",
+    "node-llama-cpp": "3.0.0-beta.16",
     "node-wav": "0.0.2",
     "os-name": "4.0.1",
    "pretty-bytes": "5.6.0",
@@ -161,25 +161,29 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
  * LLMs
  */
 // export const LLM_VERSION = 'v0.2.Q4_K_S'
 // export const LLM_VERSION = 'v0.2.Q4_K_M'
 export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
 // export const LLM_NAME = 'Mistral 7B Instruct'
 export const LLM_NAME = 'Gemma 1.1 7B (IT)'
-export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 // export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
 export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
+export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 0
 export const LLM_MINIMUM_FREE_RAM = 0
 export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 /*export const LLM_HF_DOWNLOAD_URL =
   'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
 export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
-export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2681'

 /**
  * Misc
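The URL fix in this hunk matters: on Hugging Face, a /blob/main/... URL serves the HTML file-viewer page, while /resolve/main/... serves (a redirect to) the raw file, so only the latter works for fetching the GGUF weights. Note also that `gemma-${LLM_VERSION}.gguf` with LLM_VERSION = '1.1-7b-it-Q4_K_M' resolves to exactly the file name in the download URL, gemma-1.1-7b-it-Q4_K_M.gguf. A minimal sketch of a downloader built on these constants follows; the downloadLLM helper is hypothetical and not part of this commit:

import fs from 'node:fs'
import { Readable } from 'node:stream'
import { pipeline } from 'node:stream/promises'
import type { ReadableStream } from 'node:stream/web'

// Hypothetical helper: stream the GGUF weights to disk.
// fetch() follows the CDN redirect that /resolve/main/... responds with.
async function downloadLLM(url: string, destPath: string): Promise<void> {
  const response = await fetch(url)
  if (!response.ok || !response.body) {
    throw new Error(`Download failed: ${response.status} ${response.statusText}`)
  }
  // Bridge the web stream returned by fetch into a Node writable stream.
  await pipeline(
    Readable.fromWeb(response.body as ReadableStream<Uint8Array>),
    fs.createWriteStream(destPath)
  )
}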
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

 interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
   data: {
@@ -38,11 +38,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
     LogHelper.info('Executing...')

     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -50,14 +50,13 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
       })
       const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
         type: 'object',
         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
         // @ts-expect-error
         properties: {
           ...this.data.schema
         }
       })
-      const prompt = `Utterance: ${this.input}`
+      const prompt = `${this.systemPrompt} Utterance: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
         contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
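The `await Function('return import("node-llama-cpp")')()` construct above is a common workaround when TypeScript compiles to CommonJS: a literal import() expression gets transpiled into require(), which cannot load an ESM-only package such as node-llama-cpp v3. Hiding the import inside a dynamically evaluated function leaves a real dynamic import() to run at runtime. A standalone sketch of the pattern; the loadEsmModule name is illustrative and not from this commit:

// Evaluate a real dynamic import() at runtime so a CommonJS-targeting
// TypeScript compiler cannot rewrite it into require().
function loadEsmModule<T>(specifier: string): Promise<T> {
  // eslint-disable-next-line @typescript-eslint/no-implied-eval
  return Function('specifier', 'return import(specifier)')(specifier) as Promise<T>
}

// Usage, mirroring the duty code above:
// const { LlamaCompletion } =
//   await loadEsmModule<typeof import('node-llama-cpp')>('node-llama-cpp')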
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

 interface SummarizationLLMDutyParams extends LLMDutyParams {}

@@ -30,11 +30,11 @@ export class SummarizationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')

     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -48,7 +48,7 @@ export class SummarizationLLMDuty extends LLMDuty {
         }
       }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
         grammar,
         maxTokens: context.contextSize
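In both duties so far, and in the translation duty below, the system prompt is now prepended directly to the completion prompt. This looks deliberate: LlamaCompletion performs raw text completion rather than a chat session, so there is no separate system-message slot and any instructions have to travel inside the single prompt string (my reading of the API, not something the commit states). A sketch of the resulting prompt construction:

// Illustrative only: how the duties now assemble the single prompt string.
// systemPrompt and input stand in for the duty's fields.
function buildCompletionPrompt(
  systemPrompt: string,
  label: string, // 'Utterance' for NER, 'Text' for summarization/translation
  input: string
): string {
  return `${systemPrompt} ${label}: ${input}`
}

console.log(buildCompletionPrompt('You are an NER engine.', 'Utterance', 'Buy milk tomorrow'))
// -> "You are an NER engine. Utterance: Buy milk tomorrow"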
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

 interface TranslationLLMDutyParams extends LLMDutyParams {
   data: {
@@ -47,11 +47,11 @@ export class TranslationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')

     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -65,8 +65,9 @@ export class TranslationLLMDuty extends LLMDuty {
         }
       }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
+        contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
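The contextShiftSize option, already used by the NER duty and added to translation here, appears to control how much of the window node-llama-cpp may discard when a long generation overflows the context; halving the window is a conventional choice. That is my interpretation, not something the commit documents. A sketch against the same beta API, where the contextSequence argument and getSequence() call are assumptions (the hunks above truncate the LlamaCompletion constructor arguments):

import type { LlamaModel } from 'node-llama-cpp'

// Sketch (not from the commit) of the completion call with context shifting.
async function completeWithShift(model: LlamaModel, prompt: string) {
  const { LlamaCompletion } = await Function('return import("node-llama-cpp")')()

  const context = await model.createContext({ threads: 4 })
  // contextSequence is an assumption; the diff does not show these arguments.
  const completion = new LlamaCompletion({
    contextSequence: context.getSequence()
  })

  return completion.generateCompletion(prompt, {
    // Assumed semantics: on overflow, evict half the window and keep going.
    contextShiftSize: context.contextSize / 2,
    maxTokens: context.contextSize
  })
}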
@@ -13,7 +13,6 @@ import { SystemHelper } from '@/helpers/system-helper'
 type LLMManagerLlama = Llama | null
 type LLMManagerModel = LlamaModel | null

-export const LLM_CONTEXT_SIZE = 8_096
 // Set to 0 to use the maximum threads supported by the current machine hardware
 export const LLM_THREADS = 4

@@ -77,11 +76,15 @@ export default class LLMManager {
     }

     try {
-      const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
+      const { LlamaLogLevel, getLlama } = await Function(
+        'return import("node-llama-cpp")'
+      )()

       this._llama = await getLlama({
-        logLevel: LlamaLogLevel.disabled
+        logLevel: LlamaLogLevel.debug
       })
+      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+      // @ts-expect-error
       this._model = await this._llama.loadModel({
         modelPath: LLM_PATH
       })
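With LLM_CONTEXT_SIZE deleted, createContext is now called without an explicit contextSize, presumably letting node-llama-cpp pick one from the model's training context and available memory (my reading; the commit does not say). LLM_THREADS stays exported, with 0 meaning "use all hardware threads" per its comment. A sketch of the resulting initialization flow, using only calls visible in the diff, with error handling trimmed:

// Hypothetical standalone version of the manager's post-commit init.
const LLM_THREADS = 4 // 0 would mean: use all hardware threads

async function initLLM(modelPath: string) {
  // Hidden dynamic import so a CommonJS build can still load the ESM package.
  const { LlamaLogLevel, getLlama } = await Function(
    'return import("node-llama-cpp")'
  )()

  const llama = await getLlama({ logLevel: LlamaLogLevel.debug })
  const model = await llama.loadModel({ modelPath })
  // No explicit contextSize anymore: the library chooses one (assumed behavior).
  const context = await model.createContext({ threads: LLM_THREADS })
  return { llama, model, context }
}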
@@ -76,11 +76,11 @@ import { LogHelper } from '@/helpers/log-helper'
   await summarizationDuty.execute()*/

   /*const translationDuty = new TranslationLLMDuty({
-    input: 'Bonjour, la température est très agréable à Shenzhen',
+    input: 'the weather is good in shenzhen',
     data: {
-      source: 'French',
-      target: 'English'
-      // autoDetectLanguage: true
+      // source: 'French',
+      target: 'French',
+      autoDetectLanguage: true
     }
   })
   await translationDuty.execute()*/
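This commented-out playground snippet now exercises the auto-detect path: instead of declaring source: 'French', the duty is asked to detect the input language itself and translate into French. Uncommented and trimmed, the example would read as below; the import path is an assumption since the playground file's imports are not shown:

// Path assumed from the '@/core/llm-manager' aliases seen elsewhere in the diff.
import { TranslationLLMDuty } from '@/core/llm-manager/llm-duties/translation-llm-duty'

const translationDuty = new TranslationLLMDuty({
  input: 'the weather is good in shenzhen',
  data: {
    target: 'French',
    // Let the duty infer the source language instead of hardcoding it.
    autoDetectLanguage: true
  }
})
await translationDuty.execute()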