mirror of https://github.com/leon-ai/leon.git synced 2024-11-23 20:12:08 +03:00

feat(server): final LLM setup

louistiti 2024-04-17 00:10:41 +08:00
parent 2de95c4ef9
commit cf02f0f91d
No known key found for this signature in database
GPG Key ID: 92CD6A2E497E1669
8 changed files with 98 additions and 91 deletions

View File

@@ -52,6 +52,64 @@
     "route": "/api/action/games/rochambeau/rematch",
     "params": []
   },
+  {
+    "method": "POST",
+    "route": "/api/action/news/github_trends/run",
+    "params": ["number", "daterange"],
+    "entitiesType": "builtIn"
+  },
+  {
+    "method": "GET",
+    "route": "/api/action/news/product_hunt_trends/run",
+    "params": []
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/create_list",
+    "params": ["list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "GET",
+    "route": "/api/action/productivity/todo_list/view_lists",
+    "params": []
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/view_list",
+    "params": ["list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/rename_list",
+    "params": ["old_list", "new_list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/delete_list",
+    "params": ["list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/add_todos",
+    "params": ["todos", "list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/complete_todos",
+    "params": ["todos", "list"],
+    "entitiesType": "trim"
+  },
+  {
+    "method": "POST",
+    "route": "/api/action/productivity/todo_list/uncheck_todos",
+    "params": ["todos", "list"],
+    "entitiesType": "trim"
+  },
   {
     "method": "GET",
     "route": "/api/action/leon/age/run",
@@ -127,64 +185,6 @@
     "route": "/api/action/leon/welcome/run",
     "params": []
   },
-  {
-    "method": "POST",
-    "route": "/api/action/news/github_trends/run",
-    "params": ["number", "daterange"],
-    "entitiesType": "builtIn"
-  },
-  {
-    "method": "GET",
-    "route": "/api/action/news/product_hunt_trends/run",
-    "params": []
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/create_list",
-    "params": ["list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "GET",
-    "route": "/api/action/productivity/todo_list/view_lists",
-    "params": []
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/view_list",
-    "params": ["list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/rename_list",
-    "params": ["old_list", "new_list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/delete_list",
-    "params": ["list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/add_todos",
-    "params": ["todos", "list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/complete_todos",
-    "params": ["todos", "list"],
-    "entitiesType": "trim"
-  },
-  {
-    "method": "POST",
-    "route": "/api/action/productivity/todo_list/uncheck_todos",
-    "params": ["todos", "list"],
-    "entitiesType": "trim"
-  },
   {
     "method": "GET",
     "route": "/api/action/social_communication/mbti/setup",

View File

@@ -95,7 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
-    "node-llama-cpp": "3.0.0-beta.15",
+    "node-llama-cpp": "3.0.0-beta.16",
    "node-wav": "0.0.2",
     "os-name": "4.0.1",
     "pretty-bytes": "5.6.0",

View File

@@ -161,25 +161,29 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
  * LLMs
  */
-// export const LLM_VERSION = 'v0.2.Q4_K_S'
+// export const LLM_VERSION = 'v0.2.Q4_K_M'
 export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
 // export const LLM_NAME = 'Mistral 7B Instruct'
 export const LLM_NAME = 'Gemma 1.1 7B (IT)'
-export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 // export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
 export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
+export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 0
 export const LLM_MINIMUM_FREE_RAM = 0
 export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+/*export const LLM_HF_DOWNLOAD_URL =
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
 export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+/*export const LLM_MIRROR_DOWNLOAD_URL =
+  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
-export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2681'
 /**
  * Misc
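
The /blob/ to /resolve/ change above is a genuine fix: on Hugging Face, /blob/main/... points at the HTML file viewer, while /resolve/main/... serves the raw file, so only the latter works for a direct download. A minimal sketch of how such a constant could be consumed; the downloadModel helper is hypothetical, not part of this commit:

import { createWriteStream } from 'node:fs'
import { Readable } from 'node:stream'
import { pipeline } from 'node:stream/promises'
import type { ReadableStream as WebReadableStream } from 'node:stream/web'

// Hypothetical downloader for the GGUF model; url and destination would be
// LLM_HF_DOWNLOAD_URL (or LLM_MIRROR_DOWNLOAD_URL) and LLM_PATH from above.
async function downloadModel(url: string, destination: string): Promise<void> {
  const response = await fetch(url)
  if (!response.ok || !response.body) {
    throw new Error(`Download failed with HTTP ${response.status}`)
  }
  // Stream to disk rather than buffering a multi-gigabyte file in memory
  await pipeline(
    Readable.fromWeb(response.body as unknown as WebReadableStream),
    createWriteStream(destination)
  )
}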

View File

@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
   data: {
@@ -38,11 +38,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
     LogHelper.info('Executing...')
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -50,14 +50,13 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
       })
       const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
         type: 'object',
-        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
-        // @ts-expect-error
         properties: {
           ...this.data.schema
         }
       })
-      const prompt = `Utterance: ${this.input}`
+      const prompt = `${this.systemPrompt} Utterance: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
+        contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
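
A note on the recurring await Function('return import("node-llama-cpp")')() change in this and the two following duty files: when TypeScript compiles to CommonJS, a plain dynamic import() is transpiled into require(), which cannot load an ESM-only package such as node-llama-cpp 3.x. Building the import expression through the Function constructor hides it from the compiler, so a genuine dynamic import is emitted at runtime. A standalone sketch of the pattern; the importEsm helper name is illustrative:

// Load an ESM-only dependency from CommonJS-compiled TypeScript. A plain
// `await import('node-llama-cpp')` would be rewritten to require() during
// CJS transpilation and fail at runtime; Function() hides it from the compiler.
const importEsm = <T = unknown>(specifier: string): Promise<T> =>
  Function('s', 'return import(s)')(specifier) as Promise<T>

const { LlamaCompletion, LlamaJsonSchemaGrammar } =
  await importEsm<typeof import('node-llama-cpp')>('node-llama-cpp')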

View File

@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 interface SummarizationLLMDutyParams extends LLMDutyParams {}
@@ -30,11 +30,11 @@ export class SummarizationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -48,7 +48,7 @@ export class SummarizationLLMDuty extends LLMDuty {
           }
         }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
         grammar,
         maxTokens: context.contextSize

View File

@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 interface TranslationLLMDutyParams extends LLMDutyParams {
   data: {
@@ -47,11 +47,11 @@ export class TranslationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -65,8 +65,9 @@ export class TranslationLLMDuty extends LLMDuty {
           }
         }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
+        contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
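
All three duties now share the same execution shape: create a context pinned to LLM_THREADS (context size is left to the library default), constrain generation with a JSON-schema grammar, prepend the duty's system prompt to the input, and cap output at the context size, with a half-context contextShiftSize where long inputs are expected. Condensed into one sketch below; the stand-in schema and the getSequence() wiring are assumptions based on the node-llama-cpp 3.x beta API, not lines from this diff:

// Condensed duty flow, assuming an already-loaded model on LLM_MANAGER.
async function runDuty(systemPrompt: string, input: string) {
  const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
    'return import("node-llama-cpp")'
  )()
  const context = await LLM_MANAGER.model.createContext({ threads: LLM_THREADS })
  const completion = new LlamaCompletion({
    contextSequence: context.getSequence() // assumed wiring, hidden in the diff
  })
  // The grammar forces the model to emit JSON matching this stand-in schema
  const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
    type: 'object',
    properties: { output: { type: 'string' } }
  })
  const rawResult = await completion.generateCompletion(
    `${systemPrompt} Text: ${input}`,
    {
      contextShiftSize: context.contextSize / 2, // drop half the window on overflow
      grammar,
      maxTokens: context.contextSize
    }
  )
  return grammar.parse(rawResult)
}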

View File

@@ -13,7 +13,6 @@ import { SystemHelper } from '@/helpers/system-helper'
 type LLMManagerLlama = Llama | null
 type LLMManagerModel = LlamaModel | null
-export const LLM_CONTEXT_SIZE = 8_096
 // Set to 0 to use the maximum threads supported by the current machine hardware
 export const LLM_THREADS = 4
@@ -77,11 +76,15 @@
     }
     try {
-      const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
+      const { LlamaLogLevel, getLlama } = await Function(
+        'return import("node-llama-cpp")'
+      )()
       this._llama = await getLlama({
-        logLevel: LlamaLogLevel.disabled
+        logLevel: LlamaLogLevel.debug
       })
+      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+      // @ts-expect-error
       this._model = await this._llama.loadModel({
         modelPath: LLM_PATH
       })
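
For context, the manager's loading sequence after this change, condensed into a standalone sketch; the import path for LLM_PATH is an assumed alias:

import { LLM_PATH } from '@/constants' // assumed alias for the constants file above

// Condensed sketch of the LLMManager loading flow (node-llama-cpp 3.0.0-beta.16)
async function loadLLM() {
  const { LlamaLogLevel, getLlama } = await Function(
    'return import("node-llama-cpp")'
  )()
  // Debug-level logging surfaces llama.cpp output while the LLM setup is finalized
  const llama = await getLlama({ logLevel: LlamaLogLevel.debug })
  const model = await llama.loadModel({ modelPath: LLM_PATH })
  // LLM_CONTEXT_SIZE is gone: each duty now creates its own context and the
  // context size is left to the library default, with only LLM_THREADS pinned
  return { llama, model }
}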

View File

@@ -76,11 +76,11 @@ import { LogHelper } from '@/helpers/log-helper'
 await summarizationDuty.execute()*/
 /*const translationDuty = new TranslationLLMDuty({
-  input: 'Bonjour, la température est très agréable à Shenzhen',
+  input: 'the weather is good in shenzhen',
   data: {
-    source: 'French',
-    target: 'English'
-    // autoDetectLanguage: true
+    // source: 'French',
+    target: 'French',
+    autoDetectLanguage: true
   }
 })
 await translationDuty.execute()*/