mirror of https://github.com/leon-ai/leon.git synced 2024-11-23 20:12:08 +03:00

feat(server): final LLM setup

louistiti 2024-04-17 00:10:41 +08:00
parent 2de95c4ef9
commit cf02f0f91d
8 changed files with 98 additions and 91 deletions

View File

@@ -52,6 +52,64 @@
"route": "/api/action/games/rochambeau/rematch",
"params": []
},
{
"method": "POST",
"route": "/api/action/news/github_trends/run",
"params": ["number", "daterange"],
"entitiesType": "builtIn"
},
{
"method": "GET",
"route": "/api/action/news/product_hunt_trends/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/create_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/productivity/todo_list/view_lists",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/view_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/rename_list",
"params": ["old_list", "new_list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/delete_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/add_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/complete_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/uncheck_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/leon/age/run",
@@ -127,64 +185,6 @@
"route": "/api/action/leon/welcome/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/news/github_trends/run",
"params": ["number", "daterange"],
"entitiesType": "builtIn"
},
{
"method": "GET",
"route": "/api/action/news/product_hunt_trends/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/create_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/productivity/todo_list/view_lists",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/view_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/rename_list",
"params": ["old_list", "new_list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/delete_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/add_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/complete_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/uncheck_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/social_communication/mbti/setup",

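The two hunks in this file are a pure move: the first adds 58 lines at line 52 and the second removes the same 58 lines further down, relocating the news and productivity endpoints within the generated skills-endpoints file. Every entry shares one shape, sketched below as an illustrative TypeScript type (the interface name is mine; the unions only reflect values visible in this diff):

interface SkillEndpoint {
  // In this file, actions that take params use POST; parameter-less ones use GET
  method: 'GET' | 'POST'
  // Pattern: /api/action/<domain>/<skill>/<action>
  route: string
  // Utterance slots forwarded to the skill action
  params: string[]
  // NER strategy used to extract params; absent when params is empty
  entitiesType?: 'builtIn' | 'trim'
}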
View File

@@ -95,7 +95,7 @@
"getos": "3.2.1",
"googleapis": "67.1.1",
"ibm-watson": "6.1.1",
"node-llama-cpp": "3.0.0-beta.15",
"node-llama-cpp": "3.0.0-beta.16",
"node-wav": "0.0.2",
"os-name": "4.0.1",
"pretty-bytes": "5.6.0",

View File

@@ -161,25 +161,29 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/**
* LLMs
*/
// export const LLM_VERSION = 'v0.2.Q4_K_S'
// export const LLM_VERSION = 'v0.2.Q4_K_M'
export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
// export const LLM_NAME = 'Mistral 7B Instruct'
export const LLM_NAME = 'Gemma 1.1 7B (IT)'
export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
// export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
export const LLM_MINIMUM_TOTAL_RAM = 0
export const LLM_MINIMUM_FREE_RAM = 0
export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
/*export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
/*export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
/**
* @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
*/
export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2681'
/**
* Misc

View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
data: {
@@ -38,11 +38,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -50,14 +50,13 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
})
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
properties: {
...this.data.schema
}
})
const prompt = `Utterance: ${this.input}`
const prompt = `${this.systemPrompt} Utterance: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
contextShiftSize: context.contextSize / 2,
grammar,
maxTokens: context.contextSize
})

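The import rewrite repeated across the duty files is the standard workaround for loading an ESM-only package from TypeScript compiled to CommonJS: tsc transforms a literal await import('node-llama-cpp') into a require() call, which throws ERR_REQUIRE_ESM because node-llama-cpp v3 ships as pure ESM. Building the import expression at runtime with the Function constructor hides it from the transform, so it remains a true dynamic import. A self-contained sketch of the pattern (the helper name is mine):

// The type-only import is erased at compile time, so it never
// becomes a require(); only the runtime-built import() loads the module.
type NodeLlamaCpp = typeof import('node-llama-cpp')

async function importNodeLlamaCpp(): Promise<NodeLlamaCpp> {
  return Function('return import("node-llama-cpp")')() as Promise<NodeLlamaCpp>
}

The duty's other edits drop the pinned contextSize (LLM_CONTEXT_SIZE is deleted from the manager later in this commit, so contexts fall back to a default size) and prepend this.systemPrompt to the prompt, since LlamaCompletion performs raw text completion and offers no separate system-prompt slot.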
View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface SummarizationLLMDutyParams extends LLMDutyParams {}
@@ -30,11 +30,11 @@ export class SummarizationLLMDuty extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -48,7 +48,7 @@ export class SummarizationLLMDuty extends LLMDuty {
}
}
})
const prompt = `Text: ${this.input}`
const prompt = `${this.systemPrompt} Text: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
grammar,
maxTokens: context.contextSize

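The summarization duty receives the same treatment; its grammar (whose closing braces are visible in the hunk) constrains sampling so the model can only emit tokens that keep the output valid against a JSON schema, making the result machine-parseable without cleanup. A condensed sketch of the shared duty flow under the v3 beta API; the summary schema and the two stand-in strings are assumptions, while LLM_MANAGER and LLM_THREADS come from the imports shown above:

import { LLM_MANAGER } from '@/core'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
  'return import("node-llama-cpp")'
)()

// No contextSize any more: the context uses the model's default
const context = await LLM_MANAGER.model.createContext({ threads: LLM_THREADS })
const completion = new LlamaCompletion({
  contextSequence: context.getSequence()
})
// Sampling is restricted so the output must match this schema
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
  type: 'object',
  properties: {
    summary: { type: 'string' }
  }
})

const systemPrompt = 'You are a summarizer. Reply with JSON.' // stand-in
const input = 'Some long text to summarize'                   // stand-in
const prompt = `${systemPrompt} Text: ${input}`
const rawResult = await completion.generateCompletion(prompt, {
  grammar,
  maxTokens: context.contextSize
})
const parsed = grammar.parse(rawResult) // e.g. { summary: '...' }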
View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface TranslationLLMDutyParams extends LLMDutyParams {
data: {
@@ -47,11 +47,11 @@ export class TranslationLLMDuty extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -65,8 +65,9 @@ export class TranslationLLMDuty extends LLMDuty {
}
}
})
const prompt = `Text: ${this.input}`
const prompt = `${this.systemPrompt} Text: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
contextShiftSize: context.contextSize / 2,
grammar,
maxTokens: context.contextSize
})

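The addition unique to this hunk (shared with the custom NER duty above) is contextShiftSize on generateCompletion. With maxTokens set to the full context size, generation can outgrow the window; on overflow node-llama-cpp shifts the context by evicting the oldest tokens, and contextShiftSize: context.contextSize / 2 makes each shift free half the window at once rather than shifting repeatedly in small steps. That reading is inferred from the option's name and its usage here, not from documentation.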
View File

@@ -13,7 +13,6 @@ import { SystemHelper } from '@/helpers/system-helper'
type LLMManagerLlama = Llama | null
type LLMManagerModel = LlamaModel | null
export const LLM_CONTEXT_SIZE = 8_096
// Set to 0 to use the maximum threads supported by the current machine hardware
export const LLM_THREADS = 4
@@ -77,11 +76,15 @@ export default class LLMManager {
}
try {
const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
const { LlamaLogLevel, getLlama } = await Function(
'return import("node-llama-cpp")'
)()
this._llama = await getLlama({
logLevel: LlamaLogLevel.disabled
logLevel: LlamaLogLevel.debug
})
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
this._model = await this._llama.loadModel({
modelPath: LLM_PATH
})

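In the manager, LLM_CONTEXT_SIZE is deleted (its consumers now create contexts without a pinned size), logging flips from disabled to debug while the LLM setup is being finalized, and a @ts-expect-error suppresses a type error on a loadModel call that is valid at runtime. The boot path reduces to the following sketch (LLM_PATH is the constant defined earlier in this commit; the import alias is an assumption and error handling is omitted):

import { LLM_PATH } from '@/constants' // path alias assumed

const { LlamaLogLevel, getLlama } = await Function(
  'return import("node-llama-cpp")'
)()

// Debug logging surfaces llama.cpp's native output during bring-up;
// the previous value, LlamaLogLevel.disabled, silences it again.
const llama = await getLlama({ logLevel: LlamaLogLevel.debug })

// The commit marks this call with @ts-expect-error to bridge a typing gap
const model = await llama.loadModel({ modelPath: LLM_PATH })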
View File

@@ -76,11 +76,11 @@ import { LogHelper } from '@/helpers/log-helper'
await summarizationDuty.execute()*/
/*const translationDuty = new TranslationLLMDuty({
input: 'Bonjour, la température est très agréable à Shenzhen',
input: 'the weather is good in shenzhen',
data: {
source: 'French',
target: 'English'
// autoDetectLanguage: true
// source: 'French',
target: 'French',
autoDetectLanguage: true
}
})
await translationDuty.execute()*/
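The commented-out scratch test now exercises the new auto-detect path: an English input, target French, source omitted, autoDetectLanguage enabled. Read together with the duty's hunks, the data parameter comes out roughly as this illustrative type (field names are from the diff; optionality is inferred from the example):

interface TranslationDutyData {
  source?: string // omitted here; detected when autoDetectLanguage is true
  target: string
  autoDetectLanguage?: boolean
}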