diff --git a/core/skills-endpoints.json b/core/skills-endpoints.json
index 7d06a760..e3643482 100644
--- a/core/skills-endpoints.json
+++ b/core/skills-endpoints.json
@@ -52,6 +52,64 @@
       "route": "/api/action/games/rochambeau/rematch",
       "params": []
     },
+    {
+      "method": "POST",
+      "route": "/api/action/news/github_trends/run",
+      "params": ["number", "daterange"],
+      "entitiesType": "builtIn"
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/news/product_hunt_trends/run",
+      "params": []
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/create_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/productivity/todo_list/view_lists",
+      "params": []
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/view_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/rename_list",
+      "params": ["old_list", "new_list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/delete_list",
+      "params": ["list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/add_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/complete_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
+    {
+      "method": "POST",
+      "route": "/api/action/productivity/todo_list/uncheck_todos",
+      "params": ["todos", "list"],
+      "entitiesType": "trim"
+    },
     {
       "method": "GET",
       "route": "/api/action/leon/age/run",
       "params": []
     },
@@ -127,64 +185,6 @@
       "route": "/api/action/leon/welcome/run",
       "params": []
     },
-    {
-      "method": "POST",
-      "route": "/api/action/news/github_trends/run",
-      "params": ["number", "daterange"],
-      "entitiesType": "builtIn"
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/news/product_hunt_trends/run",
-      "params": []
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/create_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/productivity/todo_list/view_lists",
-      "params": []
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/view_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/rename_list",
-      "params": ["old_list", "new_list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/delete_list",
-      "params": ["list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/add_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/complete_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
-    {
-      "method": "POST",
-      "route": "/api/action/productivity/todo_list/uncheck_todos",
-      "params": ["todos", "list"],
-      "entitiesType": "trim"
-    },
     {
       "method": "GET",
       "route": "/api/action/social_communication/mbti/setup",
diff --git a/package.json b/package.json
index ddec1a25..0ca8d30b 100644
--- a/package.json
+++ b/package.json
@@ -95,7 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
-    "node-llama-cpp": "3.0.0-beta.15",
+    "node-llama-cpp": "3.0.0-beta.16",
     "node-wav": "0.0.2",
     "os-name": "4.0.1",
     "pretty-bytes": "5.6.0",
diff --git a/server/src/constants.ts b/server/src/constants.ts
index 18917b0b..091f93cf 100644
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@@ -161,25 +161,29 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
  * LLMs
  */
-// export const LLM_VERSION = 'v0.2.Q4_K_S'
+// export const LLM_VERSION = 'v0.2.Q4_K_M'
 export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
 // export const LLM_NAME = 'Mistral 7B Instruct'
 export const LLM_NAME = 'Gemma 1.1 7B (IT)'
-export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 // export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
 export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
+export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
 export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 0
 export const LLM_MINIMUM_FREE_RAM = 0
 export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+/*export const LLM_HF_DOWNLOAD_URL =
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
 export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+/*export const LLM_MIRROR_DOWNLOAD_URL =
+  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
-export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2681'
 
 /**
  * Misc
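A note on the URL change above: Hugging Face serves the repository's HTML file viewer at `/blob/` paths and the raw file at `/resolve/` paths, so the previous constants pointed the downloader at an HTML page rather than the GGUF binary. A minimal sketch of a guard that would catch this class of mistake; the helper name is hypothetical and not part of the codebase:

```ts
// Hypothetical guard, not part of this diff: reject Hugging Face "viewer"
// URLs so a misconfigured constant fails fast instead of downloading HTML.
function assertRawHuggingFaceURL(url: string): void {
  if (new URL(url).pathname.includes('/blob/')) {
    throw new Error(
      `"${url}" points at the HTML file viewer; use "/resolve/" to get the raw file`
    )
  }
}

// Passes: the corrected constant uses /resolve/
assertRawHuggingFaceURL(
  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
)
```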
diff --git a/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts b/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts
index 7242b015..0f901a3d 100644
--- a/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 
 interface CustomNERLLMDutyParams extends LLMDutyParams {
   data: {
@@ -38,11 +38,11 @@ export class CustomNERLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')
 
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -50,14 +50,13 @@
       })
       const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
         type: 'object',
-        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
-        // @ts-expect-error
         properties: {
           ...this.data.schema
         }
       })
-      const prompt = `Utterance: ${this.input}`
+      const prompt = `${this.systemPrompt} Utterance: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
+        contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
diff --git a/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts b/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts
index 300abc50..5c169a0f 100644
--- a/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 
 interface SummarizationLLMDutyParams extends LLMDutyParams {}
 
@@ -30,11 +30,11 @@ export class SummarizationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')
 
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -48,7 +48,7 @@
           }
         }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
         grammar,
         maxTokens: context.contextSize
diff --git a/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts b/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts
index 57107599..d0bc806e 100644
--- a/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts
@@ -6,7 +6,7 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
 
 interface TranslationLLMDutyParams extends LLMDutyParams {
   data: {
@@ -47,11 +47,11 @@ export class TranslationLLMDuty extends LLMDuty {
     LogHelper.info('Executing...')
 
     try {
-      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
-        'node-llama-cpp'
-      )
+      const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
+        'return import("node-llama-cpp")'
+      )()
+
       const context = await LLM_MANAGER.model.createContext({
-        contextSize: LLM_CONTEXT_SIZE,
         threads: LLM_THREADS
       })
       const completion = new LlamaCompletion({
@@ -65,8 +65,9 @@
           }
         }
       })
-      const prompt = `Text: ${this.input}`
+      const prompt = `${this.systemPrompt} Text: ${this.input}`
       const rawResult = await completion.generateCompletion(prompt, {
+        contextShiftSize: context.contextSize / 2,
         grammar,
         maxTokens: context.contextSize
       })
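The recurring `await Function('return import("node-llama-cpp")')()` change in the three duties above is the usual workaround for loading an ESM-only package from code that TypeScript compiles to CommonJS: a literal `import()` expression would be transpiled into `require()`, which cannot load node-llama-cpp 3.x. Building the import expression through the `Function` constructor hides it from the compiler, so Node executes a true dynamic `import()` at runtime. A sketch of the pattern, factored into a helper that is itself illustrative and not in the diff:

```ts
// Illustrative helper, not in the diff: load an ESM-only dependency from
// CommonJS output. `typeof import(...)` is type-only, so it is erased at
// compile time and never becomes a require() call.
type NodeLlamaCpp = typeof import('node-llama-cpp')

async function importNodeLlamaCpp(): Promise<NodeLlamaCpp> {
  // The compiler cannot see this import(), so it survives transpilation
  return (await Function('return import("node-llama-cpp")')()) as NodeLlamaCpp
}

const { LlamaCompletion, LlamaJsonSchemaGrammar } = await importNodeLlamaCpp()
```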
diff --git a/server/src/core/llm-manager/llm-manager.ts b/server/src/core/llm-manager/llm-manager.ts
index 66a30d2b..8874659a 100644
--- a/server/src/core/llm-manager/llm-manager.ts
+++ b/server/src/core/llm-manager/llm-manager.ts
@@ -13,7 +13,6 @@ import { SystemHelper } from '@/helpers/system-helper'
 type LLMManagerLlama = Llama | null
 type LLMManagerModel = LlamaModel | null
 
-export const LLM_CONTEXT_SIZE = 8_096
 // Set to 0 to use the maximum threads supported by the current machine hardware
 export const LLM_THREADS = 4
 
@@ -77,11 +76,15 @@ export default class LLMManager {
     }
 
     try {
-      const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
+      const { LlamaLogLevel, getLlama } = await Function(
+        'return import("node-llama-cpp")'
+      )()
 
       this._llama = await getLlama({
-        logLevel: LlamaLogLevel.disabled
+        logLevel: LlamaLogLevel.debug
       })
+      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+      // @ts-expect-error
      this._model = await this._llama.loadModel({
         modelPath: LLM_PATH
       })
diff --git a/server/src/index.ts b/server/src/index.ts
index 2d770f45..d4dad528 100644
--- a/server/src/index.ts
+++ b/server/src/index.ts
@@ -76,11 +76,11 @@ import { LogHelper } from '@/helpers/log-helper'
   await summarizationDuty.execute()*/
 
 /*const translationDuty = new TranslationLLMDuty({
-  input: 'Bonjour, la température est très agréable à Shenzhen',
+  input: 'the weather is good in shenzhen',
   data: {
-    source: 'French',
-    target: 'English'
-    // autoDetectLanguage: true
+    // source: 'French',
+    target: 'French',
+    autoDetectLanguage: true
   }
 })
 await translationDuty.execute()*/
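Taken together, the duty changes share one generation path: create a context without a fixed `contextSize` (letting node-llama-cpp derive it from the model), prepend the duty's system prompt to the completion prompt, and let the context shift in half-window chunks as generation approaches `maxTokens` (set to the full context size). A condensed sketch of that path under stated assumptions: `contextSequence: context.getSequence()` is assumed from node-llama-cpp's documented `LlamaCompletion` options, since the diff elides that line, and the model path and JSON schema are placeholders:

```ts
// Condensed sketch of the shared duty flow; paths and schema are placeholders.
const { getLlama, LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
  'return import("node-llama-cpp")'
)()

const llama = await getLlama()
const model = await llama.loadModel({ modelPath: 'models/llm/model.gguf' })
// No explicit contextSize: the library picks one suited to the model
const context = await model.createContext({ threads: 4 })
const completion = new LlamaCompletion({
  contextSequence: context.getSequence() // assumed; this line is elided in the diff
})
const grammar = new LlamaJsonSchemaGrammar(llama, {
  type: 'object',
  properties: { translation: { type: 'string' } }
})

const rawResult = await completion.generateCompletion('Text: hello', {
  // Slide the window in half-context chunks instead of failing when
  // generation approaches maxTokens
  contextShiftSize: context.contextSize / 2,
  grammar,
  maxTokens: context.contextSize
})
```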