mirror of https://github.com/leon-ai/leon.git synced 2024-10-05 21:58:40 +03:00

feat(server): final LLM setup

This commit is contained in:
louistiti 2024-04-17 00:10:41 +08:00
parent 2de95c4ef9
commit cf02f0f91d
No known key found for this signature in database
GPG Key ID: 92CD6A2E497E1669
8 changed files with 98 additions and 91 deletions

View File

@@ -52,6 +52,64 @@
"route": "/api/action/games/rochambeau/rematch",
"params": []
},
{
"method": "POST",
"route": "/api/action/news/github_trends/run",
"params": ["number", "daterange"],
"entitiesType": "builtIn"
},
{
"method": "GET",
"route": "/api/action/news/product_hunt_trends/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/create_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/productivity/todo_list/view_lists",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/view_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/rename_list",
"params": ["old_list", "new_list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/delete_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/add_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/complete_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/uncheck_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/leon/age/run",
@@ -127,64 +185,6 @@
"route": "/api/action/leon/welcome/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/news/github_trends/run",
"params": ["number", "daterange"],
"entitiesType": "builtIn"
},
{
"method": "GET",
"route": "/api/action/news/product_hunt_trends/run",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/create_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/productivity/todo_list/view_lists",
"params": []
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/view_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/rename_list",
"params": ["old_list", "new_list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/delete_list",
"params": ["list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/add_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/complete_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "POST",
"route": "/api/action/productivity/todo_list/uncheck_todos",
"params": ["todos", "list"],
"entitiesType": "trim"
},
{
"method": "GET",
"route": "/api/action/social_communication/mbti/setup",

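A note on the endpoint entries moved above: each object maps one skill action onto a generated HTTP route. A minimal sketch of the entry shape as it appears in this file, assuming the type names below (they are illustrative, not taken from the Leon codebase):

// Hypothetical shape of a generated skill endpoint entry (names are illustrative)
type HTTPMethod = 'GET' | 'POST'

interface SkillEndpoint {
  method: HTTPMethod
  // e.g. '/api/action/productivity/todo_list/create_list'
  route: string
  // utterance entities forwarded as request parameters
  params: string[]
  // Assumption: 'builtIn' params are resolved by built-in NER entities
  // (numbers, date ranges...), while 'trim' params are extracted by trim
  // rules; the field appears to be omitted when params is empty
  entitiesType?: 'builtIn' | 'trim'
}
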
View File

@@ -95,7 +95,7 @@
"getos": "3.2.1",
"googleapis": "67.1.1",
"ibm-watson": "6.1.1",
"node-llama-cpp": "3.0.0-beta.15",
"node-llama-cpp": "3.0.0-beta.16",
"node-wav": "0.0.2",
"os-name": "4.0.1",
"pretty-bytes": "5.6.0",

View File

@@ -161,25 +161,29 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/**
* LLMs
*/
// export const LLM_VERSION = 'v0.2.Q4_K_S'
// export const LLM_VERSION = 'v0.2.Q4_K_M'
export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
// export const LLM_NAME = 'Mistral 7B Instruct'
export const LLM_NAME = 'Gemma 1.1 7B (IT)'
export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
// export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
export const LLM_MINIMUM_TOTAL_RAM = 0
export const LLM_MINIMUM_FREE_RAM = 0
export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
/*export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
/*export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf?download=true'*/
/**
* @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
*/
export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2681'
/**
* Misc

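The URL fix above matters: on Hugging Face, blob/main points at the HTML file-viewer page, while resolve/main redirects to the raw file, so only the latter works for programmatic downloads. A minimal streaming-download sketch, assuming Node 18+ fetch (the helper name is illustrative):

// Illustrative downloader: /resolve/ URLs redirect to the raw GGUF bytes,
// whereas /blob/ URLs would return an HTML page instead of the model
import { createWriteStream } from 'node:fs'
import { Readable } from 'node:stream'
import { pipeline } from 'node:stream/promises'

async function downloadLLM(url: string, destination: string): Promise<void> {
  const response = await fetch(url) // follows the CDN redirect
  if (!response.ok || !response.body) {
    throw new Error(`Download failed with HTTP ${response.status}`)
  }
  // Stream to disk; a multi-GB model should never be buffered in memory
  await pipeline(
    Readable.fromWeb(response.body as unknown as import('node:stream/web').ReadableStream),
    createWriteStream(destination)
  )
}
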
View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
data: {
@@ -38,11 +38,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -50,14 +50,13 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
})
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
properties: {
...this.data.schema
}
})
const prompt = `Utterance: ${this.input}`
const prompt = `${this.systemPrompt} Utterance: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
contextShiftSize: context.contextSize / 2,
grammar,
maxTokens: context.contextSize
})

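About the await Function('return import(...)')() change repeated across the duties: node-llama-cpp v3 is distributed as an ESM-only package, and when TypeScript emits CommonJS it rewrites a literal await import(...) into a require()-based helper, which throws ERR_REQUIRE_ESM — presumably the motivation here. Building the import expression inside Function() hides it from the compiler, so a native dynamic import survives in the emitted code. A sketch of the pattern as a typed helper (the helper name is illustrative):

// Keep a real dynamic import() in CommonJS output; tsc cannot rewrite it
// to require() because the import lives inside a generated function body
type NodeLlamaCpp = typeof import('node-llama-cpp')

async function importNodeLlamaCpp(): Promise<NodeLlamaCpp> {
  return Function('return import("node-llama-cpp")')() as Promise<NodeLlamaCpp>
}
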
View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface SummarizationLLMDutyParams extends LLMDutyParams {}
@@ -30,11 +30,11 @@ export class SummarizationLLMDuty extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -48,7 +48,7 @@ export class SummarizationLLMDuty extends LLMDuty {
}
}
})
const prompt = `Text: ${this.input}`
const prompt = `${this.systemPrompt} Text: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
grammar,
maxTokens: context.contextSize

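The JSON-schema grammar is what makes these duties dependable: token sampling is constrained so the model can only emit JSON matching the schema, which can then be parsed rather than scraped. A hedged, self-contained sketch of the flow the hunks above elide — the schema field, the prompt, and the contextSequence option (obtained via context.getSequence(), per the node-llama-cpp v3 API) are assumptions, not code from this commit:

// Illustrative grammar-constrained summarization, condensed from the duty
import { LLM_MANAGER } from '@/core'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

async function summarizeConstrained(input: string): Promise<string> {
  const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
    'return import("node-llama-cpp")'
  )()
  const context = await LLM_MANAGER.model.createContext({ threads: LLM_THREADS })
  // Assumption: the option elided by the hunk is a context sequence
  const completion = new LlamaCompletion({ contextSequence: context.getSequence() })
  const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
    type: 'object',
    properties: {
      summary: { type: 'string' } // illustrative schema field
    }
  })
  const rawResult = await completion.generateCompletion(`Text: ${input}`, {
    grammar,
    maxTokens: context.contextSize
  })
  // The grammar guarantees schema-valid JSON, so parsing cannot fail on shape
  return (grammar.parse(rawResult) as { summary: string }).summary
}
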
View File

@@ -6,7 +6,7 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_CONTEXT_SIZE, LLM_THREADS } from '@/core/llm-manager/llm-manager'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
interface TranslationLLMDutyParams extends LLMDutyParams {
data: {
@@ -47,11 +47,11 @@ export class TranslationLLMDuty extends LLMDuty {
LogHelper.info('Executing...')
try {
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await import(
'node-llama-cpp'
)
const { LlamaCompletion, LlamaJsonSchemaGrammar } = await Function(
'return import("node-llama-cpp")'
)()
const context = await LLM_MANAGER.model.createContext({
contextSize: LLM_CONTEXT_SIZE,
threads: LLM_THREADS
})
const completion = new LlamaCompletion({
@@ -65,8 +65,9 @@ export class TranslationLLMDuty extends LLMDuty {
}
}
})
const prompt = `Text: ${this.input}`
const prompt = `${this.systemPrompt} Text: ${this.input}`
const rawResult = await completion.generateCompletion(prompt, {
contextShiftSize: context.contextSize / 2,
grammar,
maxTokens: context.contextSize
})

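Regarding the contextShiftSize option newly passed above: when the prompt plus generated tokens would overflow the context window, node-llama-cpp shifts older tokens out to make room; capping the shift at half the window looks like a middle ground between losing history and shifting constantly. A commented restatement of the call (values exactly as in the hunk):

// When the window fills up, reclaim up to half of it and keep generating
const rawResult = await completion.generateCompletion(prompt, {
  contextShiftSize: context.contextSize / 2, // tokens dropped per shift
  grammar,
  maxTokens: context.contextSize // hard cap on generated tokens
})
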
View File

@@ -13,7 +13,6 @@ import { SystemHelper } from '@/helpers/system-helper'
type LLMManagerLlama = Llama | null
type LLMManagerModel = LlamaModel | null
export const LLM_CONTEXT_SIZE = 8_096
// Set to 0 to use the maximum threads supported by the current machine hardware
export const LLM_THREADS = 4
@@ -77,11 +76,15 @@ export default class LLMManager {
}
try {
const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
const { LlamaLogLevel, getLlama } = await Function(
'return import("node-llama-cpp")'
)()
this._llama = await getLlama({
logLevel: LlamaLogLevel.disabled
logLevel: LlamaLogLevel.debug
})
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
this._model = await this._llama.loadModel({
modelPath: LLM_PATH
})

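Taken together, the manager changes drop the global LLM_CONTEXT_SIZE export (duties now pass only threads, letting the library size the context window) and turn llama.cpp logging up to debug while the setup is finalized. A condensed, illustrative sketch of the resulting initialization path — the import paths and the surrounding class are assumed from the hunks, and the @ts-expect-error is omitted for brevity:

// Initialization as it stands after this commit (condensed sketch)
import { LLM_PATH } from '@/constants'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'

async function initLLM(): Promise<void> {
  const { LlamaLogLevel, getLlama } = await Function(
    'return import("node-llama-cpp")'
  )()
  const llama = await getLlama({ logLevel: LlamaLogLevel.debug }) // verbose llama.cpp logs
  const model = await llama.loadModel({ modelPath: LLM_PATH })
  // No contextSize pinned anymore; node-llama-cpp picks a fitting window
  const context = await model.createContext({ threads: LLM_THREADS })
}
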
View File

@@ -76,11 +76,11 @@ import { LogHelper } from '@/helpers/log-helper'
await summarizationDuty.execute()*/
/*const translationDuty = new TranslationLLMDuty({
input: 'Bonjour, la température est très agréable à Shenzhen',
input: 'the weather is good in shenzhen',
data: {
source: 'French',
target: 'English'
// autoDetectLanguage: true
// source: 'French',
target: 'French',
autoDetectLanguage: true
}
})
await translationDuty.execute()*/
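
For reference, the playground block above (kept commented out upstream) now exercises language auto-detection: only the target language is specified and the source is inferred from the input. Uncommented, the usage reads:

const translationDuty = new TranslationLLMDuty({
  input: 'the weather is good in shenzhen',
  data: {
    target: 'French',
    autoDetectLanguage: true // source language inferred from the input
  }
})
await translationDuty.execute()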