feat(server): Gemma support (prototype)

commit 2de95c4ef9 (parent 2434d36564)

@@ -115,7 +115,7 @@ async function downloadAndCompileLlamaCPP() {
     osType === OSTypes.MacOS &&
     cpuArchitecture === CPUArchitectures.X64
   ) {
-    llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
+    // llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
 
     LogHelper.info(`macOS Intel chipset detected, Metal support disabled`)
   }

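This hunk stops appending --no-metal on Intel Macs (the flag is now commented out) while keeping the informational log line. As a minimal sketch of the platform detection feeding the condition above, using plain node:os checks; the enum shapes are illustrative assumptions, and Leon's actual OSTypes/CPUArchitectures helpers may differ:

import os from 'node:os'

// Illustrative enums; Leon's real definitions may differ.
enum OSTypes {
  MacOS = 'macos',
  Linux = 'linux',
  Windows = 'windows'
}
enum CPUArchitectures {
  X64 = 'x64',
  ARM64 = 'arm64'
}

const osType: OSTypes =
  os.type() === 'Darwin'
    ? OSTypes.MacOS
    : os.type() === 'Linux'
      ? OSTypes.Linux
      : OSTypes.Windows
// os.arch() returns 'x64' on Intel Macs and 'arm64' on Apple Silicon.
const cpuArchitecture = os.arch() as CPUArchitectures
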
@@ -161,22 +161,25 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
  * LLMs
  */
-export const LLM_VERSION = 'v0.2.Q4_K_S'
-export const LLM_NAME = 'Mistral 7B Instruct'
-export const LLM_NAME_WITH_VERSION = `${LLM_NAME} ${LLM_VERSION}`
-export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
+// export const LLM_VERSION = 'v0.2.Q4_K_S'
+export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
+// export const LLM_NAME = 'Mistral 7B Instruct'
+export const LLM_NAME = 'Gemma 1.1 7B (IT)'
+export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
+// export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
+export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
 export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
-export const LLM_MINIMUM_TOTAL_RAM = 8
-export const LLM_MINIMUM_FREE_RAM = 8
+export const LLM_MINIMUM_TOTAL_RAM = 0
+export const LLM_MINIMUM_FREE_RAM = 0
 export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
-export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2096'
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
 
 /**
  * Misc

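The constants above swap Mistral 7B Instruct for Gemma 1.1 7B (IT) and zero out the RAM thresholds for this prototype. A quick sketch of how the new values compose into the on-disk model path; the MODELS_PATH value here is an assumption for illustration, since the real constant is defined elsewhere in @/constants:

import path from 'node:path'

// Assumed location; the real MODELS_PATH lives in @/constants.
const MODELS_PATH = path.join(process.cwd(), 'core', 'data', 'models')

const LLM_VERSION = '1.1-7b-it-Q4_K_M'
const LLM_NAME = 'Gemma 1.1 7B (IT)'
// "Gemma 1.1 7B (IT) (1.1-7b-it-Q4_K_M)"
const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
// "gemma-1.1-7b-it-Q4_K_M.gguf"
const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
const LLM_PATH = path.join(MODELS_PATH, 'llm', LLM_FILE_NAME)
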
@@ -43,7 +43,7 @@ export class SummarizationLLMDuty extends LLMDuty {
     const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
       type: 'object',
       properties: {
-        o: {
+        summary: {
           type: 'string'
         }
       }

@@ -60,7 +60,7 @@ export class TranslationLLMDuty extends LLMDuty {
     const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
       type: 'object',
       properties: {
-        o: {
+        translation: {
           type: 'string'
         }
       }

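Both duties constrain generation with node-llama-cpp's JSON-schema grammar, and the commit renames the single output field from the opaque o to summary and translation respectively. A self-contained sketch of the grammar round trip, assuming the node-llama-cpp v3 beta API this commit targets (the model path and prompt are placeholders):

import {
  getLlama,
  LlamaChatSession,
  LlamaJsonSchemaGrammar
} from 'node-llama-cpp'

const llama = await getLlama()
// Placeholder path; Leon resolves this through LLM_PATH.
const model = await llama.loadModel({
  modelPath: '/path/to/gemma-1.1-7b-it-Q4_K_M.gguf'
})
const context = await model.createContext()
const session = new LlamaChatSession({
  contextSequence: context.getSequence()
})

// Forces the model to emit exactly { "translation": "..." }.
const grammar = new LlamaJsonSchemaGrammar(llama, {
  type: 'object',
  properties: {
    translation: { type: 'string' }
  }
} as const)

const raw = await session.prompt('Translate "bonjour" to English.', {
  grammar
})
const { translation } = grammar.parse(raw)
console.log(translation)
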
@@ -2,7 +2,11 @@ import fs from 'node:fs'
 
 import type { Llama, LlamaModel } from 'node-llama-cpp'
 
-import { LLM_MINIMUM_FREE_RAM, LLM_PATH } from '@/constants'
+import {
+  LLM_MINIMUM_FREE_RAM,
+  LLM_NAME_WITH_VERSION,
+  LLM_PATH
+} from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { SystemHelper } from '@/helpers/system-helper'
 

@@ -73,7 +77,7 @@ export default class LLMManager {
     }
 
     try {
-      const { getLlama, LlamaLogLevel } = await import('node-llama-cpp')
+      const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
 
       this._llama = await getLlama({
         logLevel: LlamaLogLevel.disabled

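For context, the dynamic import above pulls the v3 beta entry points, and getLlama accepts a logLevel option to silence llama.cpp's native logs. A simplified sketch of the manager's guard-then-load flow, with a plain node:os free-RAM check standing in for Leon's SystemHelper (with the prototype thresholds set to 0, the guard always passes):

import os from 'node:os'
import { getLlama, LlamaLogLevel } from 'node-llama-cpp'

const LLM_MINIMUM_FREE_RAM = 0 // GB, per the constants hunk above

// Simplified stand-in for SystemHelper's free-RAM reporting.
const freeRAMInGB = os.freemem() / 1024 ** 3

if (freeRAMInGB >= LLM_MINIMUM_FREE_RAM) {
  const llama = await getLlama({
    logLevel: LlamaLogLevel.disabled // mute llama.cpp native logging
  })
  // Model loading would follow, e.g. llama.loadModel({ modelPath: LLM_PATH })
}
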
@@ -83,7 +87,7 @@ export default class LLMManager {
       })
       this._isLLMEnabled = true
 
-      LogHelper.success('LLM has been loaded')
+      LogHelper.success(`${LLM_NAME_WITH_VERSION} LLM has been loaded`)
     } catch (e) {
       LogHelper.error(`LLM Manager failed to load: ${e}`)
     }

@@ -50,7 +50,7 @@ export const run: ActionFunction = async function (params) {
   await leon.answer({
     key: 'translate',
     data: {
-      output: response.data.output.o
+      output: response.data.output.translation
     }
   })
 }

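The skill-side accessor must track the grammar key renamed in TranslationLLMDuty. A hedged sketch of the response shape the action now expects; only the translation field is guaranteed by the grammar, and the wrapper field names are assumptions for illustration:

// Wrapper field names are assumed; only `translation` comes from the grammar.
interface TranslationDutyResponse {
  data: {
    output: {
      translation: string
    }
  }
}

const response: TranslationDutyResponse = {
  data: { output: { translation: 'hello' } }
}
console.log(response.data.output.translation) // "hello"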