Mirror of https://github.com/leon-ai/leon.git, synced 2024-11-27 08:06:03 +03:00
feat(server): Gemma support (prototype)

Parent commit: 2434d36564
This commit: 2de95c4ef9
@@ -115,7 +115,7 @@ async function downloadAndCompileLlamaCPP() {
     osType === OSTypes.MacOS &&
     cpuArchitecture === CPUArchitectures.X64
   ) {
-    llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
+    // llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
 
     LogHelper.info(`macOS Intel chipset detected, Metal support disabled`)
   }
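The condition this hunk sits in relies on OS and CPU detection done earlier in the setup script. A minimal sketch of an equivalent check, assuming Node's built-in os module (OSTypes and CPUArchitectures are Leon's own enums and are not shown in this diff):

    import os from 'node:os'

    // On an Intel Mac, Node reports type 'Darwin' and arch 'x64'
    const isIntelMac = os.type() === 'Darwin' && os.arch() === 'x64'
    if (isIntelMac) {
      // The prototype keeps Metal enabled here; --no-metal is commented out above
    }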
@@ -161,22 +161,25 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
  * LLMs
  */
-export const LLM_VERSION = 'v0.2.Q4_K_S'
-export const LLM_NAME = 'Mistral 7B Instruct'
-export const LLM_NAME_WITH_VERSION = `${LLM_NAME} ${LLM_VERSION}`
-export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
+// export const LLM_VERSION = 'v0.2.Q4_K_S'
+export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
+// export const LLM_NAME = 'Mistral 7B Instruct'
+export const LLM_NAME = 'Gemma 1.1 7B (IT)'
+export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
+// export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
+export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
 export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
-export const LLM_MINIMUM_TOTAL_RAM = 8
-export const LLM_MINIMUM_FREE_RAM = 8
+export const LLM_MINIMUM_TOTAL_RAM = 0
+export const LLM_MINIMUM_FREE_RAM = 0
 export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/blob/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
-export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2096'
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2679'
 
 /**
  * Misc
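A minimal sketch, not Leon's actual downloader, of how these constants could be consumed to fetch the model file; downloadLLM is a hypothetical helper. One caveat: Hugging Face serves raw files from /resolve/ paths, while /blob/ paths point at the repository web page, so the new URLs above may need /resolve/ for a direct .gguf download.

    import fs from 'node:fs'
    import { Readable } from 'node:stream'
    import { pipeline } from 'node:stream/promises'

    import { LLM_HF_DOWNLOAD_URL, LLM_PATH } from '@/constants'

    async function downloadLLM(): Promise<void> {
      const response = await fetch(LLM_HF_DOWNLOAD_URL)
      if (!response.ok || !response.body) {
        throw new Error(`Model download failed: HTTP ${response.status}`)
      }
      // Stream the GGUF to disk rather than buffering ~5 GB in memory
      await pipeline(
        Readable.fromWeb(response.body as import('node:stream/web').ReadableStream),
        fs.createWriteStream(LLM_PATH)
      )
    }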
@@ -43,7 +43,7 @@ export class SummarizationLLMDuty extends LLMDuty {
     const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
       type: 'object',
       properties: {
-        o: {
+        summary: {
           type: 'string'
         }
       }
@@ -60,7 +60,7 @@ export class TranslationLLMDuty extends LLMDuty {
     const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
       type: 'object',
       properties: {
-        o: {
+        translation: {
           type: 'string'
         }
       }
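Both grammar hunks rename the lone schema property from the opaque o to a descriptive key (summary, translation). A hedged sketch of how such a grammar is typically used with node-llama-cpp (constructor and parse usage mirror the diff and the library's v3 beta; exact signatures may differ between versions):

    import { getLlama, LlamaJsonSchemaGrammar } from 'node-llama-cpp'

    const llama = await getLlama()
    // The grammar constrains token sampling so the completion is valid JSON
    // matching this schema
    const grammar = new LlamaJsonSchemaGrammar(llama, {
      type: 'object',
      properties: {
        translation: { type: 'string' }
      }
    } as const)

    // After generating rawOutput from a session bound to this grammar:
    // const { translation } = grammar.parse(rawOutput)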
@@ -2,7 +2,11 @@ import fs from 'node:fs'
 
 import type { Llama, LlamaModel } from 'node-llama-cpp'
 
-import { LLM_MINIMUM_FREE_RAM, LLM_PATH } from '@/constants'
+import {
+  LLM_MINIMUM_FREE_RAM,
+  LLM_NAME_WITH_VERSION,
+  LLM_PATH
+} from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { SystemHelper } from '@/helpers/system-helper'
 
@@ -73,7 +77,7 @@ export default class LLMManager {
     }
 
     try {
-      const { getLlama, LlamaLogLevel } = await import('node-llama-cpp')
+      const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
 
       this._llama = await getLlama({
         logLevel: LlamaLogLevel.disabled
@@ -83,7 +87,7 @@ export default class LLMManager {
       })
       this._isLLMEnabled = true
 
-      LogHelper.success('LLM has been loaded')
+      LogHelper.success(`${LLM_NAME_WITH_VERSION} LLM has been loaded`)
     } catch (e) {
       LogHelper.error(`LLM Manager failed to load: ${e}`)
     }
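A condensed sketch of the load path these two hunks touch, assuming node-llama-cpp's getLlama/loadModel API; the loadModel call is inferred from the class's Llama/LlamaModel fields and does not appear in this diff:

    import { LLM_NAME_WITH_VERSION, LLM_PATH } from '@/constants'
    import { LogHelper } from '@/helpers/log-helper'

    const { LlamaLogLevel, getLlama } = await import('node-llama-cpp')
    // Silence llama.cpp's native logging, then load the GGUF from disk
    const llama = await getLlama({ logLevel: LlamaLogLevel.disabled })
    const model = await llama.loadModel({ modelPath: LLM_PATH })
    LogHelper.success(`${LLM_NAME_WITH_VERSION} LLM has been loaded`)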
@@ -50,7 +50,7 @@ export const run: ActionFunction = async function (params) {
     await leon.answer({
       key: 'translate',
       data: {
-        output: response.data.output.o
+        output: response.data.output.translation
       }
     })
   }
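This mirrors the grammar rename in TranslationLLMDuty: the duty's JSON output now carries a translation key instead of o. A hypothetical typing of the payload the skill reads:

    // Hypothetical shape of response.data.output after the rename
    interface TranslationDutyOutput {
      translation: string
    }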