mirror of https://github.com/leon-ai/leon.git synced 2024-10-03 20:57:55 +03:00

feat: use VRAM as LLM unit requirements

louistiti 2024-07-03 23:41:23 +08:00
parent 6867e9c6db
commit 0c775ba2e5
8 changed files with 103 additions and 40 deletions

View File

@@ -36,6 +36,11 @@ document.addEventListener('DOMContentLoaded', async () => {
const infoKeys = [
'timeZone',
'telemetry',
'gpu',
'graphicsComputeAPI',
'totalVRAM',
'freeVRAM',
'usedVRAM',
'llm',
'shouldWarmUpLLMDuties',
'isLLMActionRecognitionEnabled',

View File

@@ -7,7 +7,7 @@ import { command } from 'execa'
import {
LLM_NAME,
LLM_NAME_WITH_VERSION,
LLM_MINIMUM_TOTAL_RAM,
LLM_MINIMUM_TOTAL_VRAM,
LLM_DIR_PATH,
LLM_PATH,
LLM_VERSION,
@@ -31,8 +31,27 @@ import { FileHelper } from '@/helpers/file-helper'
const LLM_MANIFEST_PATH = path.join(LLM_DIR_PATH, 'manifest.json')
let manifest = null
function checkMinimumHardwareRequirements() {
return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
async function checkMinimumHardwareRequirements() {
const { getLlama, LlamaLogLevel } = await Function(
'return import("node-llama-cpp")'
)()
const llama = await getLlama({
logLevel: LlamaLogLevel.disabled
})
if (!(await SystemHelper.hasGPU(llama))) {
return false
}
LogHelper.info(
`GPU detected: ${(await SystemHelper.getGPUDeviceNames(llama))[0]}`
)
LogHelper.info(
`Graphics compute API: ${await SystemHelper.getGraphicsComputeAPI(llama)}`
)
LogHelper.info(`Total VRAM: ${await SystemHelper.getTotalVRAM(llama)} GB`)
return (await SystemHelper.getTotalVRAM(llama)) >= LLM_MINIMUM_TOTAL_VRAM
}
async function downloadLLM() {
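Note on the new check: node-llama-cpp is an ESM-only package while the setup scripts run as CommonJS, so the hunk above uses the `Function('return import("node-llama-cpp")')()` trick to keep the `import()` call from being transpiled into a `require()`. The gate itself is now GPU-based: no usable GPU means no local LLM, otherwise the total VRAM reported by `llama.getVramState()` must reach `LLM_MINIMUM_TOTAL_VRAM`. A minimal standalone sketch of the same pattern (illustrative only, not part of the commit; the function name is made up here and the 8 GB default mirrors the constant):

    async function hasEnoughVRAMForLLM(minimumTotalVRAMInGB = 8) {
      // Function(...) hides import() from CommonJS transpilation so the
      // ESM-only node-llama-cpp package can still be loaded dynamically.
      const { getLlama, LlamaLogLevel } = await Function(
        'return import("node-llama-cpp")'
      )()

      const llama = await getLlama({ logLevel: LlamaLogLevel.disabled })

      // No GPU at all: bail out early, exactly like the hunk above.
      if (!llama.gpu) {
        return false
      }

      // getVramState() reports bytes; convert to GB before comparing.
      const { total } = await llama.getVramState()

      return total / (1_024 * 1_024 * 1_024) >= minimumTotalVRAMInGB
    }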
@@ -162,13 +181,19 @@ async function downloadAndCompileLlamaCPP() {
}
export default async () => {
const canSetupLLM = checkMinimumHardwareRequirements()
const canSetupLLM = await checkMinimumHardwareRequirements()
if (!canSetupLLM) {
const totalRAM = SystemHelper.getTotalRAM()
const { getLlama, LlamaLogLevel } = await Function(
'return import("node-llama-cpp")'
)()
const llama = await getLlama({
logLevel: LlamaLogLevel.disabled
})
const totalVRAM = await SystemHelper.getTotalVRAM(llama)
LogHelper.warning(
`LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`
`LLM requires at least ${LLM_MINIMUM_TOTAL_VRAM} GB of total VRAM. Current total VRAM is ${totalVRAM} GB. No worries though, Leon can still run without LLM.`
)
} else {
await downloadLLM()
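Both the requirements check and this warning path spin up their own short-lived llama handle through the same dynamic import. A hypothetical shared helper (not part of this commit; the name loadSetupLlama is invented here) could factor that duplication out:

    // Hypothetical consolidation: one silent llama handle loader reused by
    // the hardware check and the insufficient-VRAM warning path.
    async function loadSetupLlama() {
      const { getLlama, LlamaLogLevel } = await Function(
        'return import("node-llama-cpp")'
      )()

      return getLlama({ logLevel: LlamaLogLevel.disabled })
    }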

View File

@@ -283,8 +283,8 @@ export const LLM_FILE_NAME = `Lexi-Llama-${LLM_VERSION}.gguf`
export const LLM_NAME_WITH_VERSION = `${LLM_NAME} (${LLM_VERSION})`
export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
export const LLM_MINIMUM_TOTAL_RAM = 8
export const LLM_MINIMUM_FREE_RAM = 8
export const LLM_MINIMUM_TOTAL_VRAM = 8
export const LLM_MINIMUM_FREE_VRAM = 8
/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'
)*/

View File

@@ -14,6 +14,7 @@ import {
import { LLM_MANAGER, PERSONA } from '@/core'
import { LogHelper } from '@/helpers/log-helper'
import { DateHelper } from '@/helpers/date-helper'
import { SystemHelper } from '@/helpers/system-helper'
export const getInfo: FastifyPluginAsync<APIOptions> = async (
fastify,
@@ -27,6 +28,20 @@ export const getInfo: FastifyPluginAsync<APIOptions> = async (
const message = 'Information pulled.'
LogHelper.success(message)
const [
gpuDeviceNames,
graphicsComputeAPI,
totalVRAM,
freeVRAM,
usedVRAM
] = await Promise.all([
SystemHelper.getGPUDeviceNames(),
SystemHelper.getGraphicsComputeAPI(),
SystemHelper.getTotalVRAM(),
SystemHelper.getFreeVRAM(),
SystemHelper.getUsedVRAM()
])
reply.send({
success: true,
status: 200,
@@ -39,6 +54,11 @@ export const getInfo: FastifyPluginAsync<APIOptions> = async (
LLM_MANAGER.isLLMActionRecognitionEnabled,
isLLMNLGEnabled: LLM_MANAGER.isLLMNLGEnabled,
timeZone: DateHelper.getTimeZone(),
gpu: gpuDeviceNames[0],
graphicsComputeAPI,
totalVRAM,
freeVRAM,
usedVRAM,
llm: {
enabled: LLM_MANAGER.isLLMEnabled,
provider: LLM_PROVIDER
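With this hunk, the GET /info response carries the GPU and VRAM figures next to the existing fields. An illustrative payload shape (example values only; the numbers reuse the @example figures from SystemHelper further down, and the remaining fields are unchanged):

    // Illustrative /info payload after this change (example values only).
    const exampleInfoResponse = {
      success: true,
      status: 200,
      // ...existing fields (timeZone, telemetry, etc.) unchanged...
      gpu: 'Apple M1 Pro',         // first entry returned by getGPUDeviceNames()
      graphicsComputeAPI: 'metal', // e.g. 'cuda' on NVIDIA hardware, 'cpu' when no GPU is usable
      totalVRAM: 12,               // GB
      freeVRAM: 6,                 // GB
      usedVRAM: 6.04,              // GB
      llm: {
        enabled: true,
        provider: 'local'
      }
    }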

View File

@@ -19,6 +19,7 @@ import { llmInferencePlugin } from '@/core/http-server/api/llm-inference'
import { keyMidd } from '@/core/http-server/plugins/key'
import { utterancePlugin } from '@/core/http-server/api/utterance'
import { LLM_MANAGER, PERSONA } from '@/core'
import { SystemHelper } from '@/helpers/system-helper'
const API_VERSION = 'v1'
@@ -65,6 +66,12 @@ export default class HTTPServer {
LogHelper.info(`Mood: ${PERSONA.mood.type}`)
LogHelper.info(`GPU: ${(await SystemHelper.getGPUDeviceNames())[0]}`)
LogHelper.info(
`Graphics compute API: ${await SystemHelper.getGraphicsComputeAPI()}`
)
LogHelper.info(`Total VRAM: ${await SystemHelper.getTotalVRAM()} GB`)
const isLLMEnabled = LLM_MANAGER.isLLMEnabled ? 'enabled' : 'disabled'
LogHelper.info(`LLM: ${isLLMEnabled}`)

View File

@@ -12,8 +12,8 @@ import {
HAS_LLM,
HAS_LLM_ACTION_RECOGNITION,
HAS_LLM_NLG,
LLM_MINIMUM_FREE_RAM,
LLM_MINIMUM_TOTAL_RAM,
LLM_MINIMUM_FREE_VRAM,
LLM_MINIMUM_TOTAL_VRAM,
LLM_NAME_WITH_VERSION,
LLM_PATH,
LLM_PROVIDER,
@@ -180,11 +180,13 @@ export default class LLMManager {
}
if (LLM_PROVIDER === LLMProviders.Local) {
const freeRAMInGB = SystemHelper.getFreeRAM()
const totalRAMInGB = SystemHelper.getTotalRAM()
const [freeVRAMInGB, totalVRAMInGB] = await Promise.all([
SystemHelper.getFreeVRAM(),
SystemHelper.getTotalVRAM()
])
const isLLMPathFound = fs.existsSync(LLM_PATH)
const isCurrentFreeRAMEnough = LLM_MINIMUM_FREE_RAM <= freeRAMInGB * 4 // Multiply by 4 to boost probability of success
const isTotalRAMEnough = LLM_MINIMUM_TOTAL_RAM <= totalRAMInGB
const isCurrentFreeRAMEnough = LLM_MINIMUM_FREE_VRAM <= freeVRAMInGB
const isTotalRAMEnough = LLM_MINIMUM_TOTAL_VRAM <= totalVRAMInGB
/**
* In case the LLM is not set up and
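Note the behavioral change in the local-provider gate: the old check compared LLM_MINIMUM_FREE_RAM against free system RAM multiplied by 4 "to boost probability of success", while the new one compares free and total VRAM directly against the 8 GB minimums (the flag names still say RAM, but they now hold VRAM comparisons). Condensed side by side, using the names from the hunk above:

    // Before: system RAM, with a 4x fudge factor on the free-memory side.
    // const isCurrentFreeRAMEnough = LLM_MINIMUM_FREE_RAM <= freeRAMInGB * 4
    // const isTotalRAMEnough = LLM_MINIMUM_TOTAL_RAM <= totalRAMInGB

    // After: VRAM, compared as-is against the 8 GB minimums.
    const isCurrentFreeRAMEnough = LLM_MINIMUM_FREE_VRAM <= freeVRAMInGB
    const isTotalRAMEnough = LLM_MINIMUM_TOTAL_VRAM <= totalVRAMInGB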

View File

@@ -24,14 +24,14 @@ export class LogHelper {
* This one looks obvious :)
*/
public static info(value: string): void {
console.info('\x1b[36m %s\x1b[0m', value)
console.info('\x1b[36m %s\x1b[0m', value)
}
/**
* This one looks obvious :)
*/
public static warning(value: string): void {
console.warn('\x1b[33m⚠ %s\x1b[0m', value)
console.warn('\x1b[33m⚠ %s\x1b[0m', value)
}
/**

View File

@@ -1,5 +1,7 @@
import os from 'node:os'
import type { Llama } from 'node-llama-cpp'
import { OSTypes, CPUArchitectures } from '@/types'
enum OSNames {
@@ -196,11 +198,11 @@ export class SystemHelper {
* Get the names of the GPU devices on the machine
* @example getGPUDeviceNames() // ['Apple M1 Pro']
*/
public static async getGPUDeviceNames(): Promise<string[]> {
const { LLM_MANAGER } = await import('@/core')
public static async getGPUDeviceNames(llama?: Llama): Promise<string[]> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama) {
return LLM_MANAGER.llama.getGpuDeviceNames()
if (llamaAPI) {
return llamaAPI.getGpuDeviceNames()
}
return []
@@ -210,11 +212,11 @@
* Check if the machine has a GPU
* @example hasGPU() // true
*/
public static async hasGPU(): Promise<boolean> {
const { LLM_MANAGER } = await import('@/core')
public static async hasGPU(llama?: Llama): Promise<boolean> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama) {
return !!LLM_MANAGER.llama.gpu
if (llamaAPI) {
return !!llamaAPI.gpu
}
return false
@@ -224,11 +226,13 @@
* Get the graphics compute API used by the machine
* @example getGraphicsComputeAPI() // 'cuda'
*/
public static async getGraphicsComputeAPI(): Promise<GraphicsComputeAPIs> {
const { LLM_MANAGER } = await import('@/core')
public static async getGraphicsComputeAPI(
llama?: Llama
): Promise<GraphicsComputeAPIs> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama && LLM_MANAGER.llama.gpu) {
return LLM_MANAGER.llama.gpu as GraphicsComputeAPIs
if (llamaAPI && llamaAPI.gpu) {
return llamaAPI.gpu as GraphicsComputeAPIs
}
return GraphicsComputeAPIs.CPU
@@ -238,11 +242,11 @@
* Get the amount of used VRAM (in GB) on the machine
* @example getUsedVRAM() // 6.04
*/
public static async getUsedVRAM(): Promise<number> {
const { LLM_MANAGER } = await import('@/core')
public static async getUsedVRAM(llama?: Llama): Promise<number> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama) {
const vramState = await LLM_MANAGER.llama.getVramState()
if (llamaAPI) {
const vramState = await llamaAPI.getVramState()
return Number((vramState.used / (1_024 * 1_024 * 1_024)).toFixed(2))
}
@@ -254,11 +258,11 @@
* Get the total amount of VRAM (in GB) on the machine
* @example getTotalVRAM() // 12
*/
public static async getTotalVRAM(): Promise<number> {
const { LLM_MANAGER } = await import('@/core')
public static async getTotalVRAM(llama?: Llama): Promise<number> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama) {
const vramState = await LLM_MANAGER.llama.getVramState()
if (llamaAPI) {
const vramState = await llamaAPI.getVramState()
return Number((vramState.total / (1_024 * 1_024 * 1_024)).toFixed(2))
}
@@ -270,11 +274,11 @@
* Get the amount of free VRAM (in GB) on the machine
* @example getFreeVRAM() // 6
*/
public static async getFreeVRAM(): Promise<number> {
const { LLM_MANAGER } = await import('@/core')
public static async getFreeVRAM(llama?: Llama): Promise<number> {
const llamaAPI = llama ? llama : (await import('@/core')).LLM_MANAGER.llama
if (LLM_MANAGER.llama) {
const vramState = await LLM_MANAGER.llama.getVramState()
if (llamaAPI) {
const vramState = await llamaAPI.getVramState()
return Number((vramState.free / (1_024 * 1_024 * 1_024)).toFixed(2))
}