diff --git a/package.json b/package.json
index d12bbdb3..fbe82fd5 100644
--- a/package.json
+++ b/package.json
@@ -95,6 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
+    "node-llama-cpp": "2.8.6",
     "node-wav": "0.0.2",
     "os-name": "4.0.1",
     "pretty-bytes": "5.6.0",
diff --git a/scripts/setup/setup-binaries.js b/scripts/setup/setup-binaries.js
index 135d0131..4db986ca 100644
--- a/scripts/setup/setup-binaries.js
+++ b/scripts/setup/setup-binaries.js
@@ -107,7 +107,10 @@ const setupBinaries = async (key) => {
     const archiveWriter = fs.createWriteStream(archivePath)
     const latestReleaseAssetURL = `${GITHUB_URL}/releases/download/${key}_v${version}/${archiveName}`

-    const { data } = await FileHelper.downloadFile(latestReleaseAssetURL, 'stream')
+    const { data } = await FileHelper.downloadFile(
+      latestReleaseAssetURL,
+      'stream'
+    )

     data.pipe(archiveWriter)
     await stream.promises.finished(archiveWriter)
diff --git a/scripts/setup/setup-llm.js b/scripts/setup/setup-llm.js
index 68eaab79..675afe48 100644
--- a/scripts/setup/setup-llm.js
+++ b/scripts/setup/setup-llm.js
@@ -3,6 +3,8 @@ import path from 'node:path'
 import dns from 'node:dns'
 import stream from 'node:stream'

+import { command } from 'execa'
+
 import {
   LLM_NAME,
   LLM_NAME_WITH_VERSION,
@@ -11,8 +13,10 @@ import {
   LLM_PATH,
   LLM_VERSION,
   LLM_HF_DOWNLOAD_URL,
-  LLM_MIRROR_DOWNLOAD_URL
+  LLM_MIRROR_DOWNLOAD_URL,
+  LLM_LLAMA_CPP_RELEASE_TAG
 } from '@/constants'
+import { OSTypes, CPUArchitectures } from '@/types'
 import { SystemHelper } from '@/helpers/system-helper'
 import { LogHelper } from '@/helpers/log-helper'
 import { FileHelper } from '@/helpers/file-helper'
@@ -22,9 +26,13 @@ import { FileHelper } from '@/helpers/file-helper'
  * 1. Check minimum hardware requirements
  * 2. Check if Hugging Face is accessible
  * 3. Download the latest LLM from Hugging Face or mirror
- * 4. Create manifest file
+ * 4. Download and compile the latest llama.cpp release
+ * 5. Create manifest file
  */

+const LLM_MANIFEST_PATH = path.join(LLM_DIR_PATH, 'manifest.json')
+let manifest = null
+
 function checkMinimumHardwareRequirements() {
   return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
 }
@@ -39,29 +47,32 @@ async function canAccessHuggingFace() {
   }
 }

-async function setupLLM() {
+async function downloadLLM() {
   try {
-    LogHelper.info('Setting up LLM...')
+    LogHelper.info('Downloading LLM...')

-    const llmManifestPath = path.join(LLM_DIR_PATH, 'manifest.json')
-    let manifest = null
-
-    if (fs.existsSync(llmManifestPath)) {
-      manifest = JSON.parse(await fs.promises.readFile(llmManifestPath, 'utf8'))
+    if (fs.existsSync(LLM_MANIFEST_PATH)) {
+      manifest = JSON.parse(
+        await fs.promises.readFile(LLM_MANIFEST_PATH, 'utf8')
+      )

       LogHelper.info(`Found ${LLM_NAME} ${manifest.version}`)
       LogHelper.info(`Latest version is ${LLM_VERSION}`)
     }

     if (!manifest || manifest.version !== LLM_VERSION) {
-      const downloadURL = await canAccessHuggingFace() ? LLM_HF_DOWNLOAD_URL : LLM_MIRROR_DOWNLOAD_URL
+      const downloadURL = (await canAccessHuggingFace())
+        ? LLM_HF_DOWNLOAD_URL
+        : LLM_MIRROR_DOWNLOAD_URL

       // Just in case the LLM file already exists, delete it first
       if (fs.existsSync(LLM_PATH)) {
         await fs.promises.unlink(LLM_PATH)
       }

-      LogHelper.info(`Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`)
+      LogHelper.info(
+        `Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`
+      )

       const llmWriter = fs.createWriteStream(LLM_PATH)
       const response = await FileHelper.downloadFile(downloadURL, 'stream')
@@ -71,17 +82,69 @@ async function setupLLM() {

       LogHelper.success(`${LLM_NAME_WITH_VERSION} downloaded`)

-      await FileHelper.createManifestFile(llmManifestPath, LLM_NAME, LLM_VERSION)
-
-      LogHelper.success('Manifest file created')
       LogHelper.success(`${LLM_NAME_WITH_VERSION} ready`)
     } else {
-      LogHelper.info(`${LLM_NAME_WITH_VERSION} is already set up and use the latest version`)
+      LogHelper.success(
+        `${LLM_NAME_WITH_VERSION} is already set up and uses the latest version`
+      )
+    }
+  } catch (e) {
+    LogHelper.error(`Failed to download LLM: ${e}`)
+  }
+}
+
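+// Compile llama.cpp via the node-llama-cpp CLI so the native bindings match
+// the pinned release tag; the manifest records the tag that was last
+// compiled, so this step is skipped on subsequent runs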
+async function downloadAndCompileLlamaCPP() {
+  try {
+    LogHelper.info(
+      `Downloading and compiling "${LLM_LLAMA_CPP_RELEASE_TAG}" llama.cpp release...`
+    )
+
+    if (manifest?.llamaCPPVersion) {
+      LogHelper.info(`Found llama.cpp ${manifest.llamaCPPVersion}`)
+      LogHelper.info(`Latest version is ${LLM_LLAMA_CPP_RELEASE_TAG}`)
     }

-    LogHelper.success('LLM is set up')
+    if (!manifest || manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+      if (manifest && manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+        LogHelper.info(`Updating llama.cpp to ${LLM_LLAMA_CPP_RELEASE_TAG}...`)
+      }
+
+      const { type: osType, cpuArchitecture } = SystemHelper.getInformation()
+      let llamaCPPDownloadCommand = `npx --no node-llama-cpp download --release "${LLM_LLAMA_CPP_RELEASE_TAG}"`
+
+      if (
+        osType === OSTypes.MacOS &&
+        cpuArchitecture === CPUArchitectures.X64
+      ) {
+        llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
+
+        LogHelper.info(`macOS Intel chipset detected, Metal support disabled`)
+      }
+
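+      // node-llama-cpp's "download" command fetches the llama.cpp source at
+      // the requested release tag and compiles it for the current platform;
+      // "npx --no" refuses to auto-install node-llama-cpp if it is missing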
+      await command(llamaCPPDownloadCommand, {
+        shell: true,
+        stdio: 'inherit'
+      })
+
+      await FileHelper.createManifestFile(
+        LLM_MANIFEST_PATH,
+        LLM_NAME,
+        LLM_VERSION,
+        {
+          llamaCPPVersion: LLM_LLAMA_CPP_RELEASE_TAG
+        }
+      )
+
+      LogHelper.success('Manifest file created')
+      LogHelper.success(`llama.cpp downloaded and compiled`)
+      LogHelper.success('The LLM is ready to go')
+    } else {
+      LogHelper.success(
+        `llama.cpp is already set up and uses the latest version (${LLM_LLAMA_CPP_RELEASE_TAG})`
+      )
+    }
   } catch (e) {
-    LogHelper.error(`Failed to set up LLM: ${e}`)
+    LogHelper.error(`Failed to set up llama.cpp: ${e}`)
   }
 }

@@ -91,8 +154,11 @@ export default async () => {

   if (!canSetupLLM) {
     const totalRAM = SystemHelper.getTotalRAM()

-    LogHelper.warning(`LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`)
+    LogHelper.warning(
+      `LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} GB of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`
+    )
   } else {
-    await setupLLM()
+    await downloadLLM()
+    await downloadAndCompileLlamaCPP()
   }
 }
diff --git a/server/src/constants.ts b/server/src/constants.ts
index 408a552d..b0600a17 100644
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@@ -85,7 +85,7 @@ export const LEON_VERSION = process.env['npm_package_version']

 /**
  * spaCy models
- * Find new spaCy models: https://github.com/explosion/spacy-models/releases
+ * @see Find new spaCy models: https://github.com/explosion/spacy-models/releases
  */
 export const EN_SPACY_MODEL_NAME = 'en_core_web_trf'
 export const EN_SPACY_MODEL_VERSION = '3.4.0'
@@ -158,8 +158,14 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 8
 export const LLM_MINIMUM_FREE_RAM = 8
-export const LLM_HF_DOWNLOAD_URL = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
-export const LLM_MIRROR_DOWNLOAD_URL = 'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_HF_DOWNLOAD_URL =
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_MIRROR_DOWNLOAD_URL =
+  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+/**
+ * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
+ */
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2096'

 /**
  * Misc
diff --git a/server/src/helpers/file-helper.ts b/server/src/helpers/file-helper.ts
index 410f27a6..f631ccc8 100644
--- a/server/src/helpers/file-helper.ts
+++ b/server/src/helpers/file-helper.ts
@@ -15,7 +15,10 @@ export class FileHelper {
    * @param responseType The Axios request response type
    * @example downloadFile('https://example.com/file.zip', 'arraybuffer') // ArrayBuffer
    */
-  public static downloadFile(fileURL: string, responseType: AxiosResponseType): Promise<AxiosResponse> {
+  public static downloadFile(
+    fileURL: string,
+    responseType: AxiosResponseType
+  ): Promise<AxiosResponse> {
     return axios.get(fileURL, {
       responseType,
       onDownloadProgress: ({ loaded, total, progress, estimated, rate }) => {
@@ -45,12 +48,19 @@ export class FileHelper {
    * @param manifestPath The manifest file path
    * @param manifestName The manifest name
    * @param manifestVersion The manifest version
+   * @param extraData Extra data to add to the manifest
    */
-  public static async createManifestFile(manifestPath: string, manifestName: string, manifestVersion: string): Promise<void> {
+  public static async createManifestFile(
+    manifestPath: string,
+    manifestName: string,
+    manifestVersion: string,
+    extraData?: Record<string, unknown>
+  ): Promise<void> {
     const manifest = {
       name: manifestName,
       version: manifestVersion,
-      setupDate: Date.now()
+      setupDate: Date.now(),
+      ...extraData
     }

     await fs.promises.writeFile(manifestPath, JSON.stringify(manifest, null, 2))
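For reference, the manifest.json that createManifestFile writes once downloadAndCompileLlamaCPP passes the new extraData argument would look roughly like the sketch below; the name, version, and setupDate values are illustrative placeholders, since LLM_NAME and LLM_VERSION are defined outside this diff:

    {
      "name": "<LLM_NAME>",
      "version": "<LLM_VERSION>",
      "setupDate": 1707304800000,
      "llamaCPPVersion": "b2096"
    }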