Mirror of https://github.com/leon-ai/leon.git

feat(scripts): download and compile llama.cpp

Commit: 1c0c8080bf
Parent: 46f6c09739
@@ -95,6 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
+    "node-llama-cpp": "2.8.6",
    "node-wav": "0.0.2",
    "os-name": "4.0.1",
    "pretty-bytes": "5.6.0",
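The only dependency change is the addition of node-llama-cpp 2.8.6, which ships the CLI used further down to fetch and compile llama.cpp, and which presumably loads the downloaded GGUF model at runtime. As a rough sketch of what consuming the model looks like with this library's 2.x API (the model path below is an assumption pieced together from the LLM_DIR_PATH and LLM_FILE_NAME constants this commit touches, not something shown in the diff):

import path from 'node:path'
import { LlamaModel, LlamaContext, LlamaChatSession } from 'node-llama-cpp'

// Hypothetical location; mirrors path.join(MODELS_PATH, 'llm', <LLM_FILE_NAME>)
const modelPath = path.join('models', 'llm', 'mistral-7b-instruct-v0.2.Q4_K_S.gguf')

const model = new LlamaModel({ modelPath })
const context = new LlamaContext({ model })
const session = new LlamaChatSession({ context })

// Top-level await: assumes an ESM context
const answer = await session.prompt('Reply with a one-line greeting.')
console.log(answer)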
@@ -107,7 +107,10 @@ const setupBinaries = async (key) => {
 
   const archiveWriter = fs.createWriteStream(archivePath)
   const latestReleaseAssetURL = `${GITHUB_URL}/releases/download/${key}_v${version}/${archiveName}`
-  const { data } = await FileHelper.downloadFile(latestReleaseAssetURL, 'stream')
+  const { data } = await FileHelper.downloadFile(
+    latestReleaseAssetURL,
+    'stream'
+  )
 
   data.pipe(archiveWriter)
   await stream.promises.finished(archiveWriter)
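Although the downloadFile call above is only reflowed across lines, the surrounding pattern is the load-bearing part: pipe the Axios response stream into a file writer, then await stream.promises.finished so the script cannot continue before the archive is fully flushed to disk. A minimal standalone sketch of that same pattern (the URL and destination path are placeholders):

import fs from 'node:fs'
import stream from 'node:stream'
import axios from 'axios'

async function downloadToFile(fileURL: string, destPath: string): Promise<void> {
  const writer = fs.createWriteStream(destPath)
  const { data } = await axios.get(fileURL, { responseType: 'stream' })

  data.pipe(writer)
  // Resolves once the write stream has fully flushed and closed,
  // or rejects if either stream errors
  await stream.promises.finished(writer)
}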
@@ -3,6 +3,8 @@ import path from 'node:path'
 import dns from 'node:dns'
+import stream from 'node:stream'
 
+import { command } from 'execa'
 
 import {
   LLM_NAME,
   LLM_NAME_WITH_VERSION,
@@ -11,8 +13,10 @@ import {
   LLM_PATH,
   LLM_VERSION,
   LLM_HF_DOWNLOAD_URL,
-  LLM_MIRROR_DOWNLOAD_URL
+  LLM_MIRROR_DOWNLOAD_URL,
+  LLM_LLAMA_CPP_RELEASE_TAG
 } from '@/constants'
+import { OSTypes, CPUArchitectures } from '@/types'
 import { SystemHelper } from '@/helpers/system-helper'
 import { LogHelper } from '@/helpers/log-helper'
 import { FileHelper } from '@/helpers/file-helper'
@@ -22,9 +26,13 @@ import { FileHelper } from '@/helpers/file-helper'
  * 1. Check minimum hardware requirements
  * 2. Check if Hugging Face is accessible
  * 3. Download the latest LLM from Hugging Face or mirror
- * 4. Create manifest file
+ * 4. Download and compile the latest llama.cpp release
+ * 5. Create manifest file
  */
 
+const LLM_MANIFEST_PATH = path.join(LLM_DIR_PATH, 'manifest.json')
+let manifest = null
+
 function checkMinimumHardwareRequirements() {
   return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
 }
@@ -39,29 +47,32 @@ async function canAccessHuggingFace() {
   }
 }
 
-async function setupLLM() {
+async function downloadLLM() {
   try {
-    LogHelper.info('Setting up LLM...')
+    LogHelper.info('Downloading LLM...')
 
-    const llmManifestPath = path.join(LLM_DIR_PATH, 'manifest.json')
-    let manifest = null
-
-    if (fs.existsSync(llmManifestPath)) {
-      manifest = JSON.parse(await fs.promises.readFile(llmManifestPath, 'utf8'))
+    if (fs.existsSync(LLM_MANIFEST_PATH)) {
+      manifest = JSON.parse(
+        await fs.promises.readFile(LLM_MANIFEST_PATH, 'utf8')
+      )
 
       LogHelper.info(`Found ${LLM_NAME} ${manifest.version}`)
       LogHelper.info(`Latest version is ${LLM_VERSION}`)
     }
 
     if (!manifest || manifest.version !== LLM_VERSION) {
-      const downloadURL = await canAccessHuggingFace() ? LLM_HF_DOWNLOAD_URL : LLM_MIRROR_DOWNLOAD_URL
+      const downloadURL = (await canAccessHuggingFace())
+        ? LLM_HF_DOWNLOAD_URL
+        : LLM_MIRROR_DOWNLOAD_URL
 
       // Just in case the LLM file already exists, delete it first
      if (fs.existsSync(LLM_PATH)) {
        await fs.promises.unlink(LLM_PATH)
      }
 
-      LogHelper.info(`Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`)
+      LogHelper.info(
+        `Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`
+      )
 
       const llmWriter = fs.createWriteStream(LLM_PATH)
       const response = await FileHelper.downloadFile(downloadURL, 'stream')
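The hunk above calls canAccessHuggingFace() to choose between the Hugging Face URL and the hf-mirror.com fallback; the function's body is outside this diff. Judging by the node:dns import, a plausible minimal version is a name-resolution probe (an illustrative guess, not the actual implementation):

import dns from 'node:dns'

async function canAccessHuggingFace(): Promise<boolean> {
  try {
    // If the hostname does not resolve, assume Hugging Face is
    // unreachable and fall back to the mirror
    await dns.promises.lookup('huggingface.co')
    return true
  } catch {
    return false
  }
}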
@@ -71,17 +82,69 @@ async function setupLLM() {
 
       LogHelper.success(`${LLM_NAME_WITH_VERSION} downloaded`)
 
-      await FileHelper.createManifestFile(llmManifestPath, LLM_NAME, LLM_VERSION)
-
-      LogHelper.success('Manifest file created')
-
-      LogHelper.success('LLM is set up')
+      LogHelper.success(`${LLM_NAME_WITH_VERSION} ready`)
     } else {
-      LogHelper.info(`${LLM_NAME_WITH_VERSION} is already set up and use the latest version`)
+      LogHelper.success(
+        `${LLM_NAME_WITH_VERSION} is already set up and use the latest version`
+      )
     }
+  } catch (e) {
+    LogHelper.error(`Failed to download LLM: ${e}`)
+  }
+}
+
+async function downloadAndCompileLlamaCPP() {
+  try {
+    LogHelper.info(
+      `Downloading and compiling "${LLM_LLAMA_CPP_RELEASE_TAG}" llama.cpp release...`
+    )
+
+    if (manifest.llamaCPPVersion) {
+      LogHelper.info(`Found llama.cpp ${manifest.llamaCPPVersion}`)
+      LogHelper.info(`Latest version is ${LLM_LLAMA_CPP_RELEASE_TAG}`)
+    }
+
+    if (!manifest || manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+      if (manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+        LogHelper.info(`Updating llama.cpp to ${LLM_LLAMA_CPP_RELEASE_TAG}...`)
+      }
+
+      const { type: osType, cpuArchitecture } = SystemHelper.getInformation()
+      let llamaCPPDownloadCommand = `npx --no node-llama-cpp download --release "${LLM_LLAMA_CPP_RELEASE_TAG}"`
+
+      if (
+        osType === OSTypes.MacOS &&
+        cpuArchitecture === CPUArchitectures.X64
+      ) {
+        llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
+
+        LogHelper.info(`macOS Intel chipset detected, Metal support disabled`)
+      }
+
+      await command(llamaCPPDownloadCommand, {
+        shell: true,
+        stdio: 'inherit'
+      })
+
+      await FileHelper.createManifestFile(
+        LLM_MANIFEST_PATH,
+        LLM_NAME,
+        LLM_VERSION,
+        {
+          llamaCPPVersion: LLM_LLAMA_CPP_RELEASE_TAG
+        }
+      )
+
+      LogHelper.success('Manifest file created')
+      LogHelper.success(`llama.cpp downloaded and compiled`)
+      LogHelper.success('The LLM is ready to go')
+    } else {
+      LogHelper.success(
+        `llama.cpp is already set up and use the latest version (${LLM_LLAMA_CPP_RELEASE_TAG})`
+      )
+    }
   } catch (e) {
-    LogHelper.error(`Failed to set up LLM: ${e}`)
+    LogHelper.error(`Failed to set up llama.cpp: ${e}`)
   }
 }
 
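Two details of the new compile step are easy to miss. The --no flag makes npx refuse to fetch node-llama-cpp on the fly, so the build always runs with the 2.8.6 copy pinned in package.json rather than whatever version npx would resolve. And on macOS with an x64 CPU the command gains --no-metal, presumably because llama.cpp's Metal acceleration targets Apple Silicon rather than Intel GPUs. On an Intel Mac the effective command therefore becomes: npx --no node-llama-cpp download --release "b2096" --no-metal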
@@ -91,8 +154,11 @@ export default async () => {
   if (!canSetupLLM) {
     const totalRAM = SystemHelper.getTotalRAM()
 
-    LogHelper.warning(`LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`)
+    LogHelper.warning(
+      `LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`
+    )
   } else {
-    await setupLLM()
+    await downloadLLM()
+    await downloadAndCompileLlamaCPP()
   }
 }
@@ -85,7 +85,7 @@ export const LEON_VERSION = process.env['npm_package_version']
 
 /**
  * spaCy models
- * Find new spaCy models: https://github.com/explosion/spacy-models/releases
+ * @see Find new spaCy models: https://github.com/explosion/spacy-models/releases
  */
 export const EN_SPACY_MODEL_NAME = 'en_core_web_trf'
 export const EN_SPACY_MODEL_VERSION = '3.4.0'
@@ -158,8 +158,14 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 8
 export const LLM_MINIMUM_FREE_RAM = 8
-export const LLM_HF_DOWNLOAD_URL = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
-export const LLM_MIRROR_DOWNLOAD_URL = 'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_HF_DOWNLOAD_URL =
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_MIRROR_DOWNLOAD_URL =
+  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+/**
+ * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
+ */
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2096'
 
 /**
  * Misc
@@ -15,7 +15,10 @@ export class FileHelper {
    * @param responseType The Axios request response type
    * @example downloadFile('https://example.com/file.zip', 'arraybuffer') // ArrayBuffer
    */
-  public static downloadFile(fileURL: string, responseType: AxiosResponseType): Promise<AxiosResponse> {
+  public static downloadFile(
+    fileURL: string,
+    responseType: AxiosResponseType
+  ): Promise<AxiosResponse> {
     return axios.get(fileURL, {
       responseType,
       onDownloadProgress: ({ loaded, total, progress, estimated, rate }) => {
@@ -45,12 +48,19 @@ export class FileHelper {
    * @param manifestPath The manifest file path
    * @param manifestName The manifest name
    * @param manifestVersion The manifest version
+   * @param extraData Extra data to add to the manifest
    */
-  public static async createManifestFile(manifestPath: string, manifestName: string, manifestVersion: string): Promise<void> {
+  public static async createManifestFile(
+    manifestPath: string,
+    manifestName: string,
+    manifestVersion: string,
+    extraData?: Record<string, unknown>
+  ): Promise<void> {
     const manifest = {
       name: manifestName,
       version: manifestVersion,
-      setupDate: Date.now()
+      setupDate: Date.now(),
+      ...extraData
     }
 
     await fs.promises.writeFile(manifestPath, JSON.stringify(manifest, null, 2))
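With the optional extraData parameter spread into the manifest object, the llama.cpp build is now recorded alongside the model metadata. After this commit, models/llm/manifest.json would look roughly like the following (the name, version, and timestamp values are illustrative; llamaCPPVersion carries LLM_LLAMA_CPP_RELEASE_TAG):

{
  "name": "<LLM_NAME>",
  "version": "<LLM_VERSION>",
  "setupDate": 1707523200000,
  "llamaCPPVersion": "b2096"
}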