diff --git a/package.json b/package.json
index d12bbdb3..fbe82fd5 100644
--- a/package.json
+++ b/package.json
@@ -95,6 +95,7 @@
     "getos": "3.2.1",
     "googleapis": "67.1.1",
     "ibm-watson": "6.1.1",
+    "node-llama-cpp": "2.8.6",
     "node-wav": "0.0.2",
     "os-name": "4.0.1",
     "pretty-bytes": "5.6.0",
diff --git a/scripts/setup/setup-binaries.js b/scripts/setup/setup-binaries.js
index 135d0131..4db986ca 100644
--- a/scripts/setup/setup-binaries.js
+++ b/scripts/setup/setup-binaries.js
@@ -107,7 +107,10 @@ const setupBinaries = async (key) => {
     const archiveWriter = fs.createWriteStream(archivePath)
     const latestReleaseAssetURL = `${GITHUB_URL}/releases/download/${key}_v${version}/${archiveName}`

-    const { data } = await FileHelper.downloadFile(latestReleaseAssetURL, 'stream')
+    const { data } = await FileHelper.downloadFile(
+      latestReleaseAssetURL,
+      'stream'
+    )

     data.pipe(archiveWriter)
     await stream.promises.finished(archiveWriter)
diff --git a/scripts/setup/setup-llm.js b/scripts/setup/setup-llm.js
index 68eaab79..675afe48 100644
--- a/scripts/setup/setup-llm.js
+++ b/scripts/setup/setup-llm.js
@@ -3,6 +3,8 @@ import path from 'node:path'
 import dns from 'node:dns'
 import stream from 'node:stream'

+import { command } from 'execa'
+
 import {
   LLM_NAME,
   LLM_NAME_WITH_VERSION,
@@ -11,8 +13,10 @@ import {
   LLM_PATH,
   LLM_VERSION,
   LLM_HF_DOWNLOAD_URL,
-  LLM_MIRROR_DOWNLOAD_URL
+  LLM_MIRROR_DOWNLOAD_URL,
+  LLM_LLAMA_CPP_RELEASE_TAG
 } from '@/constants'
+import { OSTypes, CPUArchitectures } from '@/types'
 import { SystemHelper } from '@/helpers/system-helper'
 import { LogHelper } from '@/helpers/log-helper'
 import { FileHelper } from '@/helpers/file-helper'
@@ -22,9 +26,13 @@ import { FileHelper } from '@/helpers/file-helper'
  * 1. Check minimum hardware requirements
  * 2. Check if Hugging Face is accessible
  * 3. Download the latest LLM from Hugging Face or mirror
- * 4. Create manifest file
+ * 4. Download and compile the latest llama.cpp release
+ * 5. Create manifest file
  */

+const LLM_MANIFEST_PATH = path.join(LLM_DIR_PATH, 'manifest.json')
+let manifest = null
+
 function checkMinimumHardwareRequirements() {
   return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
 }
@@ -39,29 +47,32 @@ async function canAccessHuggingFace() {
   }
 }

-async function setupLLM() {
+async function downloadLLM() {
   try {
-    LogHelper.info('Setting up LLM...')
+    LogHelper.info('Downloading LLM...')

-    const llmManifestPath = path.join(LLM_DIR_PATH, 'manifest.json')
-    let manifest = null
-
-    if (fs.existsSync(llmManifestPath)) {
-      manifest = JSON.parse(await fs.promises.readFile(llmManifestPath, 'utf8'))
+    if (fs.existsSync(LLM_MANIFEST_PATH)) {
+      manifest = JSON.parse(
+        await fs.promises.readFile(LLM_MANIFEST_PATH, 'utf8')
+      )

       LogHelper.info(`Found ${LLM_NAME} ${manifest.version}`)
       LogHelper.info(`Latest version is ${LLM_VERSION}`)
     }

     if (!manifest || manifest.version !== LLM_VERSION) {
-      const downloadURL = await canAccessHuggingFace() ? LLM_HF_DOWNLOAD_URL : LLM_MIRROR_DOWNLOAD_URL
+      const downloadURL = (await canAccessHuggingFace())
+        ? LLM_HF_DOWNLOAD_URL
+        : LLM_MIRROR_DOWNLOAD_URL

       // Just in case the LLM file already exists, delete it first
       if (fs.existsSync(LLM_PATH)) {
         await fs.promises.unlink(LLM_PATH)
       }

-      LogHelper.info(`Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`)
+      LogHelper.info(
+        `Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`
+      )

       const llmWriter = fs.createWriteStream(LLM_PATH)
       const response = await FileHelper.downloadFile(downloadURL, 'stream')
@@ -71,17 +82,69 @@ async function setupLLM() {

       LogHelper.success(`${LLM_NAME_WITH_VERSION} downloaded`)

-      await FileHelper.createManifestFile(llmManifestPath, LLM_NAME, LLM_VERSION)
-
-      LogHelper.success('Manifest file created')
       LogHelper.success(`${LLM_NAME_WITH_VERSION} ready`)
     } else {
-      LogHelper.info(`${LLM_NAME_WITH_VERSION} is already set up and use the latest version`)
+      LogHelper.success(
+        `${LLM_NAME_WITH_VERSION} is already set up and uses the latest version`
+      )
+    }
+  } catch (e) {
+    LogHelper.error(`Failed to download LLM: ${e}`)
+  }
+}
+
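+// Compile llama.cpp via the node-llama-cpp CLI so the native bindings match
+// the pinned release tag; the manifest records the tag that was last
+// compiled, so this step is skipped on subsequent runs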
+async function downloadAndCompileLlamaCPP() {
+  try {
+    LogHelper.info(
+      `Downloading and compiling "${LLM_LLAMA_CPP_RELEASE_TAG}" llama.cpp release...`
+    )
+
+    if (manifest?.llamaCPPVersion) {
+      LogHelper.info(`Found llama.cpp ${manifest.llamaCPPVersion}`)
+      LogHelper.info(`Latest version is ${LLM_LLAMA_CPP_RELEASE_TAG}`)
     }

-    LogHelper.success('LLM is set up')
+    if (!manifest || manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+      if (manifest && manifest.llamaCPPVersion !== LLM_LLAMA_CPP_RELEASE_TAG) {
+        LogHelper.info(`Updating llama.cpp to ${LLM_LLAMA_CPP_RELEASE_TAG}...`)
+      }
+
+      const { type: osType, cpuArchitecture } = SystemHelper.getInformation()
+      let llamaCPPDownloadCommand = `npx --no node-llama-cpp download --release "${LLM_LLAMA_CPP_RELEASE_TAG}"`
+
+      if (
+        osType === OSTypes.MacOS &&
+        cpuArchitecture === CPUArchitectures.X64
+      ) {
+        llamaCPPDownloadCommand = `${llamaCPPDownloadCommand} --no-metal`
+
+        LogHelper.info(`macOS Intel chipset detected, Metal support disabled`)
+      }
+
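+      // node-llama-cpp's "download" command fetches the llama.cpp source at
+      // the requested release tag and compiles it for the current platform;
+      // "npx --no" refuses to auto-install node-llama-cpp if it is missing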
+      await command(llamaCPPDownloadCommand, {
+        shell: true,
+        stdio: 'inherit'
+      })
+
+      await FileHelper.createManifestFile(
+        LLM_MANIFEST_PATH,
+        LLM_NAME,
+        LLM_VERSION,
+        {
+          llamaCPPVersion: LLM_LLAMA_CPP_RELEASE_TAG
+        }
+      )
+
+      LogHelper.success('Manifest file created')
+      LogHelper.success(`llama.cpp downloaded and compiled`)
+      LogHelper.success('The LLM is ready to go')
+    } else {
+      LogHelper.success(
+        `llama.cpp is already set up and uses the latest version (${LLM_LLAMA_CPP_RELEASE_TAG})`
+      )
+    }
   } catch (e) {
-    LogHelper.error(`Failed to set up LLM: ${e}`)
+    LogHelper.error(`Failed to set up llama.cpp: ${e}`)
   }
 }

@@ -91,8 +154,11 @@ export default async () => {

   if (!canSetupLLM) {
     const totalRAM = SystemHelper.getTotalRAM()

-    LogHelper.warning(`LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`)
+    LogHelper.warning(
+      `LLM requires at least ${LLM_MINIMUM_TOTAL_RAM} GB of total RAM. Current total RAM is ${totalRAM} GB. No worries though, Leon can still run without LLM.`
+    )
   } else {
-    await setupLLM()
+    await downloadLLM()
+    await downloadAndCompileLlamaCPP()
   }
 }
diff --git a/server/src/constants.ts b/server/src/constants.ts
index 408a552d..b0600a17 100644
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@@ -85,7 +85,7 @@ export const LEON_VERSION = process.env['npm_package_version']

 /**
  * spaCy models
- * Find new spaCy models: https://github.com/explosion/spacy-models/releases
+ * @see Find new spaCy models: https://github.com/explosion/spacy-models/releases
  */
 export const EN_SPACY_MODEL_NAME = 'en_core_web_trf'
 export const EN_SPACY_MODEL_VERSION = '3.4.0'
@@ -158,8 +158,14 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 8
 export const LLM_MINIMUM_FREE_RAM = 8
-export const LLM_HF_DOWNLOAD_URL = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
-export const LLM_MIRROR_DOWNLOAD_URL = 'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_HF_DOWNLOAD_URL =
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+export const LLM_MIRROR_DOWNLOAD_URL =
+  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+/**
+ * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
+ */
+export const LLM_LLAMA_CPP_RELEASE_TAG = 'b2096'

 /**
  * Misc
diff --git a/server/src/helpers/file-helper.ts b/server/src/helpers/file-helper.ts
index 410f27a6..f631ccc8 100644
--- a/server/src/helpers/file-helper.ts
+++ b/server/src/helpers/file-helper.ts
@@ -15,7 +15,10 @@ export class FileHelper {
    * @param responseType The Axios request response type
    * @example downloadFile('https://example.com/file.zip', 'arraybuffer') // ArrayBuffer
    */
-  public static downloadFile(fileURL: string, responseType: AxiosResponseType): Promise<AxiosResponse> {
+  public static downloadFile(
+    fileURL: string,
+    responseType: AxiosResponseType
+  ): Promise<AxiosResponse> {
     return axios.get(fileURL, {
       responseType,
       onDownloadProgress: ({ loaded, total, progress, estimated, rate }) => {
@@ -45,12 +48,19 @@ export class FileHelper {
    * @param manifestPath The manifest file path
    * @param manifestName The manifest name
    * @param manifestVersion The manifest version
+   * @param extraData Extra data to add to the manifest
    */
-  public static async createManifestFile(manifestPath: string, manifestName: string, manifestVersion: string): Promise<void> {
+  public static async createManifestFile(
+    manifestPath: string,
+    manifestName: string,
+    manifestVersion: string,
+    extraData?: Record<string, unknown>
+  ): Promise<void> {
     const manifest = {
       name: manifestName,
       version: manifestVersion,
-      setupDate: Date.now()
+      setupDate: Date.now(),
+      ...extraData
     }

     await fs.promises.writeFile(manifestPath, JSON.stringify(manifest, null, 2))
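For reference, the manifest.json that createManifestFile writes once downloadAndCompileLlamaCPP passes the new extraData argument would look roughly like the sketch below; the name, version, and setupDate values are illustrative placeholders, since LLM_NAME and LLM_VERSION are defined outside this diff:

    {
      "name": "<LLM_NAME>",
      "version": "<LLM_VERSION>",
      "setupDate": 1707304800000,
      "llamaCPPVersion": "b2096"
    }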