
feat: new ASR engine ready

louistiti 2024-05-21 23:57:36 +08:00
parent f051c1d2cd
commit 596b7552dd
No known key found for this signature in database
GPG Key ID: 92CD6A2E497E1669
8 changed files with 119 additions and 6 deletions

View File

@@ -91,8 +91,8 @@ export default class Client {
       this.chatbot.createBubble('leon', data)
     })

-    this.socket.on('asr-speech', (data) => {
-      console.log('Wake word detected', data)
+    this.socket.on('asr-speech', (text) => {
+      console.log('Wake word detected', text)
     })

     this.socket.on('asr-end-of-owner-speech', () => {

View File

@@ -63,7 +63,8 @@
     "pre-release:nodejs-bridge": "tsx scripts/release/pre-release-binaries.js nodejs-bridge",
     "pre-release:python-bridge": "tsx scripts/release/pre-release-binaries.js python-bridge",
     "pre-release:tcp-server": "tsx scripts/release/pre-release-binaries.js tcp-server",
-    "check": "tsx scripts/check.js"
+    "check": "tsx scripts/check.js",
+    "kill": "pkill -f node && pkill -f leon-tcp-server && pkill -f pt_main_thread"
   },
   "dependencies": {
     "@aws-sdk/client-polly": "3.18.0",

View File

@@ -160,6 +160,32 @@ SPACY_MODELS.set('fr', {
         stdio: 'inherit'
       })
       LogHelper.success('PyTorch with CUDA support installed')
+
+      if (osType === OSTypes.Linux) {
+        LogHelper.info(
+          'Exporting LD_LIBRARY_PATH to map NVIDIA libs as it is needed by Whisper Faster. Cf. https://github.com/SYSTRAN/faster-whisper/issues/153...'
+        )
+
+        try {
+          await command(
+            // eslint-disable-next-line no-useless-escape
+            'export LD_LIBRARY_PATH=`pipenv run python -c "import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))"`',
+            {
+              shell: true,
+              stdio: 'inherit'
+            }
+          )
+          await command('echo $LD_LIBRARY_PATH', {
+            shell: true,
+            stdio: 'inherit'
+          })
+
+          LogHelper.success('LD_LIBRARY_PATH exported')
+        } catch (e) {
+          LogHelper.error(`Failed to export LD_LIBRARY_PATH: ${e}`)
+          process.exit(1)
+        }
+      }
     } catch (e) {
       LogHelper.error(`Failed to install PyTorch with CUDA support: ${e}`)
       process.exit(1)
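
The backtick-wrapped one-liner above is dense: it asks the pipenv-managed Python to locate the shared-library directories shipped inside the pip-installed NVIDIA wheels, then joins them with a colon. A standalone sketch of that discovery step, assuming the nvidia-cublas and nvidia-cudnn pip packages are installed in the active environment:

# Sketch: resolve the NVIDIA lib directories bundled in the pip wheels,
# mirroring the setup one-liner above (assumes the nvidia-cublas and
# nvidia-cudnn packages are installed).
import os

import nvidia.cublas.lib
import nvidia.cudnn.lib

cublas_dir = os.path.dirname(nvidia.cublas.lib.__file__)
cudnn_dir = os.path.dirname(nvidia.cudnn.lib.__file__)

# Colon-separated value suitable for LD_LIBRARY_PATH
print(f'{cublas_dir}:{cudnn_dir}')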

View File

@@ -72,6 +72,28 @@ export const NODEJS_BRIDGE_BIN_NAME = 'leon-nodejs-bridge.js'
 export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge'
 export const PYTHON_TCP_SERVER_BIN_NAME = 'leon-tcp-server'
+
+/**
+ * NVIDIA library paths for CUDA. Needed by Whisper Faster
+ * @see https://github.com/SYSTRAN/faster-whisper/issues/153
+ */
+export const PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH = path.join(
+  PYTHON_TCP_SERVER_DIST_PATH,
+  BINARIES_FOLDER_NAME,
+  'lib',
+  'nvidia',
+  'cublas',
+  'lib'
+)
+export const PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH = path.join(
+  PYTHON_TCP_SERVER_DIST_PATH,
+  BINARIES_FOLDER_NAME,
+  'lib',
+  'nvidia',
+  'cudnn',
+  'lib'
+)
+export const PYTHON_TCP_SERVER_LD_LIBRARY_PATH = `${PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH}:${PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH}`
 export const PYTHON_TCP_SERVER_BIN_PATH = path.join(
   PYTHON_TCP_SERVER_DIST_PATH,
   BINARIES_FOLDER_NAME,

View File

@@ -2,7 +2,8 @@ import {
   HOST,
   PORT,
   PYTHON_TCP_SERVER_HOST,
-  PYTHON_TCP_SERVER_PORT
+  PYTHON_TCP_SERVER_PORT,
+  PYTHON_TCP_SERVER_LD_LIBRARY_PATH
 } from '@/constants'
 import TCPClient from '@/core/tcp-client'
 import HTTPServer from '@/core/http-server/http-server'
@@ -18,6 +19,17 @@ import LLMManager from '@/core/llm-manager/llm-manager'
 import LLMProvider from '@/core/llm-manager/llm-provider'
 import Persona from '@/core/llm-manager/persona'
 import { ConversationLogger } from '@/conversation-logger'
+import { SystemHelper } from '@/helpers/system-helper'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * Set environment variables
+ */
+if (SystemHelper.isLinux()) {
+  process.env['LD_LIBRARY_PATH'] = PYTHON_TCP_SERVER_LD_LIBRARY_PATH
+
+  LogHelper.info(`LD_LIBRARY_PATH set to: ${process.env['LD_LIBRARY_PATH']}`)
+}

 /**
  * Register core nodes

View File

@@ -155,4 +155,34 @@ export class SystemHelper {

     return str.replace(new RegExp(username, 'g'), '{username}')
   }
+
+  /**
+   * Check if the current OS is Windows
+   * @example isWindows() // false
+   */
+  public static isWindows(): boolean {
+    const { type } = this.getInformation()
+
+    return type === OSTypes.Windows
+  }
+
+  /**
+   * Check if the current OS is macOS
+   * @example isMacOS() // false
+   */
+  public static isMacOS(): boolean {
+    const { type } = this.getInformation()
+
+    return type === OSTypes.MacOS
+  }
+
+  /**
+   * Check if the current OS is Linux
+   * @example isLinux() // true
+   */
+  public static isLinux(): boolean {
+    const { type } = this.getInformation()
+
+    return type === OSTypes.Linux
+  }
 }

View File

@@ -47,7 +47,8 @@ import { LogHelper } from '@/helpers/log-helper'
       '<00:00',
       '00:00<',
       'CUDNN_STATUS_NOT_SUPPORTED',
-      'cls.seq_relationship.weight'
+      'cls.seq_relationship.weight',
+      'ALSA lib'
     ]

     if (skipError.some((error) => formattedData.includes(error))) {

View File

@@ -25,6 +25,13 @@ class ASR:
         self.log(f'Device: {device}')

+        compute_type = "float16"
+        if device == 'cpu':
+            compute_type = "int8_float32"
+
+        self.compute_type = compute_type
+
         self.transcription_callback = transcription_callback
         self.wake_word_callback = wake_word_callback
         self.end_of_owner_speech_callback = end_of_owner_speech_callback
@@ -51,8 +58,22 @@ class ASR:
         self.buffer_size = 64  # Size of the circular buffer

         self.audio = pyaudio.PyAudio()
-        self.model = WhisperModel(self.model_size, device=self.device, compute_type="float16")
         self.stream = None
+        self.model = None
+
+        if self.device == 'cpu':
+            self.model = WhisperModel(
+                self.model_size,
+                device=self.device,
+                compute_type=self.compute_type,
+                cpu_threads=4
+            )
+        else:
+            self.model = WhisperModel(
+                self.model_size,
+                device=self.device,
+                compute_type=self.compute_type
+            )

         self.log('Model loaded')
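
For reference, the branching above follows faster-whisper's compute-type conventions: "float16" is a GPU compute type, "int8_float32" is the usual CPU fallback, and cpu_threads bounds CPU-side parallelism. A condensed sketch of the same selection logic (the load_whisper_model helper is illustrative, not part of this commit; it assumes the faster-whisper package is installed):

# Sketch of the device-dependent model loading shown above.
from faster_whisper import WhisperModel

def load_whisper_model(model_size: str, device: str) -> WhisperModel:
    # float16 targets GPU inference; int8_float32 is the practical CPU fallback
    compute_type = 'int8_float32' if device == 'cpu' else 'float16'

    if device == 'cpu':
        # Cap CPU threads so transcription does not starve other processes
        return WhisperModel(model_size, device=device,
                            compute_type=compute_type, cpu_threads=4)

    return WhisperModel(model_size, device=device, compute_type=compute_type)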