diff --git a/app/src/js/client.js b/app/src/js/client.js index b2ee75c4..f68628a0 100644 --- a/app/src/js/client.js +++ b/app/src/js/client.js @@ -91,8 +91,8 @@ export default class Client { this.chatbot.createBubble('leon', data) }) - this.socket.on('asr-speech', (data) => { - console.log('Wake word detected', data) + this.socket.on('asr-speech', (text) => { + console.log('Wake word detected', text) }) this.socket.on('asr-end-of-owner-speech', () => { diff --git a/package.json b/package.json index eceabb24..8a0f1e1d 100644 --- a/package.json +++ b/package.json @@ -63,7 +63,8 @@ "pre-release:nodejs-bridge": "tsx scripts/release/pre-release-binaries.js nodejs-bridge", "pre-release:python-bridge": "tsx scripts/release/pre-release-binaries.js python-bridge", "pre-release:tcp-server": "tsx scripts/release/pre-release-binaries.js tcp-server", - "check": "tsx scripts/check.js" + "check": "tsx scripts/check.js", + "kill": "pkill -f node && pkill -f leon-tcp-server && pkill -f pt_main_thread" }, "dependencies": { "@aws-sdk/client-polly": "3.18.0", diff --git a/scripts/setup/setup-python-dev-env.js b/scripts/setup/setup-python-dev-env.js index 006d7548..d50d9d1c 100644 --- a/scripts/setup/setup-python-dev-env.js +++ b/scripts/setup/setup-python-dev-env.js @@ -160,6 +160,32 @@ SPACY_MODELS.set('fr', { stdio: 'inherit' }) LogHelper.success('PyTorch with CUDA support installed') + + if (osType === OSTypes.Linux) { + LogHelper.info( + 'Exporting LD_LIBRARY_PATH to map NVIDIA libs as it is needed by Whisper Faster. Cf. https://github.com/SYSTRAN/faster-whisper/issues/153...' 
+ ) + + try { + await command( + // eslint-disable-next-line no-useless-escape + 'export LD_LIBRARY_PATH=`pipenv run python -c "import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))"`', + { + shell: true, + stdio: 'inherit' + } + ) + await command('echo $LD_LIBRARY_PATH', { + shell: true, + stdio: 'inherit' + }) + + LogHelper.success('LD_LIBRARY_PATH exported') + } catch (e) { + LogHelper.error(`Failed to export LD_LIBRARY_PATH: ${e}`) + process.exit(1) + } + } } catch (e) { LogHelper.error(`Failed to install PyTorch with CUDA support: ${e}`) process.exit(1) diff --git a/server/src/constants.ts b/server/src/constants.ts index 33ecaec3..78e6a912 100644 --- a/server/src/constants.ts +++ b/server/src/constants.ts @@ -72,6 +72,28 @@ export const NODEJS_BRIDGE_BIN_NAME = 'leon-nodejs-bridge.js' export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge' export const PYTHON_TCP_SERVER_BIN_NAME = 'leon-tcp-server' +/** + * NVIDIA libraries paths for CUDA. 
Needed by Faster Whisper + * @see https://github.com/SYSTRAN/faster-whisper/issues/153 + */ +export const PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH = path.join( + PYTHON_TCP_SERVER_DIST_PATH, + BINARIES_FOLDER_NAME, + 'lib', + 'nvidia', + 'cublas', + 'lib' +) +export const PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH = path.join( + PYTHON_TCP_SERVER_DIST_PATH, + BINARIES_FOLDER_NAME, + 'lib', + 'nvidia', + 'cudnn', + 'lib' +) +export const PYTHON_TCP_SERVER_LD_LIBRARY_PATH = `${PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH}:${PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH}` + export const PYTHON_TCP_SERVER_BIN_PATH = path.join( PYTHON_TCP_SERVER_DIST_PATH, BINARIES_FOLDER_NAME, diff --git a/server/src/core/index.ts b/server/src/core/index.ts index 6b17afbf..19faaf8d 100644 --- a/server/src/core/index.ts +++ b/server/src/core/index.ts @@ -2,7 +2,8 @@ import { HOST, PORT, PYTHON_TCP_SERVER_HOST, - PYTHON_TCP_SERVER_PORT + PYTHON_TCP_SERVER_PORT, + PYTHON_TCP_SERVER_LD_LIBRARY_PATH } from '@/constants' import TCPClient from '@/core/tcp-client' import HTTPServer from '@/core/http-server/http-server' @@ -18,6 +19,17 @@ import LLMManager from '@/core/llm-manager/llm-manager' import LLMProvider from '@/core/llm-manager/llm-provider' import Persona from '@/core/llm-manager/persona' import { ConversationLogger } from '@/conversation-logger' +import { SystemHelper } from '@/helpers/system-helper' +import { LogHelper } from '@/helpers/log-helper' + +/** + * Set environment variables + */ + +if (SystemHelper.isLinux()) { + process.env['LD_LIBRARY_PATH'] = PYTHON_TCP_SERVER_LD_LIBRARY_PATH + LogHelper.info(`LD_LIBRARY_PATH set to: ${process.env['LD_LIBRARY_PATH']}`) +} /** * Register core nodes diff --git a/server/src/helpers/system-helper.ts b/server/src/helpers/system-helper.ts index e7df50f8..372b6cce 100644 --- a/server/src/helpers/system-helper.ts +++ b/server/src/helpers/system-helper.ts @@ -155,4 +155,34 @@ export class SystemHelper { return str.replace(new RegExp(username, 'g'), 
'{username}') } + + /** + * Check if the current OS is Windows + * @example isWindows() // false + */ + public static isWindows(): boolean { + const { type } = this.getInformation() + + return type === OSTypes.Windows + } + + /** + * Check if the current OS is macOS + * @example isMacOS() // false + */ + public static isMacOS(): boolean { + const { type } = this.getInformation() + + return type === OSTypes.MacOS + } + + /** + * Check if the current OS is Linux + * @example isLinux() // true + */ + public static isLinux(): boolean { + const { type } = this.getInformation() + + return type === OSTypes.Linux + } } diff --git a/server/src/index.ts b/server/src/index.ts index eb3b2a99..cedaf58e 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -47,7 +47,8 @@ import { LogHelper } from '@/helpers/log-helper' '<00:00', '00:00<', 'CUDNN_STATUS_NOT_SUPPORTED', - 'cls.seq_relationship.weight' + 'cls.seq_relationship.weight', + 'ALSA lib' ] if (skipError.some((error) => formattedData.includes(error))) { diff --git a/tcp_server/src/lib/asr.py b/tcp_server/src/lib/asr.py index 9d395827..d9f40fd3 100644 --- a/tcp_server/src/lib/asr.py +++ b/tcp_server/src/lib/asr.py @@ -25,6 +25,13 @@ class ASR: self.log(f'Device: {device}') + compute_type = "float16" + + if device == 'cpu': + compute_type = "int8_float32" + + self.compute_type = compute_type + self.transcription_callback = transcription_callback self.wake_word_callback = wake_word_callback self.end_of_owner_speech_callback = end_of_owner_speech_callback @@ -51,8 +58,22 @@ class ASR: self.buffer_size = 64 # Size of the circular buffer self.audio = pyaudio.PyAudio() - self.model = WhisperModel(self.model_size, device=self.device, compute_type="float16") self.stream = None + self.model = None + + if self.device == 'cpu': + self.model = WhisperModel( + self.model_size, + device=self.device, + compute_type=self.compute_type, + cpu_threads=4 + ) + else: + self.model = WhisperModel( + self.model_size, + device=self.device, 
+ compute_type=self.compute_type + ) self.log('Model loaded')