refactor(server): HTTP server; brain entries; STT parsers; TTS synthesizers (WIP)

2024-11-20 19:52:55 +03:00 · 2023-02-03 00:14:23 +08:00 · 2023-02-03 00:14:23 +08:00 · c8bc9ecbb0
commit c8bc9ecbb0
parent 19ce6eb9c2
37 changed files with 820 additions and 532 deletions
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@ -1,4 +1,5 @@
 #!/bin/sh
 . "$(dirname "$0")/_/husky.sh"

-npx lint-staged
+# TODO: add lint-staged back in when the JavaScript to TypeScript migration is complete
+# npx lint-staged
--- a/app/src/js/client.js
+++ b/app/src/js/client.js
@ -100,7 +100,7 @@ export default class Client {
                  }
                }
              }
-            }, 1000)
+            }, 1_000)
          }, data.duration + 500)
        }
      })
--- a/app/src/js/main.js
+++ b/app/src/js/main.js
@ -11,7 +11,7 @@ const config = {
  server_host: import.meta.env.VITE_LEON_HOST,
  server_port: import.meta.env.VITE_LEON_PORT,
  min_decibels: -40, // Noise detection sensitivity
-  max_blank_time: 1000 // Maximum time to consider a blank (ms)
+  max_blank_time: 1_000 // Maximum time to consider a blank (ms)
 }
 const serverUrl =
  import.meta.env.VITE_LEON_NODE_ENV === 'production'
@ -58,7 +58,7 @@ document.addEventListener('DOMContentLoaded', async () => {
              rec.enabled = false

              // Ensure there are some data
-              if (blob.size >= 1000) {
+              if (blob.size >= 1_000) {
                client.socket.emit('recognize', blob)
              }
            })
--- a/package.json
+++ b/package.json
@ -106,7 +106,9 @@
    "@swc/core": "^1.3.14",
    "@tsconfig/node16-strictest": "^1.0.3",
    "@types/cli-spinner": "0.2.1",
+    "@types/fluent-ffmpeg": "^2.1.20",
    "@types/node": "^18.7.13",
+    "@types/node-wav": "^0.0.0",
    "@typescript-eslint/eslint-plugin": "^5.36.1",
    "@typescript-eslint/parser": "^5.36.1",
    "cli-spinner": "^0.2.10",
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@ -99,5 +99,11 @@ export const TCP_SERVER_PORT = Number(process.env['LEON_PY_TCP_SERVER_PORT'])
 /**
 * Paths
 */
+export const BIN_PATH = path.join('bin')
 export const GLOBAL_DATA_PATH = path.join('core', 'data')
 export const VOICE_CONFIG_PATH = path.join('core', 'config', 'voice')
+export const SERVER_PATH = path.join(
+  'server',
+  IS_PRODUCTION_ENV ? 'dist' : 'src'
+)
+export const TMP_PATH = path.join(SERVER_PATH, 'tmp')
--- a/server/src/core/asr.js
+++ b/server/src/core/asr.js
@ -1,77 +0,0 @@
-import fs from 'node:fs'
-
-import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
-import Ffmpeg from 'fluent-ffmpeg'
-
-import { LogHelper } from '@/helpers/log-helper'
-
-const audios = {
-  webm: `${__dirname}/../tmp/speech.webm`,
-  wav: `${__dirname}/../tmp/speech.wav`
-}
-
-class Asr {
-  constructor() {
-    this.blob = {}
-
-    LogHelper.title('ASR')
-    LogHelper.success('New instance')
-  }
-
-  static get audios() {
-    return audios
-  }
-
-  /**
-   * Encode audio blob to WAVE file
-   * and forward the WAVE file to the STT parser
-   */
-  run(blob, stt) {
-    return new Promise((resolve, reject) => {
-      LogHelper.title('ASR')
-
-      this.blob = blob
-
-      fs.writeFile(audios.webm, Buffer.from(this.blob), 'binary', (err) => {
-        if (err) {
-          reject({ type: 'error', obj: err })
-          return
-        }
-
-        const ffmpeg = new Ffmpeg()
-        ffmpeg.setFfmpegPath(ffmpegPath)
-
-        /**
-         * Encode WebM file to WAVE file
-         * ffmpeg -i speech.webm -acodec pcm_s16le -ar 16000 -ac 1 speech.wav
-         */
-        ffmpeg
-          .addInput(audios.webm)
-          .on('start', () => {
-            LogHelper.info('Encoding WebM file to WAVE file...')
-          })
-          .on('end', () => {
-            LogHelper.success('Encoding done')
-
-            if (Object.keys(stt).length === 0) {
-              reject({
-                type: 'warning',
-                obj: new Error('The speech recognition is not ready yet')
-              })
-            } else {
-              stt.parse(audios.wav)
-              resolve()
-            }
-          })
-          .on('error', (err) => {
-            reject({ type: 'error', obj: new Error(`Encoding error ${err}`) })
-          })
-          .outputOptions(['-acodec pcm_s16le', '-ar 16000', '-ac 1'])
-          .output(audios.wav)
-          .run()
-      })
-    })
-  }
-}
-
-export default Asr
--- a/server/src/core/asr/asr.ts
+++ b/server/src/core/asr/asr.ts
@ -0,0 +1,77 @@
+import path from 'node:path'
+import fs from 'node:fs'
+
+import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
+import ffmpeg from 'fluent-ffmpeg'
+
+import { TMP_PATH } from '@/constants'
+import { STT } from '@/core'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * Automatic speech recognition: stores the audio received from the client,
+ * encodes it to a WAVE file and hands that file over to the STT
+ */
+export default class ASR {
+  private static instance: ASR
+
+  // Temporary files used as encoding input (WebM) and output (WAVE)
+  public audioPaths = {
+    webm: path.join(TMP_PATH, 'speech.webm'),
+    wav: path.join(TMP_PATH, 'speech.wav')
+  }
+
+  constructor() {
+    // Only the first instantiation is logged and remembered
+    if (!ASR.instance) {
+      LogHelper.title('ASR')
+      LogHelper.success('New instance')
+
+      ASR.instance = this
+    }
+  }
+
+  /**
+   * Encode audio blob to WAVE file
+   * and forward the WAVE file to the STT parser
+   *
+   * @param blob Audio data, written as-is to the WebM file before encoding
+   * @returns A promise resolved as soon as the transcription has been started
+   * (the transcription itself is not awaited). It rejects with an Error when
+   * the file cannot be written, when the encoding fails or when the STT
+   * parser is not ready
+   */
+  public encode(blob: Buffer): Promise<void> {
+    return new Promise((resolve, reject) => {
+      LogHelper.title('ASR')
+
+      fs.writeFile(
+        this.audioPaths.webm,
+        Buffer.from(blob),
+        'binary',
+        async (err) => {
+          if (err) {
+            reject(new Error(`${err}`))
+            return
+          }
+
+          ffmpeg.setFfmpegPath(ffmpegPath)
+
+          /**
+           * Encode WebM file to WAVE file
+           * ffmpeg -i speech.webm -acodec pcm_s16le -ar 16000 -ac 1 speech.wav
+           */
+          ffmpeg()
+            .addInput(this.audioPaths.webm)
+            .on('start', () => {
+              LogHelper.info('Encoding WebM file to WAVE file...')
+            })
+            .on('end', () => {
+              LogHelper.success('Encoding done')
+
+              // Transcribing requires a loaded parser: reject when there is none
+              if (!STT.isParserReady) {
+                reject(new Error('The speech recognition is not ready yet'))
+              } else {
+                STT.transcribe(this.audioPaths.wav)
+                resolve()
+              }
+            })
+            .on('error', (err) => {
+              reject(new Error(`Encoding error ${err}`))
+            })
+            .outputOptions(['-acodec pcm_s16le', '-ar 16000', '-ac 1'])
+            .output(this.audioPaths.wav)
+            .run()
+        }
+      )
+    })
+  }
+}
--- a/server/src/core/asr/types.ts
+++ b/server/src/core/asr/types.ts
@ -0,0 +1 @@
+// Audio formats handled by the ASR (keys of its audio paths)
+export type ASRAudioFormat = 'wav' | 'webm'
--- a/server/src/core/brain.js
+++ b/server/src/core/brain.js
@ -266,7 +266,6 @@ class Brain {
                  output += data
                }
              } else {
-                /* istanbul ignore next */
                reject({
                  type: 'warning',
                  obj: new Error(
@ -280,7 +279,6 @@ class Brain {
              LogHelper.title('Brain')
              LogHelper.debug(`process.stdout: ${String(data)}`)

-              /* istanbul ignore next */
              reject({
                type: 'error',
                obj: new Error(
@ -338,7 +336,6 @@ class Brain {
                }
                speeches.push(speech)

-                /* istanbul ignore next */
                // Synchronize the downloaded content if enabled
                if (
                  this.finalOutput.type === 'end' &&
--- a/server/src/core/brain/brain.ts
+++ b/server/src/core/brain/brain.ts
@ -1,16 +1,14 @@
 // TODO: remove ignore
-// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-// @ts-nocheck

-import type { Socket } from 'node:net'
 import fs from 'node:fs'
 import path from 'node:path'
 import { spawn, ChildProcessWithoutNullStreams } from 'node:child_process'

+import { langs } from '@@/core/langs.json'
 import type { ShortLanguageCode } from '@/types'
 import type { GlobalAnswers } from '@/schemas/global-data-schemas'
-import { langs } from '@@/core/langs.json'
-import { HAS_TTS, PYTHON_BRIDGE_BIN_PATH } from '@/constants'
+import { HAS_TTS, PYTHON_BRIDGE_BIN_PATH, TMP_PATH } from '@/constants'
+import { SOCKET_SERVER } from '@/core'
 import { LangHelper } from '@/helpers/lang-helper'
 import { LogHelper } from '@/helpers/log-helper'
 import { SkillDomainHelper } from '@/helpers/skill-domain-helper'
@ -19,7 +17,8 @@ import Synchronizer from '@/core/synchronizer'

 // TODO: split class

-class Brain {
+export default class Brain {
+  private static instance: Brain
  private _lang: ShortLanguageCode = 'en'
  private broca: GlobalAnswers = JSON.parse(
    fs.readFileSync(
@ -33,15 +32,16 @@ class Brain {
  // TODO: type
  private finalOutput: unknown

-  // TODO: not readonly?
-  public readonly socket: Socket
-
  constructor() {
    this._stt = {}
    this._tts = {}

-    LogHelper.title('Brain')
-    LogHelper.success('New instance')
+    if (!Brain.instance) {
+      LogHelper.title('Brain')
+      LogHelper.success('New instance')
+
+      Brain.instance = this
+    }
  }

  // TODO: handle return type
@ -113,7 +113,7 @@ class Brain {
        this._tts.add(speech, end)
      }

-      this._socket.emit('answer', rawSpeech)
+      SOCKET_SERVER.socket.emit('answer', rawSpeech)
    }
  }

@ -157,10 +157,7 @@ class Brain {

    return new Promise(async (resolve, reject) => {
      const utteranceId = `${Date.now()}-${StringHelper.random(4)}`
-      const intentObjectPath = path.join(
-        __dirname,
-        `../tmp/${utteranceId}.json`
-      )
+      const intentObjectPath = path.join(TMP_PATH, `${utteranceId}.json`)
      const speeches = []

      // Ask to repeat if Leon is not sure about the request
@ -173,7 +170,7 @@ class Brain {

          speeches.push(speech)
          this.talk(speech, true)
-          this._socket.emit('is-typing', false)
+          SOCKET_SERVER.socket.emit('is-typing', false)
        }

        const executionTimeEnd = Date.now()
@ -276,7 +273,6 @@ class Brain {
                  output += data
                }
              } else {
-                /* istanbul ignore next */
                reject({
                  type: 'warning',
                  obj: new Error(
@ -290,7 +286,6 @@ class Brain {
              LogHelper.title('Brain')
              LogHelper.debug(`process.stdout: ${String(data)}`)

-              /* istanbul ignore next */
              reject({
                type: 'error',
                obj: new Error(
@ -310,7 +305,7 @@ class Brain {
            })}!`
            if (!opts.mute) {
              this.talk(speech)
-              this._socket.emit('is-typing', false)
+              SOCKET_SERVER.socket.emit('is-typing', false)
            }
            speeches.push(speech)

@ -348,7 +343,6 @@ class Brain {
                }
                speeches.push(speech)

-                /* istanbul ignore next */
                // Synchronize the downloaded content if enabled
                if (
                  this.finalOutput.type === 'end' &&
@ -376,7 +370,7 @@ class Brain {
            Brain.deleteIntentObjFile(intentObjectPath)

            if (!opts.mute) {
-              this._socket.emit('is-typing', false)
+              SOCKET_SERVER.socket.emit('is-typing', false)
            }

            const executionTimeEnd = Date.now()
@ -387,10 +381,10 @@ class Brain {
              nextAction?.suggestions &&
              this.finalOutput.core?.showNextActionSuggestions
            ) {
-              this._socket.emit('suggest', nextAction.suggestions)
+              SOCKET_SERVER.socket.emit('suggest', nextAction.suggestions)
            }
            if (action?.suggestions && this.finalOutput.core?.showSuggestions) {
-              this._socket.emit('suggest', action.suggestions)
+              SOCKET_SERVER.socket.emit('suggest', action.suggestions)
            }

            resolve({
@ -493,12 +487,12 @@ class Brain {

          if (!opts.mute) {
            this.talk(answer, true)
-            this._socket.emit('is-typing', false)
+            SOCKET_SERVER.socket.emit('is-typing', false)
          }

          // Send suggestions to the client
          if (nextAction?.suggestions) {
-            this._socket.emit('suggest', nextAction.suggestions)
+            SOCKET_SERVER.socket.emit('suggest', nextAction.suggestions)
          }

          resolve({
@ -516,5 +510,3 @@ class Brain {
    })
  }
 }
-
-export default Brain
--- a/server/src/core/http-server/http-server.ts
+++ b/server/src/core/http-server/http-server.ts
@ -106,10 +106,7 @@ export default class HTTPServer {
    try {
      await this.listen()
    } catch (e) {
-      // TODO: remove ts-ignore
-      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
-      // @ts-ignore
-      LogHelper.error(e.message)
+      LogHelper.error((e as Error).message)
    }
  }

@ -123,7 +120,7 @@ export default class HTTPServer {
        })
      : new SocketIOServer(this.httpServer)

-    // TODO: instanciate new socket server
+    // TODO: instantiate new socket server
    io.on('connection', server.handleOnConnection)

    this.fastify.listen(
--- a/server/src/core/http-server/old-server.js
+++ b/server/src/core/http-server/old-server.js
@ -21,7 +21,7 @@ import {
 import { TCP_CLIENT } from '@/core'
 import Nlu from '@/core/nlu'
 import Brain from '@/core/brain'
-import Asr from '@/core/asr'
+import Asr from '@/core/asr/asr'
 import Stt from '@/stt/stt'
 import Tts from '@/tts/tts'
 import corsMidd from '@/core/http-server/plugins/cors'
--- a/server/src/core/index.ts
+++ b/server/src/core/index.ts
@ -2,6 +2,9 @@ import { HOST, PORT, TCP_SERVER_HOST, TCP_SERVER_PORT } from '@/constants'
 import TCPClient from '@/core/tcp-client'
 import HTTPServer from '@/core/http-server/http-server'
 import SocketServer from '@/core/socket-server'
+import SpeechToText from '@/core/stt/stt'
+import TextToSpeech from '@/core/tts/tts'
+import AutomaticSpeechRecognition from '@/core/asr/asr'

 /**
 * Register core singletons
@ -15,3 +18,9 @@ export const TCP_CLIENT = new TCPClient(
 export const HTTP_SERVER = new HTTPServer(String(HOST), PORT)

 export const SOCKET_SERVER = new SocketServer()
+
+export const STT = new SpeechToText()
+
+export const TTS = new TextToSpeech()
+
+export const ASR = new AutomaticSpeechRecognition()
--- a/server/src/core/nlu.js
+++ b/server/src/core/nlu.js
@ -233,7 +233,6 @@ class Nlu {
   * Collaborative logger request
   */
  sendLog(utterance) {
-    /* istanbul ignore next */
    if (HAS_LOGGER && !IS_TESTING_ENV) {
      axios.request({
        method: 'POST',
@ -402,7 +401,7 @@ class Nlu {
      }

      return processedData
-    } catch (e) /* istanbul ignore next */ {
+    } catch (e) {
      return null
    }
  }
@ -595,7 +594,7 @@ class Nlu {
          configDataFilePath,
          this.nluResultObj
        )
-      } catch (e) /* istanbul ignore next */ {
+      } catch (e) {
        if (LogHelper[e.type]) {
          LogHelper[e.type](e.obj.message)
        }
@ -672,7 +671,7 @@ class Nlu {
          ...processedData,
          nluProcessingTime: processingTime - processedData?.executionTime // In ms, NLU processing time only
        })
-      } catch (e) /* istanbul ignore next */ {
+      } catch (e) {
        LogHelper[e.type](e.obj.message)

        if (!opts.mute) {
--- a/server/src/core/socket-server.ts
+++ b/server/src/core/socket-server.ts
@ -2,6 +2,8 @@
 // eslint-disable-next-line @typescript-eslint/ban-ts-comment
 // @ts-nocheck

+import type { Socket } from 'node:net'
+
 import { Server as SocketIOServer } from 'socket.io'

 import {
@ -11,15 +13,17 @@ import {
  STT_PROVIDER,
  TTS_PROVIDER
 } from '@/constants'
-import { HTTP_SERVER, TCP_CLIENT } from '@/core'
+import { HTTP_SERVER, TCP_CLIENT, ASR, STT, TTS } from '@/core'
 import { LogHelper } from '@/helpers/log-helper'
-import Asr from '@/core/asr'
+import Asr from '@/core/asr/asr'
 // import Stt from '@/stt/stt'
 // import Tts from '@/tts/tts'

 export default class SocketServer {
  private static instance: SocketServer

+  public socket: Socket
+
  constructor() {
    if (!SocketServer.instance) {
      LogHelper.title('Socket Server')
@ -37,38 +41,38 @@ export default class SocketServer {
        })
      : new SocketIOServer(httpServer)

-    // TODO: instantiate new socket server
    io.on('connection', (socket) => {
      LogHelper.title('Client')
      LogHelper.success('Connected')

+      this.socket = socket
+
      // Init
-      socket.on('init', async (data) => {
+      this.socket.on('init', async (data) => {
        LogHelper.info(`Type: ${data}`)
-        LogHelper.info(`Socket id: ${socket.id}`)
+        LogHelper.info(`Socket id: ${this.socket.id}`)

        // TODO
        // const provider = await addProvider(socket.id)

        // Check whether the TCP client is connected to the TCP server
        if (TCP_CLIENT.isConnected) {
-          socket.emit('ready')
+          this.socket.emit('ready')
        } else {
          TCP_CLIENT.ee.on('connected', () => {
-            socket.emit('ready')
+            this.socket.emit('ready')
          })
        }

        if (data === 'hotword-node') {
          // Hotword triggered
-          socket.on('hotword-detected', (data) => {
+          this.socket.on('hotword-detected', (data) => {
            LogHelper.title('Socket')
            LogHelper.success(`Hotword ${data.hotword} detected`)

-            socket.broadcast.emit('enable-record')
+            this.socket.broadcast.emit('enable-record')
          })
        } else {
-          const asr = new Asr()
          let sttState = 'disabled'
          let ttsState = 'disabled'

@ -81,6 +85,7 @@ export default class SocketServer {
            // TODO
            // provider.brain.stt = new Stt(socket, STT_PROVIDER)
            // provider.brain.stt.init(() => null)
+            await STT.init()
          }
          if (HAS_TTS) {
            ttsState = 'enabled'
@ -88,6 +93,7 @@ export default class SocketServer {
            // TODO
            // provider.brain.tts = new Tts(socket, TTS_PROVIDER)
            // provider.brain.tts.init('en', () => null)
+            await TTS.init()
          }

          LogHelper.title('Initialization')
@ -95,11 +101,11 @@ export default class SocketServer {
          LogHelper.success(`TTS ${ttsState}`)

          // Listen for new utterance
-          socket.on('utterance', async (data) => {
+          this.socket.on('utterance', async (data) => {
            LogHelper.title('Socket')
            LogHelper.info(`${data.client} emitted: ${data.value}`)

-            socket.emit('is-typing', true)
+            this.socket.emit('is-typing', true)

            // TODO
            // const utterance = data.value
@ -115,10 +121,9 @@ export default class SocketServer {
          })

          // Handle automatic speech recognition
-          socket.on('recognize', async (data) => {
+          this.socket.on('recognize', async (data) => {
            try {
-              // TODO
-              await asr.run(data, provider.brain.stt)
+              await ASR.encode(data)
            } catch (e) {
              LogHelper[e.type](e.obj.message)
            }
@ -126,9 +131,9 @@ export default class SocketServer {
        }
      })

-      socket.once('disconnect', () => {
+      this.socket.once('disconnect', () => {
        // TODO
-        // deleteProvider(socket.id)
+        // deleteProvider(this.socket.id)
      })
    })
  }
--- a/server/src/core/stt/parsers/coqui-stt-parser.ts
+++ b/server/src/core/stt/parsers/coqui-stt-parser.ts
@ -0,0 +1,73 @@
+import path from 'node:path'
+import fs from 'node:fs'
+
+import wav from 'node-wav'
+import { Model } from 'stt'
+
+import type { STTParserFacade } from '@/core/stt/types'
+import { BIN_PATH } from '@/constants'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * Offline STT parser backed by a Coqui STT model loaded from the bin folder
+ */
+export class CoquiSTTParser implements STTParserFacade {
+  private readonly name = 'Coqui STT Parser'
+  private readonly model: Model | undefined = undefined
+  // Default value, replaced by the sample rate reported by the loaded model
+  private readonly desiredSampleRate: number = 16_000
+
+  /**
+   * Load the model and enable its external scorer
+   *
+   * @throws Error when the model cannot be loaded or the scorer cannot be enabled
+   */
+  constructor() {
+    LogHelper.title(this.name)
+    LogHelper.success('New instance')
+
+    const modelPath = path.join(BIN_PATH, 'coqui', 'model.tflite')
+    const scorerPath = path.join(BIN_PATH, 'coqui', 'huge-vocabulary.scorer')
+
+    LogHelper.info(`Loading model from file ${modelPath}...`)
+
+    // Missing files are only reported here; loading them below is what throws
+    if (!fs.existsSync(modelPath)) {
+      LogHelper.error(
+        `Cannot find ${modelPath}. You can set up the offline STT by running: "npm run setup:offline-stt"`
+      )
+    }
+
+    if (!fs.existsSync(scorerPath)) {
+      LogHelper.error(
+        `Cannot find ${scorerPath}. You can set up the offline STT by running: "npm run setup:offline-stt"`
+      )
+    }
+
+    try {
+      this.model = new Model(modelPath)
+    } catch (e) {
+      throw Error(`${this.name}: failed to load the model. ${e}`)
+    }
+
+    this.desiredSampleRate = this.model.sampleRate()
+
+    try {
+      this.model.enableExternalScorer(scorerPath)
+    } catch (e) {
+      throw Error(`${this.name}: failed to enable external scorer. ${e}`)
+    }
+
+    LogHelper.success('Parser initialized')
+  }
+
+  /**
+   * Read audio buffer and return the transcript (decoded string)
+   *
+   * @param buffer Content of the WAVE file to transcribe
+   * @returns The transcript, or null when no model is loaded or when decoding
+   * fails (the failure is logged)
+   */
+  public async parse(buffer: Buffer): Promise<string | null> {
+    if (!this.model) {
+      return null
+    }
+
+    try {
+      const wavDecode = wav.decode(buffer)
+
+      if (wavDecode.sampleRate < this.desiredSampleRate) {
+        LogHelper.warning(
+          `Original sample rate (${wavDecode.sampleRate}) is lower than ${this.desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`
+        )
+      }
+
+      // Decoded string
+      return this.model.stt(buffer)
+    } catch (e) {
+      // Log and return null instead of throwing, like the other STT parsers
+      LogHelper.error(`${this.name}: ${e}`)
+    }
+
+    return null
+  }
+}
--- a/server/src/core/stt/parsers/google-cloud-stt-parser.ts
+++ b/server/src/core/stt/parsers/google-cloud-stt-parser.ts
@ -0,0 +1,67 @@
+import path from 'node:path'
+
+import stt, { SpeechClient } from '@google-cloud/speech'
+
+import type { STTParserFacade } from '@/core/stt/types'
+import { LANG, VOICE_CONFIG_PATH } from '@/constants'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * STT parser relying on the Google Cloud Speech-to-Text client
+ */
+export class GoogleCloudSTTParser implements STTParserFacade {
+  private readonly name = 'Google Cloud STT Parser'
+  private readonly client: SpeechClient | undefined = undefined
+
+  constructor() {
+    LogHelper.title(this.name)
+    LogHelper.success('New instance')
+
+    /**
+     * The Google Cloud client is initialized from a credentials JSON file
+     * whose path is given by the "GOOGLE_APPLICATION_CREDENTIALS" env variable
+     */
+    const credentialsPath = path.join(VOICE_CONFIG_PATH, 'google-cloud.json')
+    process.env['GOOGLE_APPLICATION_CREDENTIALS'] = credentialsPath
+
+    try {
+      this.client = new stt.SpeechClient()
+
+      LogHelper.success('Parser initialized')
+    } catch (e) {
+      LogHelper.error(`${this.name}: ${e}`)
+    }
+  }
+
+  /**
+   * Send the audio buffer to the recognition service and return the
+   * transcript, or null when the client is missing or the request fails
+   */
+  public async parse(buffer: Buffer): Promise<string | null> {
+    if (!this.client) {
+      LogHelper.error(`${this.name}: not initialized`)
+
+      return null
+    }
+
+    const content = buffer.toString('base64')
+
+    try {
+      const [response] = await this.client.recognize({
+        audio: { content },
+        config: {
+          languageCode: LANG,
+          encoding: 'LINEAR16',
+          sampleRateHertz: 16_000
+        }
+      })
+      // First alternative of every result, one per line
+      const transcripts = (response.results || []).map(
+        (result) => result.alternatives && result.alternatives[0]?.transcript
+      )
+
+      return transcripts.join('\n')
+    } catch (e) {
+      LogHelper.error(`${this.name}: ${e}`)
+
+      return null
+    }
+  }
+}
--- a/server/src/core/stt/parsers/watson-stt-parser.ts
+++ b/server/src/core/stt/parsers/watson-stt-parser.ts
@ -0,0 +1,65 @@
+import path from 'node:path'
+import fs from 'node:fs'
+import { Duplex } from 'node:stream'
+
+import Stt from 'ibm-watson/speech-to-text/v1'
+import { IamAuthenticator } from 'ibm-watson/auth'
+
+import type { STTParserFacade } from '@/core/stt/types'
+import type { WatsonVoiceConfiguration } from '@/schemas/voice-config-schemas'
+import { LANG, VOICE_CONFIG_PATH } from '@/constants'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * STT parser relying on the IBM Watson Speech to Text client
+ */
+export class WatsonSTTParser implements STTParserFacade {
+  private readonly name = 'Watson STT Parser'
+  private readonly client: Stt | undefined = undefined
+
+  /**
+   * Read the Watson voice configuration file and create the client from it.
+   * Reading or parsing the file is done outside the try block, so a missing or
+   * invalid file throws; a client creation failure is only logged
+   */
+  constructor() {
+    LogHelper.title(this.name)
+    LogHelper.success('New instance')
+
+    const config: WatsonVoiceConfiguration = JSON.parse(
+      fs.readFileSync(path.join(VOICE_CONFIG_PATH, 'watson-stt.json'), 'utf8')
+    )
+
+    try {
+      this.client = new Stt({
+        authenticator: new IamAuthenticator({ apikey: config.apikey }),
+        serviceUrl: config.url
+      })
+
+      LogHelper.success('Parser initialized')
+    } catch (e) {
+      LogHelper.error(`${this.name}: ${e}`)
+    }
+  }
+
+  /**
+   * Read audio buffer and return the transcript (decoded string)
+   *
+   * @param buffer Content of the WAVE file to transcribe
+   * @returns The transcript, or null when the client is not initialized or
+   * when the request fails (both cases are logged)
+   */
+  public async parse(buffer: Buffer): Promise<string | null> {
+    if (this.client) {
+      // Expose the buffer as a stream for the "audio" request field
+      const stream = new Duplex()
+
+      stream.push(buffer)
+      stream.push(null)
+
+      try {
+        const { result } = await this.client.recognize({
+          contentType: 'audio/wav',
+          model: `${LANG}_BroadbandModel`,
+          audio: stream
+        })
+
+        // Decoded string
+        return (result.results || [])
+          .map((data) => data.alternatives && data.alternatives[0]?.transcript)
+          .join('\n')
+      } catch (e) {
+        LogHelper.error(`${this.name}: ${e}`)
+      }
+    } else {
+      // Report the missing client instead of returning null silently
+      LogHelper.error(`${this.name}: not initialized`)
+    }
+
+    return null
+  }
+}
--- a/server/src/core/stt/stt.ts
+++ b/server/src/core/stt/stt.ts
@ -0,0 +1,132 @@
+import fs from 'node:fs'
+import path from 'node:path'
+
+import type { ASRAudioFormat } from '@/core/asr/types'
+import type { STTParser } from '@/core/stt/types'
+import { STT_PROVIDER, VOICE_CONFIG_PATH } from '@/constants'
+import { SOCKET_SERVER, ASR } from '@/core'
+import { STTParserNames, STTProviders } from '@/core/stt/types'
+import { LogHelper } from '@/helpers/log-helper'
+
+/**
+ * Speech-to-text: loads the parser of the configured provider and uses it to
+ * transcribe WAVE files, then forwards the transcript to the client
+ */
+export default class STT {
+  private static instance: STT
+
+  private parser: STTParser = undefined
+
+  constructor() {
+    // Only the first instantiation is logged and remembered
+    if (!STT.instance) {
+      LogHelper.title('STT')
+      LogHelper.success('New instance')
+
+      STT.instance = this
+    }
+  }
+
+  /**
+   * Whether a parser has been loaded
+   */
+  public get isParserReady(): boolean {
+    return !!this.parser
+  }
+
+  /**
+   * Initialize the STT provider
+   *
+   * @returns true once the parser is loaded; false when the provider is
+   * unknown or when its parser cannot be loaded (the reason is logged)
+   */
+  public async init(): Promise<boolean> {
+    LogHelper.info('Initializing STT...')
+
+    // STT_PROVIDER holds an enum value (e.g. "coqui-stt") whereas
+    // STTParserNames is keyed by enum member name: resolve the member name
+    const providerKey = (
+      Object.keys(STTProviders) as (keyof typeof STTProviders)[]
+    ).find((key) => STTProviders[key] === STT_PROVIDER)
+
+    if (!providerKey) {
+      LogHelper.error(
+        `The STT provider "${STT_PROVIDER}" does not exist or is not yet supported`
+      )
+
+      return false
+    }
+
+    if (
+      STT_PROVIDER === STTProviders.GoogleCloudSTT &&
+      typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] === 'undefined'
+    ) {
+      process.env['GOOGLE_APPLICATION_CREDENTIALS'] = path.join(
+        VOICE_CONFIG_PATH,
+        'google-cloud.json'
+      )
+    } else if (
+      typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] !== 'undefined' &&
+      process.env['GOOGLE_APPLICATION_CREDENTIALS'].indexOf(
+        'google-cloud.json'
+      ) === -1
+    ) {
+      LogHelper.warning(
+        `The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already settled with the following value: "${process.env['GOOGLE_APPLICATION_CREDENTIALS']}"`
+      )
+    }
+
+    try {
+      // Dynamically attribute the parser
+      const parserModule: Record<string, unknown> = await import(
+        path.join(__dirname, 'parsers', STTParserNames[providerKey])
+      )
+      // import() resolves to the module namespace, not to a class: the parser
+      // class is the named export of the module
+      const Parser = Object.values(parserModule).find(
+        (exported): exported is new () => NonNullable<STTParser> =>
+          typeof exported === 'function'
+      )
+
+      if (!Parser) {
+        LogHelper.error(
+          `The STT parser "${STTParserNames[providerKey]}" does not export a parser class`
+        )
+
+        return false
+      }
+
+      this.parser = new Parser()
+    } catch (e) {
+      // Loading the module or constructing the parser may throw
+      LogHelper.error(`Failed to initialize the STT parser: ${e}`)
+
+      return false
+    }
+
+    LogHelper.title('STT')
+    LogHelper.success('STT initialized')
+
+    return true
+  }
+
+  /**
+   * Read the speech file and transcribe
+   *
+   * @param audioFilePath Path of the WAVE file to transcribe
+   * @returns false when the file does not exist, true otherwise
+   */
+  public async transcribe(audioFilePath: string): Promise<boolean> {
+    LogHelper.info('Parsing WAVE file...')
+
+    if (!fs.existsSync(audioFilePath)) {
+      LogHelper.error(`The WAVE file "${audioFilePath}" does not exist`)
+
+      return false
+    }
+
+    const buffer = fs.readFileSync(audioFilePath)
+    const transcript = await this.parser?.parse(buffer)
+
+    if (transcript) {
+      // Forward the string to the client
+      this.forward(transcript)
+    } else {
+      // Nothing to forward (no parser, failure or empty transcript)
+      this.deleteAudios()
+    }
+
+    return true
+  }
+
+  /**
+   * Forward string output to the client
+   * and delete audio files once it has been forwarded
+   */
+  private forward(str: string): void {
+    SOCKET_SERVER.socket.emit('recognized', str, (confirmation: string) => {
+      if (confirmation === 'string-received') {
+        this.deleteAudios()
+      }
+    })
+
+    LogHelper.success(`Parsing result: ${str}`)
+  }
+
+  /**
+   * Delete audio files
+   */
+  private deleteAudios(): void {
+    const audioFormats = Object.keys(ASR.audioPaths) as ASRAudioFormat[]
+
+    for (const audioFormat of audioFormats) {
+      const audioPath = ASR.audioPaths[audioFormat]
+
+      if (fs.existsSync(audioPath)) {
+        fs.unlinkSync(audioPath)
+      }
+    }
+  }
+}
--- a/server/src/core/stt/types.ts
+++ b/server/src/core/stt/types.ts
@ -0,0 +1,25 @@
+import type { CoquiSTTParser } from '@/core/stt/parsers/coqui-stt-parser'
+import type { GoogleCloudSTTParser } from '@/core/stt/parsers/google-cloud-stt-parser'
+import type { WatsonSTTParser } from '@/core/stt/parsers/watson-stt-parser'
+
+// Supported STT providers; the values are the ones the STT_PROVIDER constant is compared with
+export enum STTProviders {
+  GoogleCloudSTT = 'google-cloud-stt',
+  WatsonSTT = 'watson-stt',
+  CoquiSTT = 'coqui-stt'
+}
+
+// Parser module names (files of the "parsers" folder), using the same member names as STTProviders
+export enum STTParserNames {
+  GoogleCloudSTT = 'google-cloud-stt-parser',
+  WatsonSTT = 'watson-stt-parser',
+  CoquiSTT = 'coqui-stt-parser'
+}
+
+// Any of the available parsers, or undefined when no parser is set
+export type STTParser =
+  | GoogleCloudSTTParser
+  | WatsonSTTParser
+  | CoquiSTTParser
+  | undefined
+
+// Contract implemented by every STT parser
+export interface STTParserFacade {
+  // Takes the audio content and resolves with the transcript, or null when there is none
+  parse(buffer: Buffer): Promise<string | null>
+}
--- a/server/src/core/synchronizer.js
+++ b/server/src/core/synchronizer.js
@ -60,7 +60,6 @@ class Synchronizer {
   * Google Drive synchronization method
   */
  googleDrive() {
-    /* istanbul ignore next */
    return new Promise((resolve, reject) => {
      const driveFolderName = `leon-${this.classification.domain}-${this.classification.skill}`
      const folderMimeType = 'application/vnd.google-apps.folder'
--- a/server/src/core/tts/synthesizers/amazon-polly-synthesizer.ts
+++ b/server/src/core/tts/synthesizers/amazon-polly-synthesizer.ts
@ -0,0 +1,105 @@
+import type { Stream } from 'node:stream'
+import path from 'node:path'
+import fs from 'node:fs'
+
+import Ffmpeg from 'fluent-ffmpeg'
+import { Polly, SynthesizeSpeechCommand } from '@aws-sdk/client-polly'
+import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
+import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
+
+import type { LongLanguageCode } from '@/types'
+import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
+import type { AmazonVoiceConfiguration } from '@/schemas/voice-config-schemas'
+import { LANG, VOICE_CONFIG_PATH, TMP_PATH } from '@/constants'
+import { TTS } from '@/core'
+import { LogHelper } from '@/helpers/log-helper'
+import { StringHelper } from '@/helpers/string-helper'
+
+/**
+ * Amazon Polly voice to use for each long language code
+ * (looked up with the synthesizer language when synthesizing)
+ */
+const VOICES = {
+  'en-US': {
+    VoiceId: 'Matthew'
+  },
+  'fr-FR': {
+    VoiceId: 'Mathieu'
+  }
+}
+
+/**
+ * TTS synthesizer backed by Amazon Polly
+ * The speech is saved as an MP3 file under TMP_PATH
+ * and its duration is read thanks to ffprobe
+ */
+export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
+  private readonly name = 'Amazon Polly TTS Synthesizer'
+  private readonly client: Polly | undefined = undefined
+  private readonly lang: LongLanguageCode = LANG as LongLanguageCode
+
+  /**
+   * Create the Polly client from the "amazon.json" voice configuration file
+   * The given language selects the voice in VOICES
+   */
+  constructor(lang: LongLanguageCode) {
+    LogHelper.title(this.name)
+    LogHelper.success('New instance')
+
+    const config: AmazonVoiceConfiguration = JSON.parse(
+      fs.readFileSync(path.join(VOICE_CONFIG_PATH, 'amazon.json'), 'utf8')
+    )
+
+    try {
+      this.lang = lang
+      this.client = new Polly(config)
+
+      LogHelper.success('Synthesizer initialized')
+    } catch (e) {
+      LogHelper.error(`${this.name}: ${e}`)
+    }
+  }
+
+  /**
+   * Synthesize the speech into an MP3 file, then emit "saved"
+   * on the TTS event emitter with the audio duration
+   * Resolve with the audio file path and its duration in milliseconds,
+   * or with null when the client is missing or the synthesis failed
+   */
+  public async synthesize(speech: string): Promise<SynthesizeResult | null> {
+    const audioFilePath = path.join(
+      TMP_PATH,
+      `${Date.now()}-${StringHelper.random(4)}.mp3`
+    )
+
+    try {
+      if (this.client) {
+        const result = await this.client.send(
+          new SynthesizeSpeechCommand({
+            OutputFormat: 'mp3',
+            VoiceId: VOICES[this.lang].VoiceId,
+            Text: speech
+          })
+        )
+        // Cast to Node.js stream as the SDK returns a custom type that does not have a pipe method
+        const AudioStream = result.AudioStream as Stream
+
+        if (!AudioStream) {
+          LogHelper.error(`${this.name}: AudioStream is undefined`)
+
+          return null
+        }
+
+        const wStream = fs.createWriteStream(audioFilePath)
+
+        await new Promise<void>((resolve, reject) => {
+          wStream.on('finish', () => resolve())
+          wStream.on('error', reject)
+          // pipe() does not forward the source errors to the destination,
+          // so listen to them explicitly, otherwise this promise never settles
+          AudioStream.on('error', (err) => {
+            wStream.destroy()
+            reject(err)
+          })
+          AudioStream.pipe(wStream)
+        })
+
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const ffmpeg = new (Ffmpeg as any)()
+        ffmpeg.setFfmpegPath(ffmpegPath)
+        ffmpeg.setFfprobePath(ffprobePath)
+
+        // ffprobe() is callback-based and does not return a promise,
+        // hence wrap it to be able to await the audio duration
+        const duration = await new Promise<number>((resolve, reject) => {
+          ffmpeg
+            .input(audioFilePath)
+            .ffprobe(
+              (err: unknown, data?: { streams?: { duration?: number }[] }) => {
+                const seconds = Number(data?.streams?.[0]?.duration)
+
+                if (err) {
+                  reject(err)
+                } else if (Number.isNaN(seconds)) {
+                  reject(new Error('ffprobe did not report the audio duration'))
+                } else {
+                  resolve(seconds * 1_000)
+                }
+              }
+            )
+        })
+
+        TTS.em.emit('saved', duration)
+
+        return {
+          audioFilePath,
+          duration
+        }
+      }
+
+      LogHelper.error(`${this.name}: client is not defined yet`)
+    } catch (e) {
+      LogHelper.error(`${this.name}: Failed to synthesize speech: ${e} `)
+    }
+
+    return null
+  }
+}
--- a/server/src/core/tts/tts.ts
+++ b/server/src/core/tts/tts.ts
@ -0,0 +1,141 @@
+import path from 'node:path'
+import events from 'node:events'
+import fs from 'node:fs'
+
+import type { ShortLanguageCode } from '@/types'
+import type { TTSSynthesizer } from '@/core/tts/types'
+import { SOCKET_SERVER } from '@/core'
+import { TTS_PROVIDER, VOICE_CONFIG_PATH } from '@/constants'
+import { TTSSynthesizers, TTSProviders } from '@/core/tts/types'
+import { LogHelper } from '@/helpers/log-helper'
+import { LangHelper } from '@/helpers/lang-helper'
+
+/**
+ * Entry of the speeches queue:
+ * "text" is handed over to the synthesizer and
+ * "isFinalAnswer" is forwarded to the client as "is_final_answer"
+ */
+type Speech = {
+  text: string
+  isFinalAnswer: boolean
+}
+
+/**
+ * Text-to-speech entry point: load the synthesizer of the configured provider
+ * and forward the synthesized audio to the client through the socket server
+ */
+export default class TTS {
+  private static instance: TTS
+
+  private synthesizer: TTSSynthesizer = undefined
+  private speeches: Speech[] = []
+  // Avoid stacking several "saved" listeners when init() is called again
+  private isSavedListenerRegistered = false
+
+  public lang: ShortLanguageCode = 'en'
+  public em = new events.EventEmitter()
+
+  constructor() {
+    if (!TTS.instance) {
+      LogHelper.title('TTS')
+      LogHelper.success('New instance')
+
+      TTS.instance = this
+    }
+  }
+
+  /**
+   * Initialize the TTS provider
+   * Resolve with false when the configured provider is unknown
+   * or when its module does not expose a synthesizer class
+   */
+  public async init(newLang: ShortLanguageCode): Promise<boolean> {
+    LogHelper.info('Initializing TTS...')
+
+    this.lang = newLang || this.lang
+
+    // TTS_PROVIDER holds an enum value (e.g. "amazon-polly"), so resolve the
+    // matching enum key first: TTSProviders and TTSSynthesizers share their keys
+    const providerKey = (
+      Object.keys(TTSProviders) as (keyof typeof TTSProviders)[]
+    ).find((key) => TTSProviders[key] === TTS_PROVIDER)
+
+    if (!providerKey) {
+      LogHelper.error(
+        `The TTS provider "${TTS_PROVIDER}" does not exist or is not yet supported`
+      )
+
+      return false
+    }
+
+    if (
+      TTS_PROVIDER === TTSProviders.GoogleCloudTTS &&
+      typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] === 'undefined'
+    ) {
+      process.env['GOOGLE_APPLICATION_CREDENTIALS'] = path.join(
+        VOICE_CONFIG_PATH,
+        'google-cloud.json'
+      )
+    } else if (
+      typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] !== 'undefined' &&
+      !process.env['GOOGLE_APPLICATION_CREDENTIALS'].includes(
+        'google-cloud.json'
+      )
+    ) {
+      LogHelper.warning(
+        `The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already settled with the following value: "${process.env['GOOGLE_APPLICATION_CREDENTIALS']}"`
+      )
+    }
+
+    // Dynamically attribute the synthesizer
+    const synthesizerName = TTSSynthesizers[providerKey]
+    const synthesizerModule = await import(
+      path.join(__dirname, 'synthesizers', synthesizerName)
+    )
+    // A module namespace is not constructible: pick the exported class,
+    // whether it is the default export or a named one
+    const Synthesizer = [
+      synthesizerModule.default,
+      ...Object.values(synthesizerModule)
+    ].find((exported) => typeof exported === 'function')
+
+    if (!Synthesizer) {
+      LogHelper.error(
+        `The TTS synthesizer module "${synthesizerName}" does not export a synthesizer class`
+      )
+
+      return false
+    }
+
+    this.synthesizer = new Synthesizer(
+      LangHelper.getLongCode(this.lang)
+    ) as TTSSynthesizer
+
+    this.onSaved()
+
+    LogHelper.title('TTS')
+    LogHelper.success('TTS initialized')
+
+    return true
+  }
+
+  /**
+   * Forward buffer audio file and duration to the client
+   * and delete audio file once it has been forwarded
+   */
+  private async forward(speech: Speech): Promise<void> {
+    if (this.synthesizer) {
+      const result = await this.synthesizer.synthesize(speech.text)
+
+      if (!result) {
+        LogHelper.error(
+          'The TTS synthesizer failed to synthesize the speech as the result is null'
+        )
+      } else {
+        const { audioFilePath, duration } = result
+        // readFileSync already returns a Buffer, no extra copy is needed
+        const audioBuffer = fs.readFileSync(audioFilePath)
+
+        SOCKET_SERVER.socket.emit(
+          'audio-forwarded',
+          {
+            buffer: audioBuffer,
+            is_final_answer: speech.isFinalAnswer,
+            duration
+          },
+          (confirmation: string) => {
+            if (confirmation === 'audio-received') {
+              fs.unlinkSync(audioFilePath)
+            }
+          }
+        )
+      }
+    } else {
+      LogHelper.error('The TTS synthesizer is not initialized yet')
+    }
+  }
+
+  /**
+   * When the synthesizer saved a new audio file
+   * then shift the queue according to the audio file duration
+   * The listener is registered only once, however many times init() runs
+   */
+  private onSaved(): void {
+    if (this.isSavedListenerRegistered) {
+      return
+    }
+
+    this.isSavedListenerRegistered = true
+
+    this.em.on('saved', (duration: number) => {
+      setTimeout(() => {
+        this.speeches.shift()
+
+        const [nextSpeech] = this.speeches
+
+        if (nextSpeech) {
+          // Nothing awaits the timer callback, so handle the rejection here
+          this.forward(nextSpeech).catch((e) => {
+            LogHelper.error(`TTS: failed to forward the speech: ${e}`)
+          })
+        }
+      }, duration)
+    })
+  }
+}
--- a/server/src/core/tts/types.ts
+++ b/server/src/core/tts/types.ts
@ -0,0 +1,28 @@
+import type { AmazonPollyTTSSynthesizer } from '@/core/tts/synthesizers/amazon-polly-synthesizer'
+
+/**
+ * Identifiers of the supported TTS providers
+ */
+export enum TTSProviders {
+  AmazonPolly = 'amazon-polly',
+  GoogleCloudTTS = 'google-cloud-tts',
+  WatsonTTS = 'watson-tts',
+  Flite = 'flite'
+}
+
+/**
+ * Synthesizer module name of each TTS provider
+ * (the keys mirror the ones of TTSProviders)
+ */
+export enum TTSSynthesizers {
+  AmazonPolly = 'amazon-polly-synthesizer',
+  GoogleCloudTTS = 'google-cloud-tts-synthesizer',
+  WatsonTTS = 'watson-tts-synthesizer',
+  Flite = 'flite-synthesizer'
+}
+
+/**
+ * Outcome of a synthesis: path of the saved audio file
+ * and its duration in milliseconds
+ */
+export type SynthesizeResult = {
+  audioFilePath: string
+  duration: number
+}
+
+// TODO: widen the union to the remaining synthesizers:
+// export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | GoogleCloudTTSSynthesizer | WatsonTTSSynthesizer | undefined
+// NOTE(review): "undefined" presumably stands for "no synthesizer loaded yet"; confirm against the TTS class
+export type TTSSynthesizer = AmazonPollyTTSSynthesizer | undefined
+
+/**
+ * Contract of a TTS synthesizer: turn a speech into an audio file
+ * and resolve with its path and duration, or with null
+ * NOTE(review): null presumably means that the synthesis failed; confirm against every synthesizer
+ */
+export interface TTSSynthesizerFacade {
+  synthesize(speech: string): Promise<SynthesizeResult | null>
+}
--- a/server/src/declarations.d.ts
+++ b/server/src/declarations.d.ts
@ -0,0 +1,3 @@
+// Ambient declaration for "@ffprobe-installer/ffprobe": only the "path" export is typed
+// NOTE(review): presumably needed because the package ships no type definitions; confirm
+declare module '@ffprobe-installer/ffprobe' {
+  export const path: string
+}
--- a/server/src/helpers/log-helper.ts
+++ b/server/src/helpers/log-helper.ts
@ -1,7 +1,6 @@
 import fs from 'node:fs'
 import path from 'node:path'

-import { IS_TESTING_ENV } from '@/constants'
 import { DateHelper } from '@/helpers/date-helper'

 export class LogHelper {
@ -48,12 +47,10 @@ export class LogHelper {
  public static error(value: string): void {
    const data = `${DateHelper.getDateTime()} - ${value}`

-    if (!IS_TESTING_ENV) {
-      if (fs.existsSync(LogHelper.ERRORS_PATH)) {
-        fs.appendFileSync(LogHelper.ERRORS_PATH, `\n${data}`)
-      } else {
-        fs.writeFileSync(LogHelper.ERRORS_PATH, data, { flag: 'wx' })
-      }
+    if (fs.existsSync(LogHelper.ERRORS_PATH)) {
+      fs.appendFileSync(LogHelper.ERRORS_PATH, `\n${data}`)
+    } else {
+      fs.writeFileSync(LogHelper.ERRORS_PATH, data, { flag: 'wx' })
    }

    console.error('\x1b[31m🚨 %s\x1b[0m', value)
--- a/server/src/stt/coqui-stt/parser.js
+++ b/server/src/stt/coqui-stt/parser.js
@ -1,88 +0,0 @@
-import fs from 'node:fs'
-
-import wav from 'node-wav'
-import { Model } from 'stt'
-
-import { IS_TESTING_ENV } from '@/constants'
-import { LogHelper } from '@/helpers/log-helper'
-
-LogHelper.title('Coqui STT Parser')
-
-const parser = {}
-let model = {}
-let desiredSampleRate = 16000
-
-/**
- * Model and language model paths
- */
-parser.conf = {
-  model: 'bin/coqui/model.tflite',
-  scorer: 'bin/coqui/huge-vocabulary.scorer'
-}
-
-/**
- * Load models
- */
-parser.init = (args) => {
-  LogHelper.info(`Loading model from file ${args.model}...`)
-
-  if (!fs.existsSync(args.model)) {
-    LogHelper.error(
-      `Cannot find ${args.model}. You can set up the offline STT by running: "npm run setup:offline-stt"`
-    )
-
-    return false
-  }
-
-  if (!fs.existsSync(args.scorer)) {
-    LogHelper.error(
-      `Cannot find ${args.scorer}. You can setup the offline STT by running: "npm run setup:offline-stt"`
-    )
-
-    return false
-  }
-
-  /* istanbul ignore if */
-  if (!IS_TESTING_ENV) {
-    try {
-      model = new Model(args.model)
-    } catch (error) {
-      throw Error(`model.stt: ${error}`)
-    }
-    desiredSampleRate = model.sampleRate()
-
-    try {
-      model.enableExternalScorer(args.scorer)
-    } catch (error) {
-      throw Error(`model.enableExternalScorer: ${error}`)
-    }
-  }
-
-  LogHelper.success('Model loaded')
-
-  return true
-}
-
-/**
- * Parse file and infer
- */
-parser.parse = (buffer, cb) => {
-  const wavDecode = wav.decode(buffer)
-
-  if (wavDecode.sampleRate < desiredSampleRate) {
-    LogHelper.warning(
-      `Original sample rate (${wavDecode.sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`
-    )
-  }
-
-  /* istanbul ignore if */
-  if (!IS_TESTING_ENV) {
-    const string = model.stt(buffer)
-
-    cb({ string })
-  }
-
-  return true
-}
-
-export default parser
--- a/server/src/stt/google-cloud-stt/parser.js
+++ b/server/src/stt/google-cloud-stt/parser.js
@ -1,60 +0,0 @@
-import path from 'node:path'
-
-import stt from '@google-cloud/speech'
-
-import { LANG } from '@/constants'
-import { LogHelper } from '@/helpers/log-helper'
-
-LogHelper.title('Google Cloud STT Parser')
-
-const parser = {}
-let client = {}
-
-parser.conf = {
-  languageCode: LANG,
-  encoding: 'LINEAR16',
-  sampleRateHertz: 16000
-}
-
-/**
- * Initialize Google Cloud Speech-to-Text based on the credentials in the JSON file
- * the env variable "GOOGLE_APPLICATION_CREDENTIALS" provides the JSON file path
- */
-parser.init = () => {
-  process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(
-    process.cwd(),
-    'core/config/voice/google-cloud.json'
-  )
-
-  try {
-    client = new stt.SpeechClient()
-
-    LogHelper.success('Parser initialized')
-  } catch (e) {
-    LogHelper.error(`Google Cloud STT: ${e}`)
-  }
-}
-
-/**
- * Read buffer and give back a string
- */
-parser.parse = async (buffer, cb) => {
-  const audioBytes = buffer.toString('base64')
-  const audio = { content: audioBytes }
-
-  try {
-    const res = await client.recognize({
-      audio,
-      config: parser.conf
-    })
-    const string = res[0].results
-      .map((data) => data.alternatives[0].transcript)
-      .join('\n')
-
-    cb({ string })
-  } catch (e) {
-    LogHelper.error(`Google Cloud STT: ${e}`)
-  }
-}
-
-export default parser
--- a/server/src/stt/stt.js
+++ b/server/src/stt/stt.js
@ -1,133 +0,0 @@
-import fs from 'node:fs'
-import path from 'node:path'
-
-import { IS_TESTING_ENV } from '@/constants'
-import Asr from '@/core/asr'
-import { LogHelper } from '@/helpers/log-helper'
-
-class Stt {
-  constructor(socket, provider) {
-    this.socket = socket
-    this.provider = provider
-    this.providers = ['google-cloud-stt', 'watson-stt', 'coqui-stt']
-    this.parser = {}
-
-    LogHelper.title('STT')
-    LogHelper.success('New instance')
-  }
-
-  /**
-   * Initialize the STT provider
-   */
-  init(cb) {
-    LogHelper.info('Initializing STT...')
-
-    if (!this.providers.includes(this.provider)) {
-      LogHelper.error(
-        `The STT provider "${this.provider}" does not exist or is not yet supported`
-      )
-
-      return false
-    }
-
-    /* istanbul ignore next */
-    if (
-      this.provider === 'google-cloud-stt' &&
-      typeof process.env.GOOGLE_APPLICATION_CREDENTIALS === 'undefined'
-    ) {
-      process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(
-        process.cwd(),
-        'core/config/voice/google-cloud.json'
-      )
-    } else if (
-      typeof process.env.GOOGLE_APPLICATION_CREDENTIALS !== 'undefined' &&
-      process.env.GOOGLE_APPLICATION_CREDENTIALS.indexOf(
-        'google-cloud.json'
-      ) === -1
-    ) {
-      LogHelper.warning(
-        `The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already settled with the following value: "${process.env.GOOGLE_APPLICATION_CREDENTIALS}"`
-      )
-    }
-
-    /* istanbul ignore if */
-    if (!IS_TESTING_ENV) {
-      // Dynamically attribute the parser
-      this.parser = require(`${__dirname}/${this.provider}/parser`)
-      this.parser.default.init(this.parser.default.conf)
-    }
-
-    LogHelper.title('STT')
-    LogHelper.success('STT initialized')
-
-    cb(this)
-
-    return true
-  }
-
-  /**
-   * Forward string output to the client
-   * and delete audio files once it has been forwarded
-   */
-  forward(string) {
-    this.socket.emit('recognized', string, (confirmation) => {
-      /* istanbul ignore next */
-      if (confirmation === 'string-received') {
-        Stt.deleteAudios()
-      }
-    })
-
-    LogHelper.success(`Parsing result: ${string}`)
-  }
-
-  /**
-   * Read the speech file and parse
-   */
-  parse(file) {
-    LogHelper.info('Parsing WAVE file...')
-
-    if (!fs.existsSync(file)) {
-      LogHelper.error(`The WAVE file "${file}" does not exist`)
-
-      return false
-    }
-
-    const buffer = fs.readFileSync(file)
-    /* istanbul ignore if */
-    if (!IS_TESTING_ENV) {
-      this.parser.default.parse(buffer, (data) => {
-        if (data.string !== '') {
-          // Forward the string to the client
-          this.forward(data.string)
-        } else {
-          Stt.deleteAudios()
-        }
-      })
-    }
-
-    return true
-  }
-
-  /**
-   * Delete audio files
-   */
-  static deleteAudios() {
-    return new Promise((resolve) => {
-      const audios = Object.keys(Asr.audios)
-
-      for (let i = 0; i < audios.length; i += 1) {
-        const audio = Asr.audios[audios[i]]
-
-        if (fs.existsSync(audio)) {
-          fs.unlinkSync(Asr.audios[audios[i]])
-        }
-
-        if (i + 1 === audios.length) {
-          resolve()
-        }
-      }
-    })
-  }
-}
-
-export default Stt
--- a/server/src/stt/watson-stt/parser.js
+++ b/server/src/stt/watson-stt/parser.js
@ -1,79 +0,0 @@
-import fs from 'node:fs'
-import path from 'node:path'
-import { Duplex } from 'node:stream'
-
-import Stt from 'ibm-watson/speech-to-text/v1'
-import { IamAuthenticator } from 'ibm-watson/auth'
-
-import { LANG } from '@/constants'
-import { LogHelper } from '@/helpers/log-helper'
-
-LogHelper.title('Watson STT Parser')
-
-const parser = {}
-let client = {}
-
-parser.conf = {
-  contentType: 'audio/wav',
-  model: `${LANG}_BroadbandModel`
-}
-
-/**
- * Initialize Watson Speech-to-Text based on credentials in the JSON file
- */
-parser.init = () => {
-  const config = JSON.parse(
-    fs.readFileSync(
-      path.join(process.cwd(), 'core/config/voice/watson-stt.json'),
-      'utf8'
-    )
-  )
-
-  try {
-    client = new Stt({
-      authenticator: new IamAuthenticator({ apikey: config.apikey }),
-      serviceUrl: config.url
-    })
-
-    LogHelper.success('Parser initialized')
-  } catch (e) {
-    LogHelper.error(`Watson STT: ${e}`)
-  }
-}
-
-/**
- * Read buffer and give back a string
- */
-parser.parse = async (buffer, cb) => {
-  const stream = new Duplex()
-  stream.push(buffer)
-  stream.push(null)
-  parser.conf.audio = stream
-
-  client
-    .recognize(parser.conf)
-    .then(({ result }) => {
-      const string = result.results
-        .map((data) => data.alternatives[0].transcript)
-        .join('\n')
-
-      cb({ string })
-    })
-    .catch((err) => {
-      LogHelper.error(`Watson STT: ${err}`)
-    })
-
-  client.recognize(parser.conf, (err, res) => {
-    if (err) {
-      LogHelper.error(`Watson STT: ${err}`)
-    } else {
-      const string = res.results
-        .map((data) => data.alternatives[0].transcript)
-        .join('\n')
-
-      cb({ string })
-    }
-  })
-}
-
-export default parser
--- a/server/src/tts/amazon-polly/synthesizer.js
+++ b/server/src/tts/amazon-polly/synthesizer.js
@ -6,6 +6,7 @@ import Ffmpeg from 'fluent-ffmpeg'
 import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
 import { path as ffprobePath } from '@ffprobe-installer/ffprobe'

+import { TMP_PATH } from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { StringHelper } from '@/helpers/string-helper'

@ -52,9 +53,10 @@ synthesizer.init = (lang) => {
 * Save string to audio file
 */
 synthesizer.save = (speech, em, cb) => {
-  const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
-    4
-  )}.mp3`
+  const file = path.join(
+    TMP_PATH,
+    `${Date.now()}-${StringHelper.random(4)}.mp3`
+  )

  synthesizer.conf.Text = speech

@ -74,7 +76,7 @@ synthesizer.save = (speech, em, cb) => {
        ffmpeg.input(file).ffprobe((err, data) => {
          if (err) LogHelper.error(err)
          else {
-            const duration = data.streams[0].duration * 1000
+            const duration = data.streams[0].duration * 1_000
            em.emit('saved', duration)
            cb(file, duration)
          }
--- a/server/src/tts/flite/synthesizer.js
+++ b/server/src/tts/flite/synthesizer.js
@ -1,10 +1,12 @@
 import { spawn } from 'node:child_process'
 import fs from 'node:fs'
+import path from 'node:path'

 import Ffmpeg from 'fluent-ffmpeg'
 import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
 import { path as ffprobePath } from '@ffprobe-installer/ffprobe'

+import { TMP_PATH } from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { StringHelper } from '@/helpers/string-helper'

@ -25,14 +27,12 @@ synthesizer.conf = {
 synthesizer.init = (lang) => {
  const flitePath = 'bin/flite/flite'

-  /* istanbul ignore if */
  if (lang !== 'en-US') {
    LogHelper.warning(
      'The Flite synthesizer only accepts the "en-US" language for the moment'
    )
  }

-  /* istanbul ignore if */
  if (!fs.existsSync(flitePath)) {
    LogHelper.error(
      `Cannot find ${flitePath} You can set up the offline TTS by running: "npm run setup:offline-tts"`
@ -49,9 +49,10 @@ synthesizer.init = (lang) => {
 * Save string to audio file
 */
 synthesizer.save = (speech, em, cb) => {
-  const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
-    4
-  )}.wav`
+  const file = path.join(
+    TMP_PATH,
+    `${Date.now()}-${StringHelper.random(4)}.wav`
+  )
  const process = spawn('bin/flite/flite', [
    speech,
    '--setf',
@ -66,7 +67,6 @@ synthesizer.save = (speech, em, cb) => {
    file
  ])

-  /* istanbul ignore next */
  // Handle error
  process.stderr.on('data', (data) => {
    LogHelper.error(data.toString())
@ -79,10 +79,9 @@ synthesizer.save = (speech, em, cb) => {

    // Get file duration thanks to ffprobe
    ffmpeg.input(file).ffprobe((err, data) => {
-      /* istanbul ignore if */
      if (err) LogHelper.error(err)
      else {
-        const duration = data.streams[0].duration * 1000
+        const duration = data.streams[0].duration * 1_000
        em.emit('saved', duration)
        cb(file, duration)
      }
--- a/server/src/tts/google-cloud-tts/synthesizer.js
+++ b/server/src/tts/google-cloud-tts/synthesizer.js
@ -6,6 +6,7 @@ import Ffmpeg from 'fluent-ffmpeg'
 import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
 import { path as ffprobePath } from '@ffprobe-installer/ffprobe'

+import { TMP_PATH } from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { StringHelper } from '@/helpers/string-helper'

@ -58,9 +59,10 @@ synthesizer.init = (lang) => {
 * Save string to audio file
 */
 synthesizer.save = (speech, em, cb) => {
-  const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
-    4
-  )}.mp3`
+  const file = path.join(
+    TMP_PATH,
+    `${Date.now()}-${StringHelper.random(4)}.mp3`
+  )

  synthesizer.conf.input = { text: speech }

@ -84,7 +86,7 @@ synthesizer.save = (speech, em, cb) => {
      ffmpeg.input(file).ffprobe((err, data) => {
        if (err) LogHelper.error(err)
        else {
-          const duration = data.streams[0].duration * 1000
+          const duration = data.streams[0].duration * 1_000
          em.emit('saved', duration)
          cb(file, duration)
        }
--- a/server/src/tts/tts.js
+++ b/server/src/tts/tts.js
@ -35,7 +35,6 @@ class Tts {
      return false
    }

-    /* istanbul ignore next */
    if (
      this.provider === 'google-cloud-tts' &&
      typeof process.env.GOOGLE_APPLICATION_CREDENTIALS === 'undefined'
@ -59,6 +58,7 @@ class Tts {
    this.synthesizer = require(`${__dirname}/${this.provider}/synthesizer`)
    this.synthesizer.default.init(LangHelper.getLongCode(this.lang))

+    // TODO: do not use event emitter; and use async/await
    this.onSaved()

    LogHelper.title('TTS')
@ -75,9 +75,8 @@ class Tts {
   */
  forward(speech) {
    this.synthesizer.default.save(speech.text, this.em, (file, duration) => {
-      /* istanbul ignore next */
      const bitmap = fs.readFileSync(file)
-      /* istanbul ignore next */
+
      this.socket.emit(
        'audio-forwarded',
        {
--- a/server/src/tts/watson-tts/synthesizer.js
+++ b/server/src/tts/watson-tts/synthesizer.js
@ -7,6 +7,7 @@ import Ffmpeg from 'fluent-ffmpeg'
 import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
 import { path as ffprobePath } from '@ffprobe-installer/ffprobe'

+import { TMP_PATH } from '@/constants'
 import { LogHelper } from '@/helpers/log-helper'
 import { StringHelper } from '@/helpers/string-helper'

@ -56,9 +57,10 @@ synthesizer.init = (lang) => {
 * Save string to audio file
 */
 synthesizer.save = (speech, em, cb) => {
-  const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
-    4
-  )}.wav`
+  const file = path.join(
+    TMP_PATH,
+    `${Date.now()}-${StringHelper.random(4)}.wav`
+  )

  synthesizer.conf.text = speech

@ -78,7 +80,7 @@ synthesizer.save = (speech, em, cb) => {
        ffmpeg.input(file).ffprobe((err, data) => {
          if (err) LogHelper.error(err)
          else {
-            const duration = data.streams[0].duration * 1000
+            const duration = data.streams[0].duration * 1_000
            em.emit('saved', duration)
            cb(file, duration)
          }
--- a/server/src/types.ts
+++ b/server/src/types.ts
@ -2,7 +2,7 @@ import type { langs } from '@@/core/langs.json'

 /**
 * Contain common/shared types that are universal across the project
- * and cannot be placed in the respective core chunks
+ * and cannot be placed in the respective core nodes
 */

 /**
--- a/test/unit/server/core/asr.spec.js
+++ b/test/unit/server/core/asr.spec.js
@ -1,6 +1,6 @@
 import fs from 'node:fs'

-import Asr from '@/core/asr'
+import Asr from '@/core/asr/asr'
 import Stt from '@/stt/stt'

 describe('ASR', () => {