mirror of https://github.com/leon-ai/leon.git synced 2024-11-20 19:52:55 +03:00

refactor(server): HTTP server; brain entries; STT parsers; TTS synthesizers (WIP)

This commit is contained in:
louistiti 2023-02-03 00:14:23 +08:00
parent 19ce6eb9c2
commit c8bc9ecbb0
No known key found for this signature in database
GPG Key ID: 0A1C3B043E70C77D
37 changed files with 820 additions and 532 deletions

View File

@ -1,4 +1,5 @@
#!/bin/sh
. "$(dirname "$0")/_/husky.sh"
npx lint-staged
# TODO: add lint-staged back in when JavaScript to TypeScript is complete
# npx lint-staged

View File

@ -100,7 +100,7 @@ export default class Client {
}
}
}
}, 1000)
}, 1_000)
}, data.duration + 500)
}
})

View File

@ -11,7 +11,7 @@ const config = {
server_host: import.meta.env.VITE_LEON_HOST,
server_port: import.meta.env.VITE_LEON_PORT,
min_decibels: -40, // Noise detection sensitivity
max_blank_time: 1000 // Maximum time to consider a blank (ms)
max_blank_time: 1_000 // Maximum time to consider a blank (ms)
}
const serverUrl =
import.meta.env.VITE_LEON_NODE_ENV === 'production'
@ -58,7 +58,7 @@ document.addEventListener('DOMContentLoaded', async () => {
rec.enabled = false
// Ensure there is some data
if (blob.size >= 1000) {
if (blob.size >= 1_000) {
client.socket.emit('recognize', blob)
}
})

View File

@ -106,7 +106,9 @@
"@swc/core": "^1.3.14",
"@tsconfig/node16-strictest": "^1.0.3",
"@types/cli-spinner": "0.2.1",
"@types/fluent-ffmpeg": "^2.1.20",
"@types/node": "^18.7.13",
"@types/node-wav": "^0.0.0",
"@typescript-eslint/eslint-plugin": "^5.36.1",
"@typescript-eslint/parser": "^5.36.1",
"cli-spinner": "^0.2.10",

View File

@ -99,5 +99,11 @@ export const TCP_SERVER_PORT = Number(process.env['LEON_PY_TCP_SERVER_PORT'])
/**
* Paths
*/
export const BIN_PATH = path.join('bin')
export const GLOBAL_DATA_PATH = path.join('core', 'data')
export const VOICE_CONFIG_PATH = path.join('core', 'config', 'voice')
export const SERVER_PATH = path.join(
'server',
IS_PRODUCTION_ENV ? 'dist' : 'src'
)
export const TMP_PATH = path.join(SERVER_PATH, 'tmp')

View File

@ -1,77 +0,0 @@
import fs from 'node:fs'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import Ffmpeg from 'fluent-ffmpeg'
import { LogHelper } from '@/helpers/log-helper'
const audios = {
webm: `${__dirname}/../tmp/speech.webm`,
wav: `${__dirname}/../tmp/speech.wav`
}
class Asr {
constructor() {
this.blob = {}
LogHelper.title('ASR')
LogHelper.success('New instance')
}
static get audios() {
return audios
}
/**
* Encode audio blob to WAVE file
* and forward the WAVE file to the STT parser
*/
run(blob, stt) {
return new Promise((resolve, reject) => {
LogHelper.title('ASR')
this.blob = blob
fs.writeFile(audios.webm, Buffer.from(this.blob), 'binary', (err) => {
if (err) {
reject({ type: 'error', obj: err })
return
}
const ffmpeg = new Ffmpeg()
ffmpeg.setFfmpegPath(ffmpegPath)
/**
* Encode WebM file to WAVE file
* ffmpeg -i speech.webm -acodec pcm_s16le -ar 16000 -ac 1 speech.wav
*/
ffmpeg
.addInput(audios.webm)
.on('start', () => {
LogHelper.info('Encoding WebM file to WAVE file...')
})
.on('end', () => {
LogHelper.success('Encoding done')
if (Object.keys(stt).length === 0) {
reject({
type: 'warning',
obj: new Error('The speech recognition is not ready yet')
})
} else {
stt.parse(audios.wav)
resolve()
}
})
.on('error', (err) => {
reject({ type: 'error', obj: new Error(`Encoding error ${err}`) })
})
.outputOptions(['-acodec pcm_s16le', '-ar 16000', '-ac 1'])
.output(audios.wav)
.run()
})
})
}
}
export default Asr

View File

@ -0,0 +1,77 @@
import path from 'node:path'
import fs from 'node:fs'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import ffmpeg from 'fluent-ffmpeg'
import { TMP_PATH } from '@/constants'
import { STT } from '@/core'
import { LogHelper } from '@/helpers/log-helper'
export default class ASR {
private static instance: ASR
public audioPaths = {
webm: path.join(TMP_PATH, 'speech.webm'),
wav: path.join(TMP_PATH, 'speech.wav')
}
constructor() {
if (!ASR.instance) {
LogHelper.title('ASR')
LogHelper.success('New instance')
ASR.instance = this
}
}
/**
* Encode audio blob to WAVE file
* and forward the WAVE file to the STT parser
*/
public encode(blob: Buffer): Promise<void> {
return new Promise((resolve, reject) => {
LogHelper.title('ASR')
fs.writeFile(
this.audioPaths.webm,
Buffer.from(blob),
'binary',
async (err) => {
if (err) {
reject(new Error(`${err}`))
return
}
ffmpeg.setFfmpegPath(ffmpegPath)
/**
* Encode WebM file to WAVE file
* ffmpeg -i speech.webm -acodec pcm_s16le -ar 16000 -ac 1 speech.wav
*/
ffmpeg()
.addInput(this.audioPaths.webm)
.on('start', () => {
LogHelper.info('Encoding WebM file to WAVE file...')
})
.on('end', () => {
LogHelper.success('Encoding done')
if (!STT.isParserReady) {
reject(new Error('The speech recognition is not ready yet'))
} else {
STT.transcribe(this.audioPaths.wav)
resolve()
}
})
.on('error', (err) => {
reject(new Error(`Encoding error ${err}`))
})
.outputOptions(['-acodec pcm_s16le', '-ar 16000', '-ac 1'])
.output(this.audioPaths.wav)
.run()
}
)
})
}
}
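
An illustrative usage sketch (not part of this diff): the socket server hands the raw audio blob to ASR.encode, which writes the WebM file, converts it to WAVE and lets STT take over. The handler shape below is assumed; only the ASR singleton and encode() come from this commit.

import { ASR } from '@/core'

// Sketch: forward a received audio blob to the encoder; ASR.encode writes
// speech.webm, converts it to speech.wav via ffmpeg, then calls STT.transcribe
async function onRecognize(blob: Buffer): Promise<void> {
  try {
    await ASR.encode(blob)
  } catch (e) {
    console.error('ASR encoding failed:', (e as Error).message)
  }
}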

View File

@ -0,0 +1 @@
export type ASRAudioFormat = 'wav' | 'webm'

View File

@ -266,7 +266,6 @@ class Brain {
output += data
}
} else {
/* istanbul ignore next */
reject({
type: 'warning',
obj: new Error(
@ -280,7 +279,6 @@ class Brain {
LogHelper.title('Brain')
LogHelper.debug(`process.stdout: ${String(data)}`)
/* istanbul ignore next */
reject({
type: 'error',
obj: new Error(
@ -338,7 +336,6 @@ class Brain {
}
speeches.push(speech)
/* istanbul ignore next */
// Synchronize the downloaded content if enabled
if (
this.finalOutput.type === 'end' &&

View File

@ -1,16 +1,14 @@
// TODO: remove ignore
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-nocheck
import type { Socket } from 'node:net'
import fs from 'node:fs'
import path from 'node:path'
import { spawn, ChildProcessWithoutNullStreams } from 'node:child_process'
import { langs } from '@@/core/langs.json'
import type { ShortLanguageCode } from '@/types'
import type { GlobalAnswers } from '@/schemas/global-data-schemas'
import { langs } from '@@/core/langs.json'
import { HAS_TTS, PYTHON_BRIDGE_BIN_PATH } from '@/constants'
import { HAS_TTS, PYTHON_BRIDGE_BIN_PATH, TMP_PATH } from '@/constants'
import { SOCKET_SERVER } from '@/core'
import { LangHelper } from '@/helpers/lang-helper'
import { LogHelper } from '@/helpers/log-helper'
import { SkillDomainHelper } from '@/helpers/skill-domain-helper'
@ -19,7 +17,8 @@ import Synchronizer from '@/core/synchronizer'
// TODO: split class
class Brain {
export default class Brain {
private static instance: Brain
private _lang: ShortLanguageCode = 'en'
private broca: GlobalAnswers = JSON.parse(
fs.readFileSync(
@ -33,15 +32,16 @@ class Brain {
// TODO: type
private finalOutput: unknown
// TODO: not readonly?
public readonly socket: Socket
constructor() {
this._stt = {}
this._tts = {}
LogHelper.title('Brain')
LogHelper.success('New instance')
if (!Brain.instance) {
LogHelper.title('Brain')
LogHelper.success('New instance')
Brain.instance = this
}
}
// TODO: handle return type
@ -113,7 +113,7 @@ class Brain {
this._tts.add(speech, end)
}
this._socket.emit('answer', rawSpeech)
SOCKET_SERVER.socket.emit('answer', rawSpeech)
}
}
@ -157,10 +157,7 @@ class Brain {
return new Promise(async (resolve, reject) => {
const utteranceId = `${Date.now()}-${StringHelper.random(4)}`
const intentObjectPath = path.join(
__dirname,
`../tmp/${utteranceId}.json`
)
const intentObjectPath = path.join(TMP_PATH, `${utteranceId}.json`)
const speeches = []
// Ask to repeat if Leon is not sure about the request
@ -173,7 +170,7 @@ class Brain {
speeches.push(speech)
this.talk(speech, true)
this._socket.emit('is-typing', false)
SOCKET_SERVER.socket.emit('is-typing', false)
}
const executionTimeEnd = Date.now()
@ -276,7 +273,6 @@ class Brain {
output += data
}
} else {
/* istanbul ignore next */
reject({
type: 'warning',
obj: new Error(
@ -290,7 +286,6 @@ class Brain {
LogHelper.title('Brain')
LogHelper.debug(`process.stdout: ${String(data)}`)
/* istanbul ignore next */
reject({
type: 'error',
obj: new Error(
@ -310,7 +305,7 @@ class Brain {
})}!`
if (!opts.mute) {
this.talk(speech)
this._socket.emit('is-typing', false)
SOCKET_SERVER.socket.emit('is-typing', false)
}
speeches.push(speech)
@ -348,7 +343,6 @@ class Brain {
}
speeches.push(speech)
/* istanbul ignore next */
// Synchronize the downloaded content if enabled
if (
this.finalOutput.type === 'end' &&
@ -376,7 +370,7 @@ class Brain {
Brain.deleteIntentObjFile(intentObjectPath)
if (!opts.mute) {
this._socket.emit('is-typing', false)
SOCKET_SERVER.socket.emit('is-typing', false)
}
const executionTimeEnd = Date.now()
@ -387,10 +381,10 @@ class Brain {
nextAction?.suggestions &&
this.finalOutput.core?.showNextActionSuggestions
) {
this._socket.emit('suggest', nextAction.suggestions)
SOCKET_SERVER.socket.emit('suggest', nextAction.suggestions)
}
if (action?.suggestions && this.finalOutput.core?.showSuggestions) {
this._socket.emit('suggest', action.suggestions)
SOCKET_SERVER.socket.emit('suggest', action.suggestions)
}
resolve({
@ -493,12 +487,12 @@ class Brain {
if (!opts.mute) {
this.talk(answer, true)
this._socket.emit('is-typing', false)
SOCKET_SERVER.socket.emit('is-typing', false)
}
// Send suggestions to the client
if (nextAction?.suggestions) {
this._socket.emit('suggest', nextAction.suggestions)
SOCKET_SERVER.socket.emit('suggest', nextAction.suggestions)
}
resolve({
@ -516,5 +510,3 @@ class Brain {
})
}
}
export default Brain

View File

@ -106,10 +106,7 @@ export default class HTTPServer {
try {
await this.listen()
} catch (e) {
// TODO: remove ts-ignore
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
LogHelper.error(e.message)
LogHelper.error((e as Error).message)
}
}
@ -123,7 +120,7 @@ export default class HTTPServer {
})
: new SocketIOServer(this.httpServer)
// TODO: instanciate new socket server
// TODO: instantiate new socket server
io.on('connection', server.handleOnConnection)
this.fastify.listen(

View File

@ -21,7 +21,7 @@ import {
import { TCP_CLIENT } from '@/core'
import Nlu from '@/core/nlu'
import Brain from '@/core/brain'
import Asr from '@/core/asr'
import Asr from '@/core/asr/asr'
import Stt from '@/stt/stt'
import Tts from '@/tts/tts'
import corsMidd from '@/core/http-server/plugins/cors'

View File

@ -2,6 +2,9 @@ import { HOST, PORT, TCP_SERVER_HOST, TCP_SERVER_PORT } from '@/constants'
import TCPClient from '@/core/tcp-client'
import HTTPServer from '@/core/http-server/http-server'
import SocketServer from '@/core/socket-server'
import SpeechToText from '@/core/stt/stt'
import TextToSpeech from '@/core/tts/tts'
import AutomaticSpeechRecognition from '@/core/asr/asr'
/**
* Register core singletons
@ -15,3 +18,9 @@ export const TCP_CLIENT = new TCPClient(
export const HTTP_SERVER = new HTTPServer(String(HOST), PORT)
export const SOCKET_SERVER = new SocketServer()
export const STT = new SpeechToText()
export const TTS = new TextToSpeech()
export const ASR = new AutomaticSpeechRecognition()
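
These three singletons follow the same registration pattern as HTTP_SERVER and SOCKET_SERVER above. A minimal consumer sketch (illustrative, not part of this diff), assuming the '@/core' path alias and the 'en' default language used elsewhere in this commit:

import { STT, TTS, ASR } from '@/core'

// Sketch: boot the voice stack once at startup, then reuse the shared
// instances everywhere; the constructors above guarantee single instances
export async function initVoiceStack(): Promise<void> {
  await STT.init()
  await TTS.init('en')
  // ASR needs no init call; ASR.encode(blob) can be used as blobs arrive
}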

View File

@ -233,7 +233,6 @@ class Nlu {
* Collaborative logger request
*/
sendLog(utterance) {
/* istanbul ignore next */
if (HAS_LOGGER && !IS_TESTING_ENV) {
axios.request({
method: 'POST',
@ -402,7 +401,7 @@ class Nlu {
}
return processedData
} catch (e) /* istanbul ignore next */ {
} catch (e) {
return null
}
}
@ -595,7 +594,7 @@ class Nlu {
configDataFilePath,
this.nluResultObj
)
} catch (e) /* istanbul ignore next */ {
} catch (e) {
if (LogHelper[e.type]) {
LogHelper[e.type](e.obj.message)
}
@ -672,7 +671,7 @@ class Nlu {
...processedData,
nluProcessingTime: processingTime - processedData?.executionTime // In ms, NLU processing time only
})
} catch (e) /* istanbul ignore next */ {
} catch (e) {
LogHelper[e.type](e.obj.message)
if (!opts.mute) {

View File

@ -2,6 +2,8 @@
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-nocheck
import type { Socket } from 'node:net'
import { Server as SocketIOServer } from 'socket.io'
import {
@ -11,15 +13,17 @@ import {
STT_PROVIDER,
TTS_PROVIDER
} from '@/constants'
import { HTTP_SERVER, TCP_CLIENT } from '@/core'
import { HTTP_SERVER, TCP_CLIENT, ASR, STT, TTS } from '@/core'
import { LogHelper } from '@/helpers/log-helper'
import Asr from '@/core/asr'
import Asr from '@/core/asr/asr'
// import Stt from '@/stt/stt'
// import Tts from '@/tts/tts'
export default class SocketServer {
private static instance: SocketServer
public socket: Socket
constructor() {
if (!SocketServer.instance) {
LogHelper.title('Socket Server')
@ -37,38 +41,38 @@ export default class SocketServer {
})
: new SocketIOServer(httpServer)
// TODO: instantiate new socket server
io.on('connection', (socket) => {
LogHelper.title('Client')
LogHelper.success('Connected')
this.socket = socket
// Init
socket.on('init', async (data) => {
this.socket.on('init', async (data) => {
LogHelper.info(`Type: ${data}`)
LogHelper.info(`Socket id: ${socket.id}`)
LogHelper.info(`Socket id: ${this.socket.id}`)
// TODO
// const provider = await addProvider(socket.id)
// Check whether the TCP client is connected to the TCP server
if (TCP_CLIENT.isConnected) {
socket.emit('ready')
this.socket.emit('ready')
} else {
TCP_CLIENT.ee.on('connected', () => {
socket.emit('ready')
this.socket.emit('ready')
})
}
if (data === 'hotword-node') {
// Hotword triggered
socket.on('hotword-detected', (data) => {
this.socket.on('hotword-detected', (data) => {
LogHelper.title('Socket')
LogHelper.success(`Hotword ${data.hotword} detected`)
socket.broadcast.emit('enable-record')
this.socket.broadcast.emit('enable-record')
})
} else {
const asr = new Asr()
let sttState = 'disabled'
let ttsState = 'disabled'
@ -81,6 +85,7 @@ export default class SocketServer {
// TODO
// provider.brain.stt = new Stt(socket, STT_PROVIDER)
// provider.brain.stt.init(() => null)
await STT.init()
}
if (HAS_TTS) {
ttsState = 'enabled'
@ -88,6 +93,7 @@ export default class SocketServer {
// TODO
// provider.brain.tts = new Tts(socket, TTS_PROVIDER)
// provider.brain.tts.init('en', () => null)
await TTS.init()
}
LogHelper.title('Initialization')
@ -95,11 +101,11 @@ export default class SocketServer {
LogHelper.success(`TTS ${ttsState}`)
// Listen for new utterance
socket.on('utterance', async (data) => {
this.socket.on('utterance', async (data) => {
LogHelper.title('Socket')
LogHelper.info(`${data.client} emitted: ${data.value}`)
socket.emit('is-typing', true)
this.socket.emit('is-typing', true)
// TODO
// const utterance = data.value
@ -115,10 +121,9 @@ export default class SocketServer {
})
// Handle automatic speech recognition
socket.on('recognize', async (data) => {
this.socket.on('recognize', async (data) => {
try {
// TODO
await asr.run(data, provider.brain.stt)
await ASR.encode(data)
} catch (e) {
LogHelper[e.type](e.obj.message)
}
@ -126,9 +131,9 @@ export default class SocketServer {
}
})
socket.once('disconnect', () => {
this.socket.once('disconnect', () => {
// TODO
// deleteProvider(socket.id)
// deleteProvider(this.socket.id)
})
})
}

View File

@ -0,0 +1,73 @@
import path from 'node:path'
import fs from 'node:fs'
import wav from 'node-wav'
import { Model } from 'stt'
import type { STTParserFacade } from '@/core/stt/types'
import { BIN_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
export class CoquiSTTParser implements STTParserFacade {
private readonly name = 'Coqui STT Parser'
private readonly model: Model | undefined = undefined
private readonly desiredSampleRate: number = 16_000
constructor() {
LogHelper.title(this.name)
LogHelper.success('New instance')
const modelPath = path.join(BIN_PATH, 'coqui', 'model.tflite')
const scorerPath = path.join(BIN_PATH, 'coqui', 'huge-vocabulary.scorer')
LogHelper.info(`Loading model from file ${modelPath}...`)
if (!fs.existsSync(modelPath)) {
LogHelper.error(
`Cannot find ${modelPath}. You can set up the offline STT by running: "npm run setup:offline-stt"`
)
}
if (!fs.existsSync(scorerPath)) {
LogHelper.error(
`Cannot find ${scorerPath}. You can set up the offline STT by running: "npm run setup:offline-stt"`
)
}
try {
this.model = new Model(modelPath)
} catch (e) {
throw Error(`${this.name}: failed to load the model. ${e}`)
}
this.desiredSampleRate = this.model.sampleRate()
try {
this.model.enableExternalScorer(scorerPath)
} catch (e) {
throw Error(`${this.name}: failed to enable external scorer. ${e}`)
}
LogHelper.success('Parser initialized')
}
/**
* Read audio buffer and return the transcript (decoded string)
*/
public async parse(buffer: Buffer): Promise<string | null> {
const wavDecode = wav.decode(buffer)
if (this.model) {
if (wavDecode.sampleRate < this.desiredSampleRate) {
LogHelper.warning(
`Original sample rate (${wavDecode.sampleRate}) is lower than ${this.desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`
)
}
// Decoded string
return this.model.stt(buffer)
}
return null
}
}

View File

@ -0,0 +1,67 @@
import path from 'node:path'
import stt, { SpeechClient } from '@google-cloud/speech'
import type { STTParserFacade } from '@/core/stt/types'
import { LANG, VOICE_CONFIG_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
export class GoogleCloudSTTParser implements STTParserFacade {
private readonly name = 'Google Cloud STT Parser'
private readonly client: SpeechClient | undefined = undefined
constructor() {
LogHelper.title(this.name)
LogHelper.success('New instance')
/**
* Initialize Google Cloud Speech-to-Text based on the credentials in the JSON file
* the env variable "GOOGLE_APPLICATION_CREDENTIALS" provides the JSON file path
*/
process.env['GOOGLE_APPLICATION_CREDENTIALS'] = path.join(
VOICE_CONFIG_PATH,
'google-cloud.json'
)
try {
this.client = new stt.SpeechClient()
LogHelper.success('Parser initialized')
} catch (e) {
LogHelper.error(`${this.name}: ${e}`)
}
}
/**
* Read audio buffer and return the transcript (decoded string)
*/
public async parse(buffer: Buffer): Promise<string | null> {
if (this.client) {
const audioBytes = buffer.toString('base64')
const audio = { content: audioBytes }
try {
const [res] = await this.client.recognize({
audio,
config: {
languageCode: LANG,
encoding: 'LINEAR16',
sampleRateHertz: 16000
}
})
// Decoded string
return (res.results || [])
.map((data) => data.alternatives && data.alternatives[0]?.transcript)
.join('\n')
} catch (e) {
LogHelper.error(`${this.name}: ${e}`)
}
} else {
LogHelper.error(`${this.name}: not initialized`)
}
return null
}
}

View File

@ -0,0 +1,65 @@
import path from 'node:path'
import fs from 'node:fs'
import { Duplex } from 'node:stream'
import Stt from 'ibm-watson/speech-to-text/v1'
import { IamAuthenticator } from 'ibm-watson/auth'
import type { STTParserFacade } from '@/core/stt/types'
import type { WatsonVoiceConfiguration } from '@/schemas/voice-config-schemas'
import { LANG, VOICE_CONFIG_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
export class WatsonSTTParser implements STTParserFacade {
private readonly name = 'Watson STT Parser'
private readonly client: Stt | undefined = undefined
constructor() {
LogHelper.title(this.name)
LogHelper.success('New instance')
const config: WatsonVoiceConfiguration = JSON.parse(
fs.readFileSync(path.join(VOICE_CONFIG_PATH, 'watson-stt.json'), 'utf8')
)
try {
this.client = new Stt({
authenticator: new IamAuthenticator({ apikey: config.apikey }),
serviceUrl: config.url
})
LogHelper.success('Parser initialized')
} catch (e) {
LogHelper.error(`${this.name}: ${e}`)
}
}
/**
* Read audio buffer and return the transcript (decoded string)
*/
public async parse(buffer: Buffer): Promise<string | null> {
if (this.client) {
const stream = new Duplex()
stream.push(buffer)
stream.push(null)
try {
const { result } = await this.client.recognize({
contentType: 'audio/wav',
model: `${LANG}_BroadbandModel`,
audio: stream
})
// Decoded string
return (result.results || [])
.map((data) => data.alternatives && data.alternatives[0]?.transcript)
.join('\n')
} catch (e) {
LogHelper.error(`${this.name}: ${e}`)
}
}
return null
}
}

server/src/core/stt/stt.ts (new file, 132 lines)
View File

@ -0,0 +1,132 @@
import fs from 'node:fs'
import path from 'node:path'
import type { ASRAudioFormat } from '@/core/asr/types'
import type { STTParser } from '@/core/stt/types'
import { STT_PROVIDER, VOICE_CONFIG_PATH } from '@/constants'
import { SOCKET_SERVER, ASR } from '@/core'
import { STTParserNames, STTProviders } from '@/core/stt/types'
import { LogHelper } from '@/helpers/log-helper'
export default class STT {
private static instance: STT
private parser: STTParser = undefined
constructor() {
if (!STT.instance) {
LogHelper.title('STT')
LogHelper.success('New instance')
STT.instance = this
}
}
public get isParserReady(): boolean {
return !!this.parser
}
/**
* Initialize the STT provider
*/
public async init(): Promise<boolean> {
LogHelper.info('Initializing STT...')
if (!Object.values(STTProviders).includes(STT_PROVIDER as STTProviders)) {
LogHelper.error(
`The STT provider "${STT_PROVIDER}" does not exist or is not yet supported`
)
return false
}
if (
STT_PROVIDER === STTProviders.GoogleCloudSTT &&
typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] === 'undefined'
) {
process.env['GOOGLE_APPLICATION_CREDENTIALS'] = path.join(
VOICE_CONFIG_PATH,
'google-cloud.json'
)
} else if (
typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] !== 'undefined' &&
process.env['GOOGLE_APPLICATION_CREDENTIALS'].indexOf(
'google-cloud.json'
) === -1
) {
LogHelper.warning(
`The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already set with the following value: "${process.env['GOOGLE_APPLICATION_CREDENTIALS']}"`
)
}
// Dynamically attribute the parser
const parser = await import(
path.join(
__dirname,
'parsers',
STTParserNames[STT_PROVIDER as keyof typeof STTParserNames]
)
)
this.parser = new parser() as STTParser
LogHelper.title('STT')
LogHelper.success('STT initialized')
return true
}
/**
* Read the speech file and transcribe
*/
public async transcribe(audioFilePath: string): Promise<boolean> {
LogHelper.info('Parsing WAVE file...')
if (!fs.existsSync(audioFilePath)) {
LogHelper.error(`The WAVE file "${audioFilePath}" does not exist`)
return false
}
const buffer = fs.readFileSync(audioFilePath)
const transcript = await this.parser?.parse(buffer)
if (transcript && transcript !== '') {
// Forward the string to the client
this.forward(transcript)
} else {
this.deleteAudios()
}
return true
}
/**
* Forward string output to the client
* and delete audio files once it has been forwarded
*/
private forward(str: string): void {
SOCKET_SERVER.socket.emit('recognized', str, (confirmation: string) => {
if (confirmation === 'string-received') {
this.deleteAudios()
}
})
LogHelper.success(`Parsing result: ${str}`)
}
/**
* Delete audio files
*/
private deleteAudios(): void {
const audioPaths = Object.keys(ASR.audioPaths)
for (let i = 0; i < audioPaths.length; i += 1) {
const audioType = audioPaths[i] as ASRAudioFormat
const audioPath = ASR.audioPaths[audioType]
if (fs.existsSync(audioPath)) {
fs.unlinkSync(audioPath)
}
}
}
}
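
A minimal end-to-end sketch of the flow above (illustrative, not part of this diff), assuming the speech.wav file name that ASR writes under TMP_PATH:

import path from 'node:path'
import { TMP_PATH } from '@/constants'
import { STT } from '@/core'

// Sketch: initialize the configured provider, then transcribe the WAVE file
// produced by ASR; on success the transcript is forwarded to the client
async function transcribeLastRecording(): Promise<void> {
  const isInitialized = await STT.init()
  if (isInitialized) {
    await STT.transcribe(path.join(TMP_PATH, 'speech.wav'))
  }
}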

View File

@ -0,0 +1,25 @@
import type { CoquiSTTParser } from '@/core/stt/parsers/coqui-stt-parser'
import type { GoogleCloudSTTParser } from '@/core/stt/parsers/google-cloud-stt-parser'
import type { WatsonSTTParser } from '@/core/stt/parsers/watson-stt-parser'
export enum STTProviders {
GoogleCloudSTT = 'google-cloud-stt',
WatsonSTT = 'watson-stt',
CoquiSTT = 'coqui-stt'
}
export enum STTParserNames {
GoogleCloudSTT = 'google-cloud-stt-parser',
WatsonSTT = 'watson-stt-parser',
CoquiSTT = 'coqui-stt-parser'
}
export type STTParser =
| GoogleCloudSTTParser
| WatsonSTTParser
| CoquiSTTParser
| undefined
export interface STTParserFacade {
parse(buffer: Buffer): Promise<string | null>
}
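
With this facade, adding a provider only requires a parser class that implements STTParserFacade and a file name matching STTParserNames so the dynamic import in stt.ts can resolve it. A hypothetical parser sketch (class name and returned transcript are illustrative, not part of this diff):

import type { STTParserFacade } from '@/core/stt/types'

// Hypothetical parser: satisfies the facade without a real speech backend
export class EchoSTTParser implements STTParserFacade {
  public async parse(buffer: Buffer): Promise<string | null> {
    // A real parser would decode the WAVE buffer and run inference here
    return buffer.length > 0 ? '<transcript placeholder>' : null
  }
}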

View File

@ -60,7 +60,6 @@ class Synchronizer {
* Google Drive synchronization method
*/
googleDrive() {
/* istanbul ignore next */
return new Promise((resolve, reject) => {
const driveFolderName = `leon-${this.classification.domain}-${this.classification.skill}`
const folderMimeType = 'application/vnd.google-apps.folder'

View File

@ -0,0 +1,105 @@
import type { Stream } from 'node:stream'
import path from 'node:path'
import fs from 'node:fs'
import Ffmpeg from 'fluent-ffmpeg'
import { Polly, SynthesizeSpeechCommand } from '@aws-sdk/client-polly'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import type { LongLanguageCode } from '@/types'
import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
import type { AmazonVoiceConfiguration } from '@/schemas/voice-config-schemas'
import { LANG, VOICE_CONFIG_PATH, TMP_PATH } from '@/constants'
import { TTS } from '@/core'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
const VOICES = {
'en-US': {
VoiceId: 'Matthew'
},
'fr-FR': {
VoiceId: 'Mathieu'
}
}
export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
private readonly name = 'Amazon Polly TTS Synthesizer'
private readonly client: Polly | undefined = undefined
private readonly lang: LongLanguageCode = LANG as LongLanguageCode
constructor(lang: LongLanguageCode) {
LogHelper.title(this.name)
LogHelper.success('New instance')
const config: AmazonVoiceConfiguration = JSON.parse(
fs.readFileSync(path.join(VOICE_CONFIG_PATH, 'amazon.json'), 'utf8')
)
try {
this.lang = lang
this.client = new Polly(config)
LogHelper.success('Synthesizer initialized')
} catch (e) {
LogHelper.error(`${this.name}: ${e}`)
}
}
public async synthesize(speech: string): Promise<SynthesizeResult | null> {
const audioFilePath = path.join(
TMP_PATH,
`${Date.now()}-${StringHelper.random(4)}.mp3`
)
try {
if (this.client) {
const result = await this.client.send(
new SynthesizeSpeechCommand({
OutputFormat: 'mp3',
VoiceId: VOICES[this.lang].VoiceId,
Text: speech
})
)
// Cast to Node.js stream as the SDK returns a custom type that does not have a pipe method
const AudioStream = result.AudioStream as Stream
if (!AudioStream) {
LogHelper.error(`${this.name}: AudioStream is undefined`)
return null
}
const wStream = fs.createWriteStream(audioFilePath)
AudioStream.pipe(wStream)
await new Promise((resolve, reject) => {
wStream.on('finish', resolve)
wStream.on('error', reject)
})
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const ffmpeg = new (Ffmpeg as any)()
ffmpeg.setFfmpegPath(ffmpegPath)
ffmpeg.setFfprobePath(ffprobePath)
const data = await ffmpeg.input(audioFilePath).ffprobe()
const duration = data.streams[0].duration * 1_000
TTS.em.emit('saved', duration)
return {
audioFilePath,
duration
}
}
LogHelper.error(`${this.name}: client is not defined yet`)
} catch (e) {
LogHelper.error(`${this.name}: Failed to synthesize speech: ${e}`)
}
return null
}
}

server/src/core/tts/tts.ts (new file, 141 lines)
View File

@ -0,0 +1,141 @@
import path from 'node:path'
import events from 'node:events'
import fs from 'node:fs'
import type { ShortLanguageCode } from '@/types'
import type { TTSSynthesizer } from '@/core/tts/types'
import { SOCKET_SERVER } from '@/core'
import { TTS_PROVIDER, VOICE_CONFIG_PATH } from '@/constants'
import { TTSSynthesizers, TTSProviders } from '@/core/tts/types'
import { LogHelper } from '@/helpers/log-helper'
import { LangHelper } from '@/helpers/lang-helper'
type Speech = {
text: string
isFinalAnswer: boolean
}
export default class TTS {
private static instance: TTS
private synthesizer: TTSSynthesizer = undefined
private speeches: Speech[] = []
public lang: ShortLanguageCode = 'en'
public em = new events.EventEmitter()
constructor() {
if (!TTS.instance) {
LogHelper.title('TTS')
LogHelper.success('New instance')
TTS.instance = this
}
}
/**
* Initialize the TTS provider
*/
public async init(newLang: ShortLanguageCode): Promise<boolean> {
LogHelper.info('Initializing TTS...')
this.lang = newLang || this.lang
if (!Object.values(TTSProviders).includes(TTS_PROVIDER as TTSProviders)) {
LogHelper.error(
`The TTS provider "${TTS_PROVIDER}" does not exist or is not yet supported`
)
return false
}
if (
TTS_PROVIDER === TTSProviders.GoogleCloudTTS &&
typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] === 'undefined'
) {
process.env['GOOGLE_APPLICATION_CREDENTIALS'] = path.join(
VOICE_CONFIG_PATH,
'google-cloud.json'
)
} else if (
typeof process.env['GOOGLE_APPLICATION_CREDENTIALS'] !== 'undefined' &&
process.env['GOOGLE_APPLICATION_CREDENTIALS'].indexOf(
'google-cloud.json'
) === -1
) {
LogHelper.warning(
`The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already set with the following value: "${process.env['GOOGLE_APPLICATION_CREDENTIALS']}"`
)
}
// Dynamically attribute the synthesizer
const synthesizer = await import(
path.join(
__dirname,
'synthesizers',
TTSSynthesizers[TTS_PROVIDER as keyof typeof TTSSynthesizers]
)
)
this.synthesizer = new synthesizer(
LangHelper.getLongCode(this.lang)
) as TTSSynthesizer
this.onSaved()
LogHelper.title('TTS')
LogHelper.success('TTS initialized')
return true
}
/**
* Forward buffer audio file and duration to the client
* and delete audio file once it has been forwarded
*/
private async forward(speech: Speech): Promise<void> {
if (this.synthesizer) {
const result = await this.synthesizer.synthesize(speech.text)
if (!result) {
LogHelper.error(
'The TTS synthesizer failed to synthesize the speech as the result is null'
)
} else {
const { audioFilePath, duration } = result
const bitmap = fs.readFileSync(audioFilePath)
SOCKET_SERVER.socket.emit(
'audio-forwarded',
{
buffer: Buffer.from(bitmap),
is_final_answer: speech.isFinalAnswer,
duration
},
(confirmation: string) => {
if (confirmation === 'audio-received') {
fs.unlinkSync(audioFilePath)
}
}
)
}
} else {
LogHelper.error('The TTS synthesizer is not initialized yet')
}
}
/**
* When the synthesizer saved a new audio file
* then shift the queue according to the audio file duration
*/
private onSaved(): void {
this.em.on('saved', (duration) => {
setTimeout(async () => {
this.speeches.shift()
if (this.speeches[0]) {
await this.forward(this.speeches[0])
}
}, duration)
})
}
}

View File

@ -0,0 +1,28 @@
import type { AmazonPollyTTSSynthesizer } from '@/core/tts/synthesizers/amazon-polly-synthesizer'
export enum TTSProviders {
AmazonPolly = 'amazon-polly',
GoogleCloudTTS = 'google-cloud-tts',
WatsonTTS = 'watson-tts',
Flite = 'flite'
}
export enum TTSSynthesizers {
AmazonPolly = 'amazon-polly-synthesizer',
GoogleCloudTTS = 'google-cloud-tts-synthesizer',
WatsonTTS = 'watson-tts-synthesizer',
Flite = 'flite-synthesizer'
}
export type SynthesizeResult = {
audioFilePath: string
duration: number
}
// TODO
// export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | GoogleCloudTTSSynthesizer | WatsonTTSSynthesizer | undefined
export type TTSSynthesizer = AmazonPollyTTSSynthesizer | undefined
export interface TTSSynthesizerFacade {
synthesize(speech: string): Promise<SynthesizeResult | null>
}
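
As the TODO above notes, the TTSSynthesizer union will grow as the remaining synthesizers are ported. A hypothetical synthesizer sketch against the facade (class name, file path and duration are illustrative, not part of this diff):

import path from 'node:path'
import type { SynthesizeResult, TTSSynthesizerFacade } from '@/core/tts/types'
import { TMP_PATH } from '@/constants'

// Hypothetical synthesizer: returns a pre-rendered file instead of calling a
// real TTS provider, to show the expected SynthesizeResult shape
export class SilenceTTSSynthesizer implements TTSSynthesizerFacade {
  public async synthesize(speech: string): Promise<SynthesizeResult | null> {
    if (!speech) {
      return null
    }
    return {
      audioFilePath: path.join(TMP_PATH, 'silence.mp3'),
      duration: 1_000 // ms; a real synthesizer would read this via ffprobe
    }
  }
}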

server/src/declarations.d.ts (new vendored file, 3 lines)
View File

@ -0,0 +1,3 @@
declare module '@ffprobe-installer/ffprobe' {
export const path: string
}

View File

@ -1,7 +1,6 @@
import fs from 'node:fs'
import path from 'node:path'
import { IS_TESTING_ENV } from '@/constants'
import { DateHelper } from '@/helpers/date-helper'
export class LogHelper {
@ -48,12 +47,10 @@ export class LogHelper {
public static error(value: string): void {
const data = `${DateHelper.getDateTime()} - ${value}`
if (!IS_TESTING_ENV) {
if (fs.existsSync(LogHelper.ERRORS_PATH)) {
fs.appendFileSync(LogHelper.ERRORS_PATH, `\n${data}`)
} else {
fs.writeFileSync(LogHelper.ERRORS_PATH, data, { flag: 'wx' })
}
if (fs.existsSync(LogHelper.ERRORS_PATH)) {
fs.appendFileSync(LogHelper.ERRORS_PATH, `\n${data}`)
} else {
fs.writeFileSync(LogHelper.ERRORS_PATH, data, { flag: 'wx' })
}
console.error('\x1b[31m🚨 %s\x1b[0m', value)

View File

@ -1,88 +0,0 @@
import fs from 'node:fs'
import wav from 'node-wav'
import { Model } from 'stt'
import { IS_TESTING_ENV } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
LogHelper.title('Coqui STT Parser')
const parser = {}
let model = {}
let desiredSampleRate = 16000
/**
* Model and language model paths
*/
parser.conf = {
model: 'bin/coqui/model.tflite',
scorer: 'bin/coqui/huge-vocabulary.scorer'
}
/**
* Load models
*/
parser.init = (args) => {
LogHelper.info(`Loading model from file ${args.model}...`)
if (!fs.existsSync(args.model)) {
LogHelper.error(
`Cannot find ${args.model}. You can set up the offline STT by running: "npm run setup:offline-stt"`
)
return false
}
if (!fs.existsSync(args.scorer)) {
LogHelper.error(
`Cannot find ${args.scorer}. You can setup the offline STT by running: "npm run setup:offline-stt"`
)
return false
}
/* istanbul ignore if */
if (!IS_TESTING_ENV) {
try {
model = new Model(args.model)
} catch (error) {
throw Error(`model.stt: ${error}`)
}
desiredSampleRate = model.sampleRate()
try {
model.enableExternalScorer(args.scorer)
} catch (error) {
throw Error(`model.enableExternalScorer: ${error}`)
}
}
LogHelper.success('Model loaded')
return true
}
/**
* Parse file and infer
*/
parser.parse = (buffer, cb) => {
const wavDecode = wav.decode(buffer)
if (wavDecode.sampleRate < desiredSampleRate) {
LogHelper.warning(
`Original sample rate (${wavDecode.sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`
)
}
/* istanbul ignore if */
if (!IS_TESTING_ENV) {
const string = model.stt(buffer)
cb({ string })
}
return true
}
export default parser

View File

@ -1,60 +0,0 @@
import path from 'node:path'
import stt from '@google-cloud/speech'
import { LANG } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
LogHelper.title('Google Cloud STT Parser')
const parser = {}
let client = {}
parser.conf = {
languageCode: LANG,
encoding: 'LINEAR16',
sampleRateHertz: 16000
}
/**
* Initialize Google Cloud Speech-to-Text based on the credentials in the JSON file
* the env variable "GOOGLE_APPLICATION_CREDENTIALS" provides the JSON file path
*/
parser.init = () => {
process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(
process.cwd(),
'core/config/voice/google-cloud.json'
)
try {
client = new stt.SpeechClient()
LogHelper.success('Parser initialized')
} catch (e) {
LogHelper.error(`Google Cloud STT: ${e}`)
}
}
/**
* Read buffer and give back a string
*/
parser.parse = async (buffer, cb) => {
const audioBytes = buffer.toString('base64')
const audio = { content: audioBytes }
try {
const res = await client.recognize({
audio,
config: parser.conf
})
const string = res[0].results
.map((data) => data.alternatives[0].transcript)
.join('\n')
cb({ string })
} catch (e) {
LogHelper.error(`Google Cloud STT: ${e}`)
}
}
export default parser

View File

@ -1,133 +0,0 @@
import fs from 'node:fs'
import path from 'node:path'
import { IS_TESTING_ENV } from '@/constants'
import Asr from '@/core/asr'
import { LogHelper } from '@/helpers/log-helper'
class Stt {
constructor(socket, provider) {
this.socket = socket
this.provider = provider
this.providers = ['google-cloud-stt', 'watson-stt', 'coqui-stt']
this.parser = {}
LogHelper.title('STT')
LogHelper.success('New instance')
}
/**
* Initialize the STT provider
*/
init(cb) {
LogHelper.info('Initializing STT...')
if (!this.providers.includes(this.provider)) {
LogHelper.error(
`The STT provider "${this.provider}" does not exist or is not yet supported`
)
return false
}
/* istanbul ignore next */
if (
this.provider === 'google-cloud-stt' &&
typeof process.env.GOOGLE_APPLICATION_CREDENTIALS === 'undefined'
) {
process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(
process.cwd(),
'core/config/voice/google-cloud.json'
)
} else if (
typeof process.env.GOOGLE_APPLICATION_CREDENTIALS !== 'undefined' &&
process.env.GOOGLE_APPLICATION_CREDENTIALS.indexOf(
'google-cloud.json'
) === -1
) {
LogHelper.warning(
`The "GOOGLE_APPLICATION_CREDENTIALS" env variable is already settled with the following value: "${process.env.GOOGLE_APPLICATION_CREDENTIALS}"`
)
}
/* istanbul ignore if */
if (!IS_TESTING_ENV) {
// Dynamically attribute the parser
this.parser = require(`${__dirname}/${this.provider}/parser`)
this.parser.default.init(this.parser.default.conf)
}
LogHelper.title('STT')
LogHelper.success('STT initialized')
cb(this)
return true
}
/**
* Forward string output to the client
* and delete audio files once it has been forwarded
*/
forward(string) {
this.socket.emit('recognized', string, (confirmation) => {
/* istanbul ignore next */
if (confirmation === 'string-received') {
Stt.deleteAudios()
}
})
LogHelper.success(`Parsing result: ${string}`)
}
/**
* Read the speech file and parse
*/
parse(file) {
LogHelper.info('Parsing WAVE file...')
if (!fs.existsSync(file)) {
LogHelper.error(`The WAVE file "${file}" does not exist`)
return false
}
const buffer = fs.readFileSync(file)
/* istanbul ignore if */
if (!IS_TESTING_ENV) {
this.parser.default.parse(buffer, (data) => {
if (data.string !== '') {
// Forward the string to the client
this.forward(data.string)
} else {
Stt.deleteAudios()
}
})
}
return true
}
/**
* Delete audio files
*/
static deleteAudios() {
return new Promise((resolve) => {
const audios = Object.keys(Asr.audios)
for (let i = 0; i < audios.length; i += 1) {
const audio = Asr.audios[audios[i]]
if (fs.existsSync(audio)) {
fs.unlinkSync(Asr.audios[audios[i]])
}
if (i + 1 === audios.length) {
resolve()
}
}
})
}
}
export default Stt

View File

@ -1,79 +0,0 @@
import fs from 'node:fs'
import path from 'node:path'
import { Duplex } from 'node:stream'
import Stt from 'ibm-watson/speech-to-text/v1'
import { IamAuthenticator } from 'ibm-watson/auth'
import { LANG } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
LogHelper.title('Watson STT Parser')
const parser = {}
let client = {}
parser.conf = {
contentType: 'audio/wav',
model: `${LANG}_BroadbandModel`
}
/**
* Initialize Watson Speech-to-Text based on credentials in the JSON file
*/
parser.init = () => {
const config = JSON.parse(
fs.readFileSync(
path.join(process.cwd(), 'core/config/voice/watson-stt.json'),
'utf8'
)
)
try {
client = new Stt({
authenticator: new IamAuthenticator({ apikey: config.apikey }),
serviceUrl: config.url
})
LogHelper.success('Parser initialized')
} catch (e) {
LogHelper.error(`Watson STT: ${e}`)
}
}
/**
* Read buffer and give back a string
*/
parser.parse = async (buffer, cb) => {
const stream = new Duplex()
stream.push(buffer)
stream.push(null)
parser.conf.audio = stream
client
.recognize(parser.conf)
.then(({ result }) => {
const string = result.results
.map((data) => data.alternatives[0].transcript)
.join('\n')
cb({ string })
})
.catch((err) => {
LogHelper.error(`Watson STT: ${err}`)
})
client.recognize(parser.conf, (err, res) => {
if (err) {
LogHelper.error(`Watson STT: ${err}`)
} else {
const string = res.results
.map((data) => data.alternatives[0].transcript)
.join('\n')
cb({ string })
}
})
}
export default parser

View File

@ -6,6 +6,7 @@ import Ffmpeg from 'fluent-ffmpeg'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import { TMP_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
@ -52,9 +53,10 @@ synthesizer.init = (lang) => {
* Save string to audio file
*/
synthesizer.save = (speech, em, cb) => {
const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
4
)}.mp3`
const file = path.join(
TMP_PATH,
`${Date.now()}-${StringHelper.random(4)}.mp3`
)
synthesizer.conf.Text = speech
@ -74,7 +76,7 @@ synthesizer.save = (speech, em, cb) => {
ffmpeg.input(file).ffprobe((err, data) => {
if (err) LogHelper.error(err)
else {
const duration = data.streams[0].duration * 1000
const duration = data.streams[0].duration * 1_000
em.emit('saved', duration)
cb(file, duration)
}

View File

@ -1,10 +1,12 @@
import { spawn } from 'node:child_process'
import fs from 'node:fs'
import path from 'node:path'
import Ffmpeg from 'fluent-ffmpeg'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import { TMP_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
@ -25,14 +27,12 @@ synthesizer.conf = {
synthesizer.init = (lang) => {
const flitePath = 'bin/flite/flite'
/* istanbul ignore if */
if (lang !== 'en-US') {
LogHelper.warning(
'The Flite synthesizer only accepts the "en-US" language for the moment'
)
}
/* istanbul ignore if */
if (!fs.existsSync(flitePath)) {
LogHelper.error(
`Cannot find ${flitePath}. You can set up the offline TTS by running: "npm run setup:offline-tts"`
@ -49,9 +49,10 @@ synthesizer.init = (lang) => {
* Save string to audio file
*/
synthesizer.save = (speech, em, cb) => {
const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
4
)}.wav`
const file = path.join(
TMP_PATH,
`${Date.now()}-${StringHelper.random(4)}.wav`
)
const process = spawn('bin/flite/flite', [
speech,
'--setf',
@ -66,7 +67,6 @@ synthesizer.save = (speech, em, cb) => {
file
])
/* istanbul ignore next */
// Handle error
process.stderr.on('data', (data) => {
LogHelper.error(data.toString())
@ -79,10 +79,9 @@ synthesizer.save = (speech, em, cb) => {
// Get file duration thanks to ffprobe
ffmpeg.input(file).ffprobe((err, data) => {
/* istanbul ignore if */
if (err) LogHelper.error(err)
else {
const duration = data.streams[0].duration * 1000
const duration = data.streams[0].duration * 1_000
em.emit('saved', duration)
cb(file, duration)
}

View File

@ -6,6 +6,7 @@ import Ffmpeg from 'fluent-ffmpeg'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import { TMP_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
@ -58,9 +59,10 @@ synthesizer.init = (lang) => {
* Save string to audio file
*/
synthesizer.save = (speech, em, cb) => {
const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
4
)}.mp3`
const file = path.join(
TMP_PATH,
`${Date.now()}-${StringHelper.random(4)}.mp3`
)
synthesizer.conf.input = { text: speech }
@ -84,7 +86,7 @@ synthesizer.save = (speech, em, cb) => {
ffmpeg.input(file).ffprobe((err, data) => {
if (err) LogHelper.error(err)
else {
const duration = data.streams[0].duration * 1000
const duration = data.streams[0].duration * 1_000
em.emit('saved', duration)
cb(file, duration)
}

View File

@ -35,7 +35,6 @@ class Tts {
return false
}
/* istanbul ignore next */
if (
this.provider === 'google-cloud-tts' &&
typeof process.env.GOOGLE_APPLICATION_CREDENTIALS === 'undefined'
@ -59,6 +58,7 @@ class Tts {
this.synthesizer = require(`${__dirname}/${this.provider}/synthesizer`)
this.synthesizer.default.init(LangHelper.getLongCode(this.lang))
// TODO: do not use event emitter; and use async/await
this.onSaved()
LogHelper.title('TTS')
@ -75,9 +75,8 @@ class Tts {
*/
forward(speech) {
this.synthesizer.default.save(speech.text, this.em, (file, duration) => {
/* istanbul ignore next */
const bitmap = fs.readFileSync(file)
/* istanbul ignore next */
this.socket.emit(
'audio-forwarded',
{

View File

@ -7,6 +7,7 @@ import Ffmpeg from 'fluent-ffmpeg'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import { TMP_PATH } from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
@ -56,9 +57,10 @@ synthesizer.init = (lang) => {
* Save string to audio file
*/
synthesizer.save = (speech, em, cb) => {
const file = `${__dirname}/../../tmp/${Date.now()}-${StringHelper.random(
4
)}.wav`
const file = path.join(
TMP_PATH,
`${Date.now()}-${StringHelper.random(4)}.wav`
)
synthesizer.conf.text = speech
@ -78,7 +80,7 @@ synthesizer.save = (speech, em, cb) => {
ffmpeg.input(file).ffprobe((err, data) => {
if (err) LogHelper.error(err)
else {
const duration = data.streams[0].duration * 1000
const duration = data.streams[0].duration * 1_000
em.emit('saved', duration)
cb(file, duration)
}

View File

@ -2,7 +2,7 @@ import type { langs } from '@@/core/langs.json'
/**
* Contain common/shared types that are universal across the project
* and cannot be placed in the respective core chunks
* and cannot be placed in the respective core nodes
*/
/**

View File

@ -1,6 +1,6 @@
import fs from 'node:fs'
import Asr from '@/core/asr'
import Asr from '@/core/asr/asr'
import Stt from '@/stt/stt'
describe('ASR', () => {