1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-08-16 21:50:33 +03:00

feat: simple coqui-ai stt integration

This commit is contained in:
Johann Barbie 2022-01-23 12:50:51 +01:00
parent e6246d1f8f
commit 86a4816b77
5 changed files with 108 additions and 8 deletions

0
bin/coqui/.gitkeep Normal file
View File

View File

@ -78,6 +78,7 @@
"node-wav": "0.0.2",
"socket.io": "^4.4.0",
"socket.io-client": "^4.4.0",
"stt": "^1.1.0",
"superagent": "^6.1.0"
},
"devDependencies": {

View File

@ -10,23 +10,23 @@ import os from '@/helpers/os'
export default () => new Promise(async (resolve, reject) => {
log.info('Setting up offline speech-to-text...')
const destDeepSpeechFolder = 'bin/deepspeech'
const destCoquiFolder = 'bin/coqui'
const tmpDir = 'scripts/tmp'
const deepSpeechVersion = '0.9.3'
const coquiVersion = '1.0.0'
let downloader = 'wget'
if (os.get().type === 'macos') {
downloader = 'curl -L -O'
}
if (!fs.existsSync(`${destDeepSpeechFolder}/deepspeech.scorer`)) {
if (!fs.existsSync(`${destCoquiFolder}/model.tflite`)) {
try {
log.info('Downloading pre-trained model...')
await command(`cd ${tmpDir} && ${downloader} https://github.com/mozilla/DeepSpeech/releases/download/v${deepSpeechVersion}/deepspeech-${deepSpeechVersion}-models.pbmm`, { shell: true })
await command(`cd ${tmpDir} && ${downloader} https://github.com/mozilla/DeepSpeech/releases/download/v${deepSpeechVersion}/deepspeech-${deepSpeechVersion}-models.scorer`, { shell: true })
await command(`cd ${tmpDir} && ${downloader} https://github.com/coqui-ai/STT-models/releases/download/english/coqui/v${coquiVersion}-huge-vocab/model.tflite`, { shell: true })
await command(`cd ${tmpDir} && ${downloader} https://github.com/coqui-ai/STT-models/releases/download/english/coqui/v${coquiVersion}-huge-vocab/huge-vocabulary.scorer`, { shell: true })
log.success('Pre-trained model download done')
log.info('Moving...')
await command(`mv -f ${tmpDir}/deepspeech-${deepSpeechVersion}-models.pbmm ${destDeepSpeechFolder}/deepspeech.pbmm`, { shell: true })
await command(`mv -f ${tmpDir}/deepspeech-${deepSpeechVersion}-models.scorer ${destDeepSpeechFolder}/deepspeech.scorer`, { shell: true })
await command(`mv -f ${tmpDir}/model.tflite ${destCoquiFolder}/model.tflite`, { shell: true })
await command(`mv -f ${tmpDir}/huge-vocabulary.scorer ${destCoquiFolder}/huge-vocabulary.scorer`, { shell: true })
log.success('Move done')
log.success('Offline speech-to-text installed')

View File

@ -0,0 +1,98 @@
import wav from 'node-wav'
import fs from 'fs'
import log from '@/helpers/log'
log.title('Coqui-ai Parser')

// Parser object exposed by this module; its members are attached below
const parser = {}

// Coqui STT binding; remains an empty object when no build can be required
let STT = {}
let gpuFailure
let shouldTryCpu = false

/**
 * Step 1: prefer the GPU build ("stt-gpu")
 */
/* istanbul ignore next */
try {
  STT = require('stt-gpu') // eslint-disable-line global-require, import/no-unresolved
  log.success('GPU version found')
} catch (gpuError) {
  // Keep the GPU failure so it can be reported if the CPU build fails too
  gpuFailure = gpuError
  shouldTryCpu = true
  log.info('GPU version not found, trying to get the CPU version...')
}

/**
 * Step 2: fall back to the CPU build ("stt") only when step 1 failed
 */
/* istanbul ignore next */
if (shouldTryCpu) {
  try {
    STT = require('stt') // eslint-disable-line global-require, import/no-unresolved
    log.success('CPU version found')
  } catch (cpuFailure) {
    // Not rethrown: the module still loads, the problem is only logged here
    log.error(`No Coqui-ai library found:\nGPU: ${gpuFailure}\nCPU: ${cpuFailure}`)
  }
}
// Loaded model instance; a placeholder object until parser.init() replaces it
let model = {}

// Sample rate (Hz) compared against incoming audio in parser.parse();
// parser.init() overwrites it with the value reported by the loaded model
let desiredSampleRate = 16000

/**
 * Default locations of the acoustic model and of the external scorer
 * (language model), relative to the working directory
 */
parser.conf = {
  model: 'bin/coqui/model.tflite',
  scorer: 'bin/coqui/huge-vocabulary.scorer'
}
/**
 * Load the acoustic model and enable its external scorer
 *
 * Both files must exist on disk. When the LEON_NODE_ENV environment variable
 * is "testing", only the existence checks run and the model is not loaded
 *
 * @param {Object} args - files to load (same shape as parser.conf)
 * @param {string} args.model - path of the acoustic model file
 * @param {string} args.scorer - path of the external scorer file
 * @returns {boolean} false when one of the files is missing, true otherwise
 * @throws {Error} when the model cannot be created or the scorer cannot be enabled
 */
parser.init = (args) => {
  log.info(`Loading model from file ${args.model}...`)

  // The model is checked first, then the scorer; stop at the first missing file
  for (const requiredFile of [args.model, args.scorer]) {
    if (!fs.existsSync(requiredFile)) {
      log.error(`Cannot find ${requiredFile}. You can setup the offline STT by running: "npm run setup:offline-stt"`)
      return false
    }
  }

  const isTesting = process.env.LEON_NODE_ENV === 'testing'

  /* istanbul ignore if */
  if (!isTesting) {
    try {
      model = new STT.Model(args.model)
    } catch (error) {
      throw Error(`model.stt: ${error}`)
    }

    // From now on parser.parse() compares audio against the model's own rate
    desiredSampleRate = model.sampleRate()

    try {
      model.enableExternalScorer(args.scorer)
    } catch (error) {
      throw Error(`model.enableExternalScorer: ${error}`)
    }
  }

  log.success('Model loaded')
  return true
}
/**
 * Decode a WAV buffer, warn when its sample rate is below the desired one,
 * then run the inference on it
 *
 * When the LEON_NODE_ENV environment variable is "testing", the inference is
 * skipped and the callback is not called
 *
 * @param {Buffer} buffer - WAV audio, given as is to wav.decode() and model.stt()
 * @param {Function} cb - called with { string } where string is the model.stt() result
 * @returns {boolean} always true
 */
parser.parse = (buffer, cb) => {
  const { sampleRate } = wav.decode(buffer)
  const isBelowDesiredRate = sampleRate < desiredSampleRate

  if (isBelowDesiredRate) {
    log.warning(`Original sample rate (${sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`)
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    cb({ string: model.stt(buffer) })
  }

  return true
}
export default parser

View File

@ -10,7 +10,8 @@ class Stt {
this.providers = [
'deepspeech',
'google-cloud-stt',
'watson-stt'
'watson-stt',
'coqui-stt'
]
this.parser = { }