mirror of
https://github.com/leon-ai/leon.git
synced 2024-10-26 18:18:46 +03:00
feat: simple coqui-ai stt integration
This commit is contained in:
parent
e6246d1f8f
commit
86a4816b77
0
bin/coqui/.gitkeep
Normal file
0
bin/coqui/.gitkeep
Normal file
@ -78,6 +78,7 @@
|
||||
"node-wav": "0.0.2",
|
||||
"socket.io": "^4.4.0",
|
||||
"socket.io-client": "^4.4.0",
|
||||
"stt": "^1.1.0",
|
||||
"superagent": "^6.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
@ -10,23 +10,23 @@ import os from '@/helpers/os'
|
||||
export default () => new Promise(async (resolve, reject) => {
|
||||
log.info('Setting up offline speech-to-text...')
|
||||
|
||||
const destDeepSpeechFolder = 'bin/deepspeech'
|
||||
const destCoquiFolder = 'bin/coqui'
|
||||
const tmpDir = 'scripts/tmp'
|
||||
const deepSpeechVersion = '0.9.3'
|
||||
const coquiVersion = '1.0.0'
|
||||
let downloader = 'wget'
|
||||
if (os.get().type === 'macos') {
|
||||
downloader = 'curl -L -O'
|
||||
}
|
||||
|
||||
if (!fs.existsSync(`${destDeepSpeechFolder}/deepspeech.scorer`)) {
|
||||
if (!fs.existsSync(`${destCoquiFolder}/model.tflite`)) {
|
||||
try {
|
||||
log.info('Downloading pre-trained model...')
|
||||
await command(`cd ${tmpDir} && ${downloader} https://github.com/mozilla/DeepSpeech/releases/download/v${deepSpeechVersion}/deepspeech-${deepSpeechVersion}-models.pbmm`, { shell: true })
|
||||
await command(`cd ${tmpDir} && ${downloader} https://github.com/mozilla/DeepSpeech/releases/download/v${deepSpeechVersion}/deepspeech-${deepSpeechVersion}-models.scorer`, { shell: true })
|
||||
await command(`cd ${tmpDir} && ${downloader} https://github.com/coqui-ai/STT-models/releases/download/english/coqui/v${coquiVersion}-huge-vocab/model.tflite`, { shell: true })
|
||||
await command(`cd ${tmpDir} && ${downloader} https://github.com/coqui-ai/STT-models/releases/download/english/coqui/v${coquiVersion}-huge-vocab/huge-vocabulary.scorer`, { shell: true })
|
||||
log.success('Pre-trained model download done')
|
||||
log.info('Moving...')
|
||||
await command(`mv -f ${tmpDir}/deepspeech-${deepSpeechVersion}-models.pbmm ${destDeepSpeechFolder}/deepspeech.pbmm`, { shell: true })
|
||||
await command(`mv -f ${tmpDir}/deepspeech-${deepSpeechVersion}-models.scorer ${destDeepSpeechFolder}/deepspeech.scorer`, { shell: true })
|
||||
await command(`mv -f ${tmpDir}/model.tflite ${destCoquiFolder}/model.tflite`, { shell: true })
|
||||
await command(`mv -f ${tmpDir}/huge-vocabulary.scorer ${destCoquiFolder}/huge-vocabulary.scorer`, { shell: true })
|
||||
log.success('Move done')
|
||||
log.success('Offline speech-to-text installed')
|
||||
|
||||
|
98
server/src/stt/coqui-stt/parser.js
Normal file
98
server/src/stt/coqui-stt/parser.js
Normal file
@ -0,0 +1,98 @@
|
||||
import wav from 'node-wav'
|
||||
import fs from 'fs'
|
||||
|
||||
import log from '@/helpers/log'
|
||||
|
||||
log.title('Coqui-ai Parser')

const parser = { }

// Native Coqui STT bindings; prefer the GPU build, fall back to the CPU build
let STT = { }

/* istanbul ignore next */
try {
  STT = require('stt-gpu') // eslint-disable-line global-require, import/no-unresolved

  log.success('GPU version found')
} catch (eGpu) {
  log.info('GPU version not found, trying to get the CPU version...')

  try {
    STT = require('stt') // eslint-disable-line global-require, import/no-unresolved

    log.success('CPU version found')
  } catch (eCpu) {
    // Neither flavor is installed; STT stays an empty object and init will fail later
    log.error(`No Coqui-ai library found:\nGPU: ${eGpu}\nCPU: ${eCpu}`)
  }
}

// Loaded model instance (set by parser.init) and the sample rate it expects.
// 16 kHz is only a placeholder until model.sampleRate() overrides it.
let model = { }
let desiredSampleRate = 16000
|
||||
|
||||
/**
 * Default on-disk locations of the acoustic model and its
 * external language-model scorer (populated by "npm run setup:offline-stt")
 */
parser.conf = {
  model: 'bin/coqui/model.tflite',
  scorer: 'bin/coqui/huge-vocabulary.scorer'
}
|
||||
|
||||
/**
 * Load the Coqui STT model and attach its external scorer.
 *
 * @param {Object} args - Paths to the model files
 * @param {String} args.model - Path to the .tflite acoustic model
 * @param {String} args.scorer - Path to the external scorer file
 * @returns {Boolean} false when a model file is missing; true once loaded
 *   (model loading itself is skipped under LEON_NODE_ENV === 'testing')
 * @throws {Error} When the model or scorer fails to load
 */
parser.init = (args) => {
  log.info(`Loading model from file ${args.model}...`)

  if (!fs.existsSync(args.model)) {
    log.error(`Cannot find ${args.model}. You can setup the offline STT by running: "npm run setup:offline-stt"`)

    return false
  }

  if (!fs.existsSync(args.scorer)) {
    log.error(`Cannot find ${args.scorer}. You can setup the offline STT by running: "npm run setup:offline-stt"`)

    return false
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    try {
      model = new STT.Model(args.model)
    } catch (error) {
      // Fixed error label: the failing call here is the STT.Model constructor,
      // not model.stt (the previous message was misleading)
      throw new Error(`STT.Model: ${error}`)
    }
    // The model dictates the sample rate inference expects
    desiredSampleRate = model.sampleRate()

    try {
      model.enableExternalScorer(args.scorer)
    } catch (error) {
      throw new Error(`model.enableExternalScorer: ${error}`)
    }
  }

  log.success('Model loaded')

  return true
}
|
||||
|
||||
/**
 * Decode a WAV buffer and run speech-to-text inference on it.
 *
 * @param {Buffer} buffer - WAV-encoded audio
 * @param {Function} cb - Called with { string } holding the transcription
 * @returns {Boolean} always true
 */
parser.parse = (buffer, cb) => {
  const decoded = wav.decode(buffer)

  // A recording below the model's expected rate must be up-sampled,
  // which can hurt recognition quality — warn but keep going
  if (decoded.sampleRate < desiredSampleRate) {
    log.warning(`Original sample rate (${decoded.sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`)
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    const string = model.stt(buffer)

    cb({ string })
  }

  return true
}
|
||||
|
||||
export default parser
|
@ -10,7 +10,8 @@ class Stt {
|
||||
this.providers = [
|
||||
'deepspeech',
|
||||
'google-cloud-stt',
|
||||
'watson-stt'
|
||||
'watson-stt',
|
||||
'coqui-stt'
|
||||
]
|
||||
this.parser = { }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user