mirror of
https://github.com/leon-ai/leon.git
synced 2024-11-27 16:16:48 +03:00
feat(server): make Coqui STT the default STT solution
This commit is contained in:
parent
79be099cc5
commit
7039918760
@ -17,7 +17,7 @@ LEON_AFTER_SPEECH=false
|
||||
# Enable/disable Leon's speech-to-text
|
||||
LEON_STT=false
|
||||
# Speech-to-text provider
|
||||
LEON_STT_PROVIDER=deepspeech
|
||||
LEON_STT_PROVIDER=coqui-stt
|
||||
|
||||
# Enable/disable Leon's text-to-speech
|
||||
LEON_TTS=false
|
||||
|
28
package-lock.json
generated
28
package-lock.json
generated
@ -23,7 +23,6 @@
|
||||
"archiver": "^5.3.0",
|
||||
"async": "^3.2.0",
|
||||
"cross-env": "^7.0.3",
|
||||
"deepspeech": "^0.9.3",
|
||||
"dotenv": "^10.0.0",
|
||||
"execa": "^5.0.0",
|
||||
"fastify": "^3.25.2",
|
||||
@ -7989,21 +7988,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/deepspeech": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.9.3.tgz",
|
||||
"integrity": "sha512-80yqUWEWgzclY9pjukpq0TdkQEbJ6NzqZ899vsLZfa4YcK35uOWIf+ILK55zQ+Ii/TEJ6Eo62Vc2saedQ/AK6w==",
|
||||
"dependencies": {
|
||||
"argparse": "1.0.x",
|
||||
"memory-stream": "1.0.x",
|
||||
"node-pre-gyp": "0.15.x",
|
||||
"node-wav": "0.0.2",
|
||||
"sox-stream": "2.0.x"
|
||||
},
|
||||
"bin": {
|
||||
"deepspeech": "client.js"
|
||||
}
|
||||
},
|
||||
"node_modules/defaults": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.3.tgz",
|
||||
@ -26790,18 +26774,6 @@
|
||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.2.2.tgz",
|
||||
"integrity": "sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg=="
|
||||
},
|
||||
"deepspeech": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.9.3.tgz",
|
||||
"integrity": "sha512-80yqUWEWgzclY9pjukpq0TdkQEbJ6NzqZ899vsLZfa4YcK35uOWIf+ILK55zQ+Ii/TEJ6Eo62Vc2saedQ/AK6w==",
|
||||
"requires": {
|
||||
"argparse": "1.0.x",
|
||||
"memory-stream": "1.0.x",
|
||||
"node-pre-gyp": "0.15.x",
|
||||
"node-wav": "0.0.2",
|
||||
"sox-stream": "2.0.x"
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.3.tgz",
|
||||
|
@ -66,7 +66,6 @@
|
||||
"archiver": "^5.3.0",
|
||||
"async": "^3.2.0",
|
||||
"cross-env": "^7.0.3",
|
||||
"deepspeech": "^0.9.3",
|
||||
"dotenv": "^10.0.0",
|
||||
"execa": "^5.0.0",
|
||||
"fastify": "^3.25.2",
|
||||
|
@ -1,32 +1 @@
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const os = require('os')
|
||||
|
||||
/**
 * Preinstall hook: on Windows, remove the DeepSpeech dependency
 * from package.json
 *
 * A copy of the manifest is written to "package.json.backup"
 * before the manifest is modified
 */

console.info('\x1b[36m➡ %s\x1b[0m', 'Running Leon\'s installation...')

if (os.type().includes('Windows')) {
  const packageJsonPath = path.join(__dirname, '../../package.json')
  const packageJson = require(packageJsonPath) // eslint-disable-line global-require

  console.warn('\x1b[33m❗ %s\x1b[0m', 'The Leon\'s voice offline mode is not available on Windows')
  console.info('\x1b[36m➡ %s\x1b[0m', 'Backing up package.json...')
  // NOTE: the backup paths are resolved from the current working directory,
  // whereas the manifest edited below is resolved from this file's location
  fs.copyFileSync('package.json', 'package.json.backup')
  console.log('\x1b[32m✔ %s\x1b[0m', 'package.json has been backed up')

  try {
    // Guard every link of the chain: a manifest without a "dependencies"
    // section simply has nothing to remove and must not be reported as a failure
    if (packageJson?.dependencies?.deepspeech) {
      console.info('\x1b[36m➡ %s\x1b[0m', 'Removing DeepSpeech dependency...')

      delete packageJson.dependencies.deepspeech
      fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2))

      console.log('\x1b[32m✔ %s\x1b[0m', 'DeepSpeech dependency has been removed.')
    }
  } catch (e) {
    // Keep the installation going, but surface the cause of the failure
    console.error('\x1b[31m✖ %s\x1b[0m', 'Failed to remove DeepSpeech dependency', e)
  }
}
|
||||
|
@ -1,8 +1,5 @@
|
||||
import fs from 'fs'
|
||||
|
||||
import loader from '@/helpers/loader'
|
||||
import log from '@/helpers/log'
|
||||
import os from '@/helpers/os'
|
||||
|
||||
import train from '../train'
|
||||
import setupDotenv from './setup-dotenv'
|
||||
@ -17,8 +14,6 @@ import setupPythonPackages from './setup-python-packages'
|
||||
*/
|
||||
(async () => {
|
||||
try {
|
||||
const info = os.get()
|
||||
|
||||
// Required env vars to setup
|
||||
process.env.LEON_LANG = 'en-US'
|
||||
process.env.PIPENV_PIPFILE = 'bridges/python/Pipfile'
|
||||
@ -32,12 +27,6 @@ import setupPythonPackages from './setup-python-packages'
|
||||
])
|
||||
await setupPythonPackages()
|
||||
await train()
|
||||
if (info.type === 'windows') {
|
||||
log.info('Windows detected, reinjecting DeepSpeech into package.json...')
|
||||
fs.unlinkSync('package.json')
|
||||
fs.renameSync('package.json.backup', 'package.json')
|
||||
log.success('DeepSpeech has been reinjected into package.json')
|
||||
}
|
||||
|
||||
log.default('')
|
||||
log.success('Hooray! Leon is installed and ready to go!')
|
||||
|
@ -1,95 +0,0 @@
|
||||
import wav from 'node-wav'
|
||||
import fs from 'fs'
|
||||
|
||||
import log from '@/helpers/log'
|
||||
|
||||
log.title('DeepSpeech Parser')

const parser = { }

// Module state: the loaded binding, the model instance created by parser.init()
// and the sample rate used by parser.parse() (16000 until a model reports its own)
let DeepSpeech = { }
let model = { }
let desiredSampleRate = 16000

/**
 * Load the DeepSpeech binding: the GPU build first, then the CPU build.
 * When both fail, DeepSpeech stays an empty object and both errors are logged
 */
/* istanbul ignore next */
try {
  DeepSpeech = require('deepspeech-gpu') // eslint-disable-line global-require, import/no-unresolved

  log.success('GPU version found')
} catch (gpuError) {
  log.info('GPU version not found, trying to get the CPU version...')

  try {
    DeepSpeech = require('deepspeech') // eslint-disable-line global-require, import/no-unresolved

    log.success('CPU version found')
  } catch (cpuError) {
    log.error(`No DeepSpeech library found:\nGPU: ${gpuError}\nCPU: ${cpuError}`)
  }
}

/**
 * Model and language model paths
 */
parser.conf = {
  model: 'bin/deepspeech/deepspeech.pbmm',
  scorer: 'bin/deepspeech/deepspeech.scorer'
}
|
||||
|
||||
/**
 * Load models
 *
 * Checks that the model and scorer files exist, then (outside of the
 * testing environment) creates the DeepSpeech model, adopts its sample rate
 * and enables the external scorer
 *
 * @param {Object} args
 * @param {string} args.model Path of the model file
 * @param {string} args.scorer Path of the scorer file
 * @returns {boolean} true when loading succeeded (or was skipped while testing),
 * false when a file or the DeepSpeech library is missing
 */
parser.init = (args) => {
  /* istanbul ignore if */
  if (process.env.LEON_LANG !== 'en-US') {
    log.warning('The DeepSpeech parser only accepts the "en-US" language for the moment')
  }

  log.info(`Loading model from file ${args.model}...`)

  if (!fs.existsSync(args.model)) {
    log.error(`Cannot find ${args.model}. You can setup the offline STT by running: "npm run setup:offline-stt"`)

    return false
  }

  if (!fs.existsSync(args.scorer)) {
    log.error(`Cannot find ${args.scorer}. You can setup the offline STT by running: "npm run setup:offline-stt"`)

    return false
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    // When neither the GPU nor the CPU binding could be required, DeepSpeech
    // is still the empty placeholder object: report it like the other
    // failure paths instead of throwing "DeepSpeech.Model is not a constructor"
    if (typeof DeepSpeech.Model !== 'function') {
      log.error('Cannot load the model because no DeepSpeech library has been found')

      return false
    }

    model = new DeepSpeech.Model(args.model)
    desiredSampleRate = model.sampleRate()

    model.enableExternalScorer(args.scorer)
  }

  log.success('Model loaded')

  return true
}
|
||||
|
||||
/**
 * Parse file and infer
 *
 * Decodes the WAV buffer to compare its sample rate with the desired one,
 * then (outside of the testing environment) runs the speech-to-text
 * on the buffer and passes the result to the callback as { string }
 *
 * @param {Buffer} buffer WAV audio content
 * @param {Function} cb Called with { string } once the inference is done
 * @returns {boolean} Always true
 */
parser.parse = (buffer, cb) => {
  const { sampleRate } = wav.decode(buffer)
  const isUpSamplingNeeded = sampleRate < desiredSampleRate

  if (isUpSamplingNeeded) {
    log.warning(`Original sample rate (${sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`)
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    cb({ string: model.stt(buffer) })
  }

  return true
}
|
||||
|
||||
export default parser
|
@ -8,7 +8,6 @@ class Stt {
|
||||
this.socket = socket
|
||||
this.provider = provider
|
||||
this.providers = [
|
||||
'deepspeech',
|
||||
'google-cloud-stt',
|
||||
'watson-stt',
|
||||
'coqui-stt'
|
||||
|
@ -1,11 +1,11 @@
|
||||
import fs from 'fs'
|
||||
|
||||
import parser from '@/stt/deepspeech/parser'
|
||||
import parser from '@/stt/coqui-stt/parser'
|
||||
|
||||
describe('DeepSpeech STT parser', () => {
|
||||
describe('Coqui STT parser', () => {
|
||||
// Only run these tests if the models exist
|
||||
if (fs.existsSync(`${global.paths.root}/bin/deepspeech/deepspeech.pbmm`)
|
||||
&& fs.existsSync(`${global.paths.root}/bin/deepspeech/deepspeech.scorer`)) {
|
||||
if (fs.existsSync(`${global.paths.root}/bin/coqui/model.tflite`)
|
||||
&& fs.existsSync(`${global.paths.root}/bin/coqui/huge-vocabulary.scorer`)) {
|
||||
describe('init()', () => {
|
||||
test('returns error cannot find model', () => {
|
||||
expect(parser.init({
|
||||
@ -15,15 +15,15 @@ describe('DeepSpeech STT parser', () => {
|
||||
|
||||
test('returns error cannot find scorer', () => {
|
||||
expect(parser.init({
|
||||
model: `${global.paths.root}/bin/deepspeech/deepspeech.pbmm`,
|
||||
model: `${global.paths.root}/bin/coqui/model.tflite`,
|
||||
scorer: 'fake-scorer-path'
|
||||
})).toBeFalsy()
|
||||
})
|
||||
|
||||
test('returns true because all of the paths are good', () => {
|
||||
expect(parser.init({
|
||||
model: `${global.paths.root}/bin/deepspeech/deepspeech.pbmm`,
|
||||
scorer: `${global.paths.root}/bin/deepspeech/deepspeech.scorer`
|
||||
model: `${global.paths.root}/bin/coqui/model.tflite`,
|
||||
scorer: `${global.paths.root}/bin/coqui/huge-vocabulary.scorer`
|
||||
})).toBeTruthy()
|
||||
})
|
||||
})
|
@ -3,7 +3,7 @@ import Stt from '@/stt/stt'
|
||||
describe('STT', () => {
|
||||
describe('constructor()', () => {
|
||||
test('creates a new instance of Stt', () => {
|
||||
const stt = new Stt({ }, 'deepspeech')
|
||||
const stt = new Stt({ }, 'coqui-stt')
|
||||
|
||||
expect(stt).toBeInstanceOf(Stt)
|
||||
})
|
||||
@ -17,7 +17,7 @@ describe('STT', () => {
|
||||
})
|
||||
|
||||
test('initializes the STT parser', () => {
|
||||
const stt = new Stt({ }, 'deepspeech')
|
||||
const stt = new Stt({ }, 'coqui-stt')
|
||||
|
||||
expect(stt.init()).toBeTruthy()
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user