1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-11-27 16:16:48 +03:00

feat(server): make Coqui STT the default STT solution

This commit is contained in:
louistiti 2022-01-31 14:43:54 +08:00
parent 79be099cc5
commit 7039918760
No known key found for this signature in database
GPG Key ID: 0A1C3B043E70C77D
9 changed files with 10 additions and 177 deletions

View File

@ -17,7 +17,7 @@ LEON_AFTER_SPEECH=false
# Enable/disable Leon's speech-to-text
LEON_STT=false
# Speech-to-text provider
LEON_STT_PROVIDER=deepspeech
LEON_STT_PROVIDER=coqui-stt
# Enable/disable Leon's text-to-speech
LEON_TTS=false

28
package-lock.json generated
View File

@ -23,7 +23,6 @@
"archiver": "^5.3.0",
"async": "^3.2.0",
"cross-env": "^7.0.3",
"deepspeech": "^0.9.3",
"dotenv": "^10.0.0",
"execa": "^5.0.0",
"fastify": "^3.25.2",
@ -7989,21 +7988,6 @@
"node": ">=0.10.0"
}
},
"node_modules/deepspeech": {
"version": "0.9.3",
"resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.9.3.tgz",
"integrity": "sha512-80yqUWEWgzclY9pjukpq0TdkQEbJ6NzqZ899vsLZfa4YcK35uOWIf+ILK55zQ+Ii/TEJ6Eo62Vc2saedQ/AK6w==",
"dependencies": {
"argparse": "1.0.x",
"memory-stream": "1.0.x",
"node-pre-gyp": "0.15.x",
"node-wav": "0.0.2",
"sox-stream": "2.0.x"
},
"bin": {
"deepspeech": "client.js"
}
},
"node_modules/defaults": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.3.tgz",
@ -26790,18 +26774,6 @@
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.2.2.tgz",
"integrity": "sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg=="
},
"deepspeech": {
"version": "0.9.3",
"resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.9.3.tgz",
"integrity": "sha512-80yqUWEWgzclY9pjukpq0TdkQEbJ6NzqZ899vsLZfa4YcK35uOWIf+ILK55zQ+Ii/TEJ6Eo62Vc2saedQ/AK6w==",
"requires": {
"argparse": "1.0.x",
"memory-stream": "1.0.x",
"node-pre-gyp": "0.15.x",
"node-wav": "0.0.2",
"sox-stream": "2.0.x"
}
},
"defaults": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.3.tgz",

View File

@ -66,7 +66,6 @@
"archiver": "^5.3.0",
"async": "^3.2.0",
"cross-env": "^7.0.3",
"deepspeech": "^0.9.3",
"dotenv": "^10.0.0",
"execa": "^5.0.0",
"fastify": "^3.25.2",

View File

@ -1,32 +1 @@
const fs = require('fs')
const path = require('path')
const os = require('os')
/**
 * Preinstall hook: strip the DeepSpeech dependency from package.json on Windows
 *
 * On a Windows host, package.json is first copied to package.json.backup,
 * then rewritten without its "deepspeech" dependency.
 * On any other platform only the installation banner is printed.
 */
console.info('\x1b[36m➡ %s\x1b[0m', 'Running Leon\'s installation...')

if (os.type().includes('Windows')) {
  const packageJsonPath = path.join(__dirname, '../../package.json')
  const packageJson = require(packageJsonPath) // eslint-disable-line global-require

  console.warn('\x1b[33m❗ %s\x1b[0m', 'The Leon\'s voice offline mode is not available on Windows')
  console.info('\x1b[36m➡ %s\x1b[0m', 'Backing up package.json...')

  // The backup is resolved against the current working directory, not against packageJsonPath
  fs.copyFileSync('package.json', 'package.json.backup')
  console.log('\x1b[32m✔ %s\x1b[0m', 'package.json has been backed up')

  try {
    // Guard every level so that a package.json without a "dependencies" section
    // is simply skipped instead of being reported as a removal failure
    if (packageJson?.dependencies?.deepspeech) {
      console.info('\x1b[36m➡ %s\x1b[0m', 'Removing DeepSpeech dependency...')

      delete packageJson.dependencies.deepspeech
      fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2))

      console.log('\x1b[32m✔ %s\x1b[0m', 'DeepSpeech dependency has been removed.')
    }
  } catch (e) {
    // Keep the hook best-effort, but surface the reason of the failure
    console.error('\x1b[31m✖ %s\x1b[0m', `Failed to remove DeepSpeech dependency: ${e.message}`)
  }
}

View File

@ -1,8 +1,5 @@
import fs from 'fs'
import loader from '@/helpers/loader'
import log from '@/helpers/log'
import os from '@/helpers/os'
import train from '../train'
import setupDotenv from './setup-dotenv'
@ -17,8 +14,6 @@ import setupPythonPackages from './setup-python-packages'
*/
(async () => {
try {
const info = os.get()
// Required env vars to setup
process.env.LEON_LANG = 'en-US'
process.env.PIPENV_PIPFILE = 'bridges/python/Pipfile'
@ -32,12 +27,6 @@ import setupPythonPackages from './setup-python-packages'
])
await setupPythonPackages()
await train()
if (info.type === 'windows') {
log.info('Windows detected, reinjecting DeepSpeech into package.json...')
fs.unlinkSync('package.json')
fs.renameSync('package.json.backup', 'package.json')
log.success('DeepSpeech has been reinjected into package.json')
}
log.default('')
log.success('Hooray! Leon is installed and ready to go!')

View File

@ -1,95 +0,0 @@
import wav from 'node-wav'
import fs from 'fs'
import log from '@/helpers/log'
log.title('DeepSpeech Parser')
// Parser object exposed by this module; conf, init and parse are attached below
const parser = { }
// Holds the DeepSpeech binding; stays an empty object when no binding can be required
let DeepSpeech = { }
// Try the GPU binding first, then fall back to the CPU one.
// A missing library is only logged here, it does not stop the module from loading
/* istanbul ignore next */
try {
DeepSpeech = require('deepspeech-gpu') // eslint-disable-line global-require, import/no-unresolved
log.success('GPU version found')
} catch (eGpu) {
log.info('GPU version not found, trying to get the CPU version...')
try {
DeepSpeech = require('deepspeech') // eslint-disable-line global-require, import/no-unresolved
log.success('CPU version found')
} catch (eCpu) {
// Report both failures so the reason of each attempt is visible
log.error(`No DeepSpeech library found:\nGPU: ${eGpu}\nCPU: ${eCpu}`)
}
}
// Placeholder replaced by a DeepSpeech.Model instance in parser.init (outside of the testing environment)
let model = { }
// Initial value in Hz; overwritten with model.sampleRate() in parser.init
let desiredSampleRate = 16000
/**
* Model and language model paths
*
* model: the acoustic model file
* scorer: the external scorer file
*/
parser.conf = {
model: 'bin/deepspeech/deepspeech.pbmm',
scorer: 'bin/deepspeech/deepspeech.scorer'
}
/**
 * Load the model and its external scorer
 *
 * @param {{ model: string, scorer: string }} args Paths of the model file and of the scorer file
 * @returns {boolean} true when the parser is ready; false when one of the files
 * or the DeepSpeech library itself is missing
 */
parser.init = (args) => {
  /* istanbul ignore if */
  if (process.env.LEON_LANG !== 'en-US') {
    log.warning('The DeepSpeech parser only accepts the "en-US" language for the moment')
  }

  log.info(`Loading model from file ${args.model}...`)

  // Both files are mandatory; only the first missing one is reported
  const missingFile = [args.model, args.scorer].find((file) => !fs.existsSync(file))
  if (missingFile !== undefined) {
    log.error(`Cannot find ${missingFile}. You can setup the offline STT by running: "npm run setup:offline-stt"`)
    return false
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    // DeepSpeech is still an empty object when neither the GPU nor the CPU binding
    // could be required: fail like the other error paths instead of throwing a TypeError
    if (typeof DeepSpeech.Model !== 'function') {
      log.error('The DeepSpeech library is not available, the model cannot be loaded')
      return false
    }

    model = new DeepSpeech.Model(args.model)
    // Use the sample rate expected by the loaded model for the later checks
    desiredSampleRate = model.sampleRate()
    model.enableExternalScorer(args.scorer)
  }

  log.success('Model loaded')

  return true
}
/**
 * Decode the given WAV buffer, then run the inference on it
 *
 * @param {Buffer} buffer Audio content handed to wav.decode and to the model
 * @param {Function} cb Receives { string } with the inferred text (never called in the testing environment)
 * @returns {boolean} Always true
 */
parser.parse = (buffer, cb) => {
  const { sampleRate } = wav.decode(buffer)
  const isBelowDesiredRate = sampleRate < desiredSampleRate

  // Only warn: the inference is still attempted with a lower sample rate
  if (isBelowDesiredRate) {
    log.warning(`Original sample rate (${sampleRate}) is lower than ${desiredSampleRate}Hz. Up-sampling might produce erratic speech recognition`)
  }

  /* istanbul ignore if */
  if (process.env.LEON_NODE_ENV !== 'testing') {
    cb({ string: model.stt(buffer) })
  }

  return true
}
export default parser

View File

@ -8,7 +8,6 @@ class Stt {
this.socket = socket
this.provider = provider
this.providers = [
'deepspeech',
'google-cloud-stt',
'watson-stt',
'coqui-stt'

View File

@ -1,11 +1,11 @@
import fs from 'fs'
import parser from '@/stt/deepspeech/parser'
import parser from '@/stt/coqui-stt/parser'
describe('DeepSpeech STT parser', () => {
describe('Coqui STT parser', () => {
// Only run these tests if the models exist
if (fs.existsSync(`${global.paths.root}/bin/deepspeech/deepspeech.pbmm`)
&& fs.existsSync(`${global.paths.root}/bin/deepspeech/deepspeech.scorer`)) {
if (fs.existsSync(`${global.paths.root}/bin/coqui/model.tflite`)
&& fs.existsSync(`${global.paths.root}/bin/coqui/huge-vocabulary.scorer`)) {
describe('init()', () => {
test('returns error cannot find model', () => {
expect(parser.init({
@ -15,15 +15,15 @@ describe('DeepSpeech STT parser', () => {
test('returns error cannot find scorer', () => {
expect(parser.init({
model: `${global.paths.root}/bin/deepspeech/deepspeech.pbmm`,
model: `${global.paths.root}/bin/coqui/model.tflite`,
scorer: 'fake-scorer-path'
})).toBeFalsy()
})
test('returns true because all of the paths are good', () => {
expect(parser.init({
model: `${global.paths.root}/bin/deepspeech/deepspeech.pbmm`,
scorer: `${global.paths.root}/bin/deepspeech/deepspeech.scorer`
model: `${global.paths.root}/bin/coqui/model.tflite`,
scorer: `${global.paths.root}/bin/coqui/huge-vocabulary.scorer`
})).toBeTruthy()
})
})

View File

@ -3,7 +3,7 @@ import Stt from '@/stt/stt'
describe('STT', () => {
describe('constructor()', () => {
test('creates a new instance of Stt', () => {
const stt = new Stt({ }, 'deepspeech')
const stt = new Stt({ }, 'coqui-stt')
expect(stt).toBeInstanceOf(Stt)
})
@ -17,7 +17,7 @@ describe('STT', () => {
})
test('initializes the STT parser', () => {
const stt = new Stt({ }, 'deepspeech')
const stt = new Stt({ }, 'coqui-stt')
expect(stt.init()).toBeTruthy()
})