1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-08-16 21:50:33 +03:00

feat: Apple Silicon support for voice models

This commit is contained in:
louistiti 2024-06-18 09:02:09 +08:00
parent c9afb25ed3
commit 32dc3ced0b
25 changed files with 295 additions and 320654 deletions

8
.gitignore vendored
View File

@ -24,21 +24,17 @@ debug.log
leon.json
bridges/python/src/Pipfile.lock
tcp_server/src/Pipfile.lock
tcp_server/src/lib/tts/models/*.pth
tcp_server/src/lib/tts/models/**/*.bin
tcp_server/src/lib/tts/models/**/*.json
tcp_server/src/lib/tts/models/**/*.txt
tcp_server/src/lib/asr/models/**/*.bin
!tcp_server/**/.gitkeep
!bridges/python/**/.gitkeep
!bridges/nodejs/**/.gitkeep
!core/data/models/llm/.gitkeep
!core/data/models/**/.gitkeep
!**/*.sample*
skills/**/src/settings.json
skills/**/memory/*.json
core/data/models/*.nlp
core/data/models/*.json
core/data/models/llm/*
core/data/models/audio/**/*
package.json.backup
.python-version
schemas/**/*.json

View File

@ -9,11 +9,7 @@ export class Settings<T extends Record<string, unknown>> {
constructor() {
this.settingsPath = path.join(SKILL_PATH, 'src', 'settings.json')
this.settingsSamplePath = path.join(
SKILL_PATH,
'src',
'settings.sample.json'
)
this.settingsSamplePath = path.join(SKILL_PATH, 'src', 'settings.json')
}
/**
@ -32,7 +28,7 @@ export class Settings<T extends Record<string, unknown>> {
}
/**
* Clear the settings and set it to the default settings.sample.json file
* Clear the settings and set it to the default settings.json file
* @example clear()
*/
public async clear(): Promise<void> {

View File

@ -9,7 +9,7 @@ from ..constants import SKILL_PATH
class Settings:
def __init__(self):
self.settings_path = path.join(SKILL_PATH, 'src', 'settings.json')
self.settings_sample_path = path.join(SKILL_PATH, 'src', 'settings.sample.json')
self.settings_sample_path = path.join(SKILL_PATH, 'src', 'settings.json')
def is_setting_set(self, key: str) -> bool:
"""
@ -23,7 +23,7 @@ class Settings:
def clear(self) -> None:
"""
Clear the settings and set it to the default settings.sample.json file
Clear the settings and set it to the default settings.json file
"""
settings_sample = self.get_settings_sample()
self.set(settings_sample)

View File

@ -1,6 +1,5 @@
import fs from 'node:fs'
import path from 'node:path'
import stream from 'node:stream'
import { command } from 'execa'
@ -10,23 +9,12 @@ import {
FR_SPACY_MODEL_NAME,
FR_SPACY_MODEL_VERSION,
PYTHON_BRIDGE_SRC_PATH,
PYTHON_TCP_SERVER_SRC_PATH,
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
// PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH,
PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL
PYTHON_TCP_SERVER_SRC_PATH
} from '@/constants'
import { CPUArchitectures, OSTypes } from '@/types'
import { LogHelper } from '@/helpers/log-helper'
import { LoaderHelper } from '@/helpers/loader-helper'
import { SystemHelper } from '@/helpers/system-helper'
import { FileHelper } from '@/helpers/file-helper'
/**
* Set up development environment according to the given setup target
@ -54,32 +42,6 @@ function getModelInstallationFileUrl(model, mirror = undefined) {
return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
}
const ASR_GPU_MODEL_FILES = [
'model.bin',
'config.json',
'preprocessor_config.json',
'tokenizer.json',
'vocabulary.json'
]
const ASR_CPU_MODEL_FILES = [
'model.bin',
'config.json',
'tokenizer.json',
'vocabulary.txt'
]
/*const TTS_BERT_FRENCH_MODEL_FILES = [
'pytorch_model.bin', // Not needed? Compare with HF auto download in ~/.cache/huggingface/hub...
'config.json',
'vocab.txt',
'tokenizer_config.json'
]*/
const TTS_BERT_BASE_MODEL_FILES = [
'pytorch_model.bin',
'config.json',
'vocab.txt',
'tokenizer_config.json',
'tokenizer.json'
]
const SETUP_TARGETS = new Map()
const SPACY_MODELS = new Map()
@ -342,141 +304,6 @@ SPACY_MODELS.set('fr', {
process.exit(1)
}
}
const installTTSModel = async () => {
try {
LogHelper.info('Installing TTS model...')
const destPath = fs.createWriteStream(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
)
LogHelper.info(`Downloading TTS model...`)
const response = await FileHelper.downloadFile(
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`TTS model downloaded at ${destPath.path}`)
} catch (e) {
LogHelper.error(`Failed to install TTS model: ${e}`)
process.exit(1)
}
}
const installASRModelForGPU = async () => {
try {
LogHelper.info('Installing ASR model for GPU...')
for (const modelFile of ASR_GPU_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('ASR model for GPU installed')
} catch (e) {
LogHelper.error(`Failed to install ASR model for GPU: ${e}`)
process.exit(1)
}
}
const installASRModelForCPU = async () => {
try {
LogHelper.info('Installing ASR model for CPU...')
for (const modelFile of ASR_CPU_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('ASR model for CPU installed')
} catch (e) {
LogHelper.error(`Failed to install ASR model for CPU: ${e}`)
process.exit(1)
}
}
/*const installTTSBERTFrenchModel = async () => {
try {
LogHelper.info('Installing TTS BERT French model...')
for (const modelFile of TTS_BERT_FRENCH_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT French model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT French model: ${e}`)
process.exit(1)
}
}*/
const installTTSBERTBaseModel = async () => {
try {
LogHelper.info('Installing TTS BERT base model...')
for (const modelFile of TTS_BERT_BASE_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT base model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT base model: ${e}`)
process.exit(1)
}
}
LogHelper.info('Checking whether all spaCy models are installed...')
@ -501,84 +328,6 @@ SPACY_MODELS.set('fr', {
LogHelper.info('Not all spaCy models are installed')
await installSpacyModels()
}
LogHelper.info(
'Checking whether TTS BERT base language model files are downloaded...'
)
const areTTSBERTBaseFilesDownloaded = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH,
TTS_BERT_BASE_MODEL_FILES[TTS_BERT_BASE_MODEL_FILES.length - 1]
)
)
if (!areTTSBERTBaseFilesDownloaded) {
LogHelper.info('TTS BERT base language model files not downloaded')
await installTTSBERTBaseModel()
} else {
LogHelper.success(
'TTS BERT base language model files are already downloaded'
)
}
// TODO: later when multiple languages are supported
/*LogHelper.info(
'Checking whether TTS BERT French language model files are downloaded...'
)
const areTTSBERTFrenchFilesDownloaded = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH,
TTS_BERT_FRENCH_MODEL_FILES[TTS_BERT_FRENCH_MODEL_FILES.length - 1]
)
)
if (!areTTSBERTFrenchFilesDownloaded) {
LogHelper.info('TTS BERT French language model files not downloaded')
await installTTSBERTFrenchModel()
} else {
LogHelper.success(
'TTS BERT French language model files are already downloaded'
)
}*/
LogHelper.info('Checking whether the TTS model is installed...')
const isTTSModelInstalled = fs.existsSync(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
)
if (!isTTSModelInstalled) {
LogHelper.info('TTS model is not installed')
await installTTSModel()
} else {
LogHelper.success('TTS model is already installed')
}
LogHelper.info('Checking whether the ASR model for GPU is installed...')
// Check if model.bin file exists in directory (last file in the list)
const isASRModelForGPUInstalled = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
ASR_GPU_MODEL_FILES[ASR_GPU_MODEL_FILES.length - 1]
)
)
if (!isASRModelForGPUInstalled) {
LogHelper.info('ASR model for GPU is not installed')
await installASRModelForGPU()
} else {
LogHelper.success('ASR model for GPU is already installed')
}
LogHelper.info('Checking whether the ASR model for CPU is installed...')
// Check if model.bin file exists in directory (last file in the list)
const isASRModelForCPUInstalled = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
ASR_CPU_MODEL_FILES[ASR_CPU_MODEL_FILES.length - 1]
)
)
if (!isASRModelForCPUInstalled) {
LogHelper.info('ASR model for CPU is not installed')
await installASRModelForCPU()
} else {
LogHelper.success('ASR model for CPU is already installed')
}
}
LogHelper.success(`${setupTarget} development environment ready`)

View File

@ -11,7 +11,7 @@ import { LogHelper } from '@/helpers/log-helper'
export default async function (skillFriendlyName, currentSkill) {
const skillSrcPath = path.join(currentSkill.path, 'src')
const settingsPath = path.join(skillSrcPath, 'settings.json')
const settingsSamplePath = path.join(skillSrcPath, 'settings.sample.json')
const settingsSamplePath = path.join(skillSrcPath, 'settings.json')
// If there is a bridge set from the skill settings
if (currentSkill.bridge) {
@ -61,12 +61,12 @@ export default async function (skillFriendlyName, currentSkill) {
}
}
} else if (!fs.existsSync(settingsSamplePath)) {
// Stop the setup if the settings.sample.json of the current skill does not exist
// Stop the setup if the settings.json of the current skill does not exist
LogHelper.error(
`The "${skillFriendlyName}" skill settings file does not exist. Try to pull the project (git pull)`
)
} else {
// Duplicate settings.sample.json of the current skill to settings.json
// Duplicate settings.json of the current skill to settings.json
fs.createReadStream(settingsSamplePath).pipe(
fs.createWriteStream(`${skillSrcPath}/settings.json`)
)

View File

@ -0,0 +1,206 @@
import fs from 'node:fs'
import path from 'node:path'
import stream from 'node:stream'
import {
PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_MODEL_PATH,
PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL
} from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { FileHelper } from '@/helpers/file-helper'
// Files composing the ASR (faster-whisper) model hosted on Hugging Face.
// NOTE(review): the LAST entry doubles as the "already installed" sentinel
// checked by the setup entry point below — keep it last in the list.
const ASR_MODEL_FILES = [
  'model.bin',
  'config.json',
  'preprocessor_config.json',
  'tokenizer.json',
  'vocabulary.json'
]
// Kept for later (multi-language support): files of the French BERT model
// used by the TTS pipeline.
/*const TTS_BERT_FRENCH_MODEL_FILES = [
  'pytorch_model.bin', // Not needed? Compare with HF auto download in ~/.cache/huggingface/hub...
  'config.json',
  'vocab.txt',
  'tokenizer_config.json'
]*/
// Files composing the base (uncased) BERT model required by the TTS engine.
// Same convention as above: the last entry is the download sentinel.
const TTS_BERT_BASE_MODEL_FILES = [
  'pytorch_model.bin',
  'config.json',
  'vocab.txt',
  'tokenizer_config.json',
  'tokenizer.json'
]
/**
 * Download the single-file TTS voice model from Hugging Face and stream it
 * to its expected on-disk location (PYTHON_TCP_SERVER_TTS_MODEL_PATH).
 * Exits the process on any failure.
 */
async function installTTSModel() {
  try {
    LogHelper.info('Installing TTS model...')
    const writeStream = fs.createWriteStream(PYTHON_TCP_SERVER_TTS_MODEL_PATH)
    LogHelper.info(`Downloading TTS model...`)
    const downloadResponse = await FileHelper.downloadFile(
      PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
      'stream'
    )
    downloadResponse.data.pipe(writeStream)
    // Wait until the write stream has fully flushed to disk
    await stream.promises.finished(writeStream)
    LogHelper.success(`TTS model downloaded at ${writeStream.path}`)
  } catch (error) {
    LogHelper.error(`Failed to install TTS model: ${error}`)
    process.exit(1)
  }
}
/**
 * Download every file of the ASR (faster-whisper) model from Hugging Face
 * into the local ASR model directory, sequentially, one file at a time.
 * Exits the process on any failure.
 */
async function installASRModel() {
  try {
    LogHelper.info('Installing ASR model...')
    for (const fileName of ASR_MODEL_FILES) {
      const fileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL}/${fileName}?download=true`
      const writeStream = fs.createWriteStream(
        path.join(PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH, fileName)
      )
      LogHelper.info(`Downloading ${fileName}...`)
      const downloadResponse = await FileHelper.downloadFile(fileURL, 'stream')
      downloadResponse.data.pipe(writeStream)
      // Block until this file is fully flushed before starting the next one
      await stream.promises.finished(writeStream)
      LogHelper.success(`${fileName} downloaded at ${writeStream.path}`)
    }
    LogHelper.success('ASR model installed')
  } catch (error) {
    LogHelper.error(`Failed to install ASR model: ${error}`)
    process.exit(1)
  }
}
/*async function installTTSBERTFrenchModel() {
try {
LogHelper.info('Installing TTS BERT French model...')
for (const modelFile of TTS_BERT_FRENCH_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT French model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT French model: ${e}`)
process.exit(1)
}
}*/
/**
 * Download every file of the base (uncased) BERT model used by the TTS
 * engine from Hugging Face into its local directory, one file at a time.
 * Exits the process on any failure.
 */
async function installTTSBERTBaseModel() {
  try {
    LogHelper.info('Installing TTS BERT base model...')
    for (const fileName of TTS_BERT_BASE_MODEL_FILES) {
      const fileURL = `${PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL}/${fileName}?download=true`
      const writeStream = fs.createWriteStream(
        path.join(PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH, fileName)
      )
      LogHelper.info(`Downloading ${fileName}...`)
      const downloadResponse = await FileHelper.downloadFile(fileURL, 'stream')
      downloadResponse.data.pipe(writeStream)
      // Block until this file is fully flushed before starting the next one
      await stream.promises.finished(writeStream)
      LogHelper.success(`${fileName} downloaded at ${writeStream.path}`)
    }
    LogHelper.success('TTS BERT base model installed')
  } catch (error) {
    LogHelper.error(`Failed to install TTS BERT base model: ${error}`)
    process.exit(1)
  }
}
/**
 * Set up all audio models required by the Python TCP server: the BERT base
 * model used by the TTS engine, the TTS voice model, and the ASR model.
 * Each model is downloaded only when its sentinel file is missing on disk,
 * so re-running the setup is cheap when everything is already installed.
 */
export default async () => {
  LogHelper.info(
    'Checking whether TTS BERT base language model files are downloaded...'
  )
  // Presence of the LAST listed file is used as the "fully downloaded" marker
  // (files are downloaded in order, so the last one lands last)
  const areTTSBERTBaseFilesDownloaded = fs.existsSync(
    path.join(
      PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH,
      TTS_BERT_BASE_MODEL_FILES[TTS_BERT_BASE_MODEL_FILES.length - 1]
    )
  )
  if (!areTTSBERTBaseFilesDownloaded) {
    LogHelper.info('TTS BERT base language model files not downloaded')
    await installTTSBERTBaseModel()
  } else {
    LogHelper.success(
      'TTS BERT base language model files are already downloaded'
    )
  }
  // TODO: later when multiple languages are supported
  /*LogHelper.info(
    'Checking whether TTS BERT French language model files are downloaded...'
  )
  const areTTSBERTFrenchFilesDownloaded = fs.existsSync(
    path.join(
      PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH,
      TTS_BERT_FRENCH_MODEL_FILES[TTS_BERT_FRENCH_MODEL_FILES.length - 1]
    )
  )
  if (!areTTSBERTFrenchFilesDownloaded) {
    LogHelper.info('TTS BERT French language model files not downloaded')
    await installTTSBERTFrenchModel()
  } else {
    LogHelper.success(
      'TTS BERT French language model files are already downloaded'
    )
  }*/
  LogHelper.info('Checking whether the TTS model is installed...')
  // The TTS voice model is a single file, so its own path is the sentinel
  const isTTSModelInstalled = fs.existsSync(PYTHON_TCP_SERVER_TTS_MODEL_PATH)
  if (!isTTSModelInstalled) {
    LogHelper.info('TTS model is not installed')
    await installTTSModel()
  } else {
    LogHelper.success('TTS model is already installed')
  }
  LogHelper.info('Checking whether the ASR model is installed...')
  // Check whether the last file in the list exists in the directory
  // (NOTE(review): original comment said "model.bin" but the last entry of
  // ASR_MODEL_FILES is 'vocabulary.json' — that is the file actually checked)
  const isASRModelInstalled = fs.existsSync(
    path.join(
      PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH,
      ASR_MODEL_FILES[ASR_MODEL_FILES.length - 1]
    )
  )
  if (!isASRModelInstalled) {
    LogHelper.info('ASR model is not installed')
    await installASRModel()
  } else {
    LogHelper.success('ASR model is already installed')
  }
}

View File

@ -11,6 +11,7 @@ import setupCore from './setup-core'
import setupSkills from './setup-skills/setup-skills'
import setupLLM from './setup-llm'
import setupBinaries from './setup-binaries'
import setupTCPServerModels from './setup-tcp-server-models'
import createInstanceID from './create-instance-id'
// Do not load ".env" file because it is not created yet
@ -32,6 +33,7 @@ import createInstanceID from './create-instance-id'
}
await setupBinaries()
await setupTCPServerModels()
await generateHTTPAPIKey()
await generateJSONSchemas()
LoaderHelper.start()

View File

@ -15,6 +15,37 @@ const TESTING_ENV = 'testing'
export const GITHUB_URL = 'https://github.com/leon-ai/leon'
/**
* Environments
*/
export const LEON_NODE_ENV = process.env['LEON_NODE_ENV'] || PRODUCTION_ENV
export const IS_PRODUCTION_ENV = LEON_NODE_ENV === PRODUCTION_ENV
export const IS_DEVELOPMENT_ENV = LEON_NODE_ENV === DEVELOPMENT_ENV
export const IS_TESTING_ENV = LEON_NODE_ENV === TESTING_ENV
/**
* Paths
*/
export const BIN_PATH = path.join(process.cwd(), 'bin')
export const LOGS_PATH = path.join(process.cwd(), 'logs')
export const SKILLS_PATH = path.join(process.cwd(), 'skills')
export const GLOBAL_DATA_PATH = path.join(process.cwd(), 'core', 'data')
export const MODELS_PATH = path.join(GLOBAL_DATA_PATH, 'models')
export const AUDIO_MODELS_PATH = path.join(MODELS_PATH, 'audio')
export const VOICE_CONFIG_PATH = path.join(
process.cwd(),
'core',
'config',
'voice'
)
export const SERVER_PATH = path.join(
process.cwd(),
'server',
IS_PRODUCTION_ENV ? 'dist' : 'src'
)
export const TMP_PATH = path.join(SERVER_PATH, 'tmp')
export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/**
* Binaries / distribution
*/
@ -46,52 +77,43 @@ export const PYTHON_TCP_SERVER_SRC_PATH = path.join(
PYTHON_TCP_SERVER_ROOT_PATH,
'src'
)
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME =
'EN-Leon-V1_1-G_600000.pth'
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH = path.join(
PYTHON_TCP_SERVER_SRC_PATH,
'lib',
'tts',
'models'
export const PYTHON_TCP_SERVER_SETTINGS_PATH = path.join(
PYTHON_TCP_SERVER_ROOT_PATH,
'settings.json'
)
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH = path.join(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH,
PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME
export const PYTHON_TCP_SERVER_SETTINGS = JSON.parse(
fs.readFileSync(PYTHON_TCP_SERVER_SETTINGS_PATH, 'utf8')
)
export const PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH = path.join(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH,
export const PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME =
PYTHON_TCP_SERVER_SETTINGS.tts.model_file_name
export const PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH = path.join(
AUDIO_MODELS_PATH,
'tts'
)
export const PYTHON_TCP_SERVER_TTS_MODEL_PATH = path.join(
PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME
)
export const PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH = path.join(
PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
'bert-base-french-europeana-cased'
)
export const PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH = path.join(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH,
export const PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH = path.join(
PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
'bert-base-uncased'
)
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH = path.join(
PYTHON_TCP_SERVER_SRC_PATH,
'lib',
'asr',
'models'
)
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU = path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
'gpu'
)
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU = path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
'cpu'
export const PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH = path.join(
AUDIO_MODELS_PATH,
'asr'
)
export const PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL(
`https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME}?download=true`
`https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME}?download=true`
)
export const PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL =
export const PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/Systran/faster-distil-whisper-large-v3/resolve/main'
)
export const PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/Systran/faster-whisper-medium/resolve/main'
)
export const PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/dbmdz/bert-base-french-europeana-cased/resolve/main'
@ -179,14 +201,6 @@ export const EN_SPACY_MODEL_VERSION = '3.4.0'
export const FR_SPACY_MODEL_NAME = 'fr_core_news_md'
export const FR_SPACY_MODEL_VERSION = '3.4.0'
/**
* Environments
*/
export const LEON_NODE_ENV = process.env['LEON_NODE_ENV'] || PRODUCTION_ENV
export const IS_PRODUCTION_ENV = LEON_NODE_ENV === PRODUCTION_ENV
export const IS_DEVELOPMENT_ENV = LEON_NODE_ENV === DEVELOPMENT_ENV
export const IS_TESTING_ENV = LEON_NODE_ENV === TESTING_ENV
/**
* Leon environment preferences
*/
@ -215,28 +229,6 @@ export const PYTHON_TCP_SERVER_PORT = Number(
export const IS_TELEMETRY_ENABLED = process.env['LEON_TELEMETRY'] === 'true'
/**
* Paths
*/
export const BIN_PATH = path.join(process.cwd(), 'bin')
export const LOGS_PATH = path.join(process.cwd(), 'logs')
export const SKILLS_PATH = path.join(process.cwd(), 'skills')
export const GLOBAL_DATA_PATH = path.join(process.cwd(), 'core', 'data')
export const MODELS_PATH = path.join(GLOBAL_DATA_PATH, 'models')
export const VOICE_CONFIG_PATH = path.join(
process.cwd(),
'core',
'config',
'voice'
)
export const SERVER_PATH = path.join(
process.cwd(),
'server',
IS_PRODUCTION_ENV ? 'dist' : 'src'
)
export const TMP_PATH = path.join(SERVER_PATH, 'tmp')
export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/**
* NLP models paths
*/

View File

@ -3,6 +3,8 @@
"rms_threshold": 196,
"device": "auto"
},
"tts": {},
"tts": {
"model_file_name": "EN-Leon-V1_1-G_600000.pth"
},
"wake_word": {}
}

View File

@ -5,12 +5,14 @@ import torch
import numpy as np
from faster_whisper import WhisperModel
from ..constants import ASR_MODEL_PATH_FOR_GPU, ASR_MODEL_PATH_FOR_CPU
from ..constants import ASR_MODEL_PATH
from ..utils import ThrottledCallback, is_macos, get_settings
class ASR:
def __init__(self,
# @see https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py
# auto, cpu, cuda
device='auto',
interrupt_leon_speech_callback=None,
transcribed_callback=None,
@ -66,7 +68,7 @@ class ASR:
self.channels = 1
self.rate = 16000
self.frames_per_buffer = 1024
self.rms_threshold = get_settings('asr')['']
self.rms_threshold = get_settings('asr')['rms_threshold']
# Duration of silence after which the audio data is considered as a new utterance (in seconds)
self.silence_duration = 1
"""
@ -80,23 +82,16 @@ class ASR:
self.stream = None
self.model = None
model_params = {
'model_size_or_path': ASR_MODEL_PATH,
'device': self.device,
'compute_type': self.compute_type,
'local_files_only': True
}
if self.device == 'cpu':
model_path = ASR_MODEL_PATH_FOR_CPU
self.model = WhisperModel(
model_path,
device=self.device,
compute_type=self.compute_type,
local_files_only=True,
cpu_threads=4
)
else:
model_path = ASR_MODEL_PATH_FOR_GPU
self.model = WhisperModel(
model_path,
device=self.device,
compute_type=self.compute_type,
local_files_only=True
)
model_params['cpu_threads'] = 4
self.model = WhisperModel(**model_params)
self.log('Model loaded')
toc = time.perf_counter()

View File

@ -1,31 +0,0 @@
{
"alignment_heads": [
[13, 15],
[15, 4],
[15, 15],
[16, 1],
[20, 0],
[23, 4]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362
],
"suppress_ids_begin": [220, 50257]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,46 +0,0 @@
{
"alignment_heads": [
[1, 0],
[1, 1],
[1, 2],
[1, 3],
[1, 4],
[1, 5],
[1, 6],
[1, 7],
[1, 8],
[1, 9],
[1, 10],
[1, 11],
[1, 12],
[1, 13],
[1, 14],
[1, 15],
[1, 16],
[1, 17],
[1, 18],
[1, 19]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357,
50358
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50359, 50360, 50361, 50362, 50363
],
"suppress_ids_begin": [220, 50257]
}

View File

@ -1,14 +0,0 @@
{
"chunk_length": 30,
"feature_extractor_type": "WhisperFeatureExtractor",
"feature_size": 128,
"hop_length": 160,
"n_fft": 400,
"n_samples": 480000,
"nb_max_frames": 3000,
"padding_side": "right",
"padding_value": 0.0,
"processor_class": "WhisperProcessor",
"return_attention_mask": false,
"sampling_rate": 16000
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -10,24 +10,16 @@ if IS_RAN_FROM_BINARY:
LIB_PATH = os.path.join(os.path.dirname(sys.executable), 'lib', 'lib')
TMP_PATH = os.path.join(LIB_PATH, 'tmp')
AUDIO_MODELS_PATH = os.path.join(os.getcwd(), 'core', 'data', 'models', 'audio')
SETTINGS_PATH = os.path.join(os.getcwd(), 'tcp_server', 'settings.json')
# TTS
TTS_MODEL_VERSION = 'V1_1'
TTS_MODEL_ITERATION = '600000'
TTS_MODEL_NAME = f'EN-Leon-{TTS_MODEL_VERSION}-G_{TTS_MODEL_ITERATION}'
TTS_MODEL_FILE_NAME = f'{TTS_MODEL_NAME}.pth'
TTS_LIB_PATH = os.path.join(LIB_PATH, 'tts')
TTS_MODEL_FOLDER_PATH = os.path.join(TTS_LIB_PATH, 'models')
TTS_MODEL_FOLDER_PATH = os.path.join(AUDIO_MODELS_PATH, 'tts')
# Directory of the French BERT model for TTS.
# Fixed typo: 'bert-case-french-...' -> 'bert-base-french-europeana-cased',
# matching the JS constant and the HF repo dbmdz/bert-base-french-europeana-cased.
TTS_BERT_FRENCH_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-base-french-europeana-cased')
TTS_BERT_BASE_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-base-uncased')
TTS_MODEL_CONFIG_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'config.json')
TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, TTS_MODEL_FILE_NAME)
IS_TTS_ENABLED = os.environ.get('LEON_TTS', 'true') == 'true'
# ASR
ASR_LIB_PATH = os.path.join(LIB_PATH, 'asr')
ASR_MODEL_FOLDER_PATH = os.path.join(ASR_LIB_PATH, 'models')
ASR_MODEL_PATH_FOR_GPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'gpu')
ASR_MODEL_PATH_FOR_CPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'cpu')
ASR_MODEL_PATH = os.path.join(AUDIO_MODELS_PATH, 'asr')
IS_ASR_ENABLED = os.environ.get('LEON_STT', 'true') == 'true'

View File

@ -13,12 +13,14 @@ from .asr.api import ASR
from .tts.api import TTS
from .constants import (
TTS_MODEL_CONFIG_PATH,
TTS_MODEL_PATH,
TTS_MODEL_FOLDER_PATH,
IS_TTS_ENABLED,
TMP_PATH,
IS_ASR_ENABLED
)
TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, get_settings('tts')['model_file_name'])
class TCPServer:
def __init__(self, host: str, port: Union[str, int]):

View File

@ -1,288 +0,0 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 52,
"epochs": 10000,
"learning_rate": 0.0003,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 8,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 16384,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0,
"skip_optimizer": true
},
"data": {
"training_files": "/output/V1/model/train.list",
"validation_files": "/output/V1/model/val.list",
"max_wav_value": 32768.0,
"sampling_rate": 44100,
"filter_length": 2048,
"hop_length": 512,
"win_length": 2048,
"n_mel_channels": 128,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 1,
"cleaned_text": true,
"spk2id": {
"EN-Leon-V1_1": 0
}
},
"model": {
"use_spk_conditioned_encoder": true,
"use_noise_scaled_mas": true,
"use_mel_posterior_encoder": false,
"use_duration_discriminator": true,
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"n_layers_trans_flow": 3,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3, 7, 11],
"resblock_dilation_sizes": [
[1, 3, 5],
[1, 3, 5],
[1, 3, 5]
],
"upsample_rates": [8, 8, 2, 2, 2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16, 16, 8, 2, 2],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"num_languages": 8,
"num_tones": 16,
"symbols": [
"_",
"\"",
"(",
")",
"*",
"/",
":",
"AA",
"E",
"EE",
"En",
"N",
"OO",
"Q",
"V",
"[",
"\\",
"]",
"^",
"a",
"a:",
"aa",
"ae",
"ah",
"ai",
"an",
"ang",
"ao",
"aw",
"ay",
"b",
"by",
"c",
"ch",
"d",
"dh",
"dy",
"e",
"e:",
"eh",
"ei",
"en",
"eng",
"er",
"ey",
"f",
"g",
"gy",
"h",
"hh",
"hy",
"i",
"i0",
"i:",
"ia",
"ian",
"iang",
"iao",
"ie",
"ih",
"in",
"ing",
"iong",
"ir",
"iu",
"iy",
"j",
"jh",
"k",
"ky",
"l",
"m",
"my",
"n",
"ng",
"ny",
"o",
"o:",
"ong",
"ou",
"ow",
"oy",
"p",
"py",
"q",
"r",
"ry",
"s",
"sh",
"t",
"th",
"ts",
"ty",
"u",
"u:",
"ua",
"uai",
"uan",
"uang",
"uh",
"ui",
"un",
"uo",
"uw",
"v",
"van",
"ve",
"vn",
"w",
"x",
"y",
"z",
"zh",
"zy",
"~",
"æ",
"ç",
"ð",
"ø",
"ŋ",
"œ",
"ɐ",
"ɑ",
"ɒ",
"ɔ",
"ɕ",
"ə",
"ɛ",
"ɜ",
"ɡ",
"ɣ",
"ɥ",
"ɦ",
"ɪ",
"ɫ",
"ɬ",
"ɭ",
"ɯ",
"ɲ",
"ɵ",
"ɸ",
"ɹ",
"ɾ",
"ʁ",
"ʃ",
"ʊ",
"ʌ",
"ʎ",
"ʏ",
"ʑ",
"ʒ",
"ʝ",
"ʲ",
"ˈ",
"ˌ",
"ː",
"̃",
"̩",
"β",
"θ",
"ᄀ",
"ᄁ",
"ᄂ",
"ᄃ",
"ᄄ",
"ᄅ",
"ᄆ",
"ᄇ",
"ᄈ",
"ᄉ",
"ᄊ",
"ᄋ",
"ᄌ",
"ᄍ",
"ᄎ",
"ᄏ",
"ᄐ",
"ᄑ",
"ᄒ",
"ᅡ",
"ᅢ",
"ᅣ",
"ᅤ",
"ᅥ",
"ᅦ",
"ᅧ",
"ᅨ",
"ᅩ",
"ᅪ",
"ᅫ",
"ᅬ",
"ᅭ",
"ᅮ",
"ᅯ",
"ᅰ",
"ᅱ",
"ᅲ",
"ᅳ",
"ᅴ",
"ᅵ",
"ᆨ",
"ᆫ",
"ᆮ",
"ᆯ",
"ᆷ",
"ᆸ",
"ᆼ",
"ㄸ",
"!",
"?",
"…",
",",
".",
"'",
"-",
"¿",
"¡",
"SP",
"UNK"
]
}