Mirror of https://github.com/leon-ai/leon.git

feat(server): always load ASR model from local

Commit b02d510490 (parent 5855990bd5)
Author: louistiti
Date: 2024-05-22 15:22:30 +08:00
GPG Key ID: 92CD6A2E497E1669 (no known key found for this signature in database)
18 changed files with 320286 additions and 102 deletions

.gitignore vendored

@@ -25,6 +25,7 @@ leon.json
 bridges/python/src/Pipfile.lock
 tcp_server/src/Pipfile.lock
 tcp_server/src/lib/tts/models/*.pth
+tcp_server/src/lib/asr/models/**/*.bin
 !tcp_server/**/.gitkeep
 !bridges/python/**/.gitkeep
 !bridges/nodejs/**/.gitkeep


@@ -98,7 +98,9 @@ export default class Client {
     this.socket.on('asr-end-of-owner-speech', () => {
       console.log('End of owner speech')
-      this.send('utterance')
+      setTimeout(() => {
+        this.send('utterance')
+      }, 300)
     })

     /**


@ -1,6 +1,5 @@
import fs from 'node:fs'
import path from 'node:path'
import dns from 'node:dns'
import stream from 'node:stream'
import { command } from 'execa'
@@ -13,7 +12,6 @@ import {
   LLM_PATH,
   LLM_VERSION,
   LLM_HF_DOWNLOAD_URL,
-  LLM_MIRROR_DOWNLOAD_URL,
   LLM_LLAMA_CPP_RELEASE_TAG
 } from '@/constants'
 import { OSTypes, CPUArchitectures } from '@/types'
import { OSTypes, CPUArchitectures } from '@/types'
@@ -37,17 +35,7 @@ function checkMinimumHardwareRequirements() {
   return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
 }

-async function canAccessHuggingFace() {
-  try {
-    await dns.promises.resolve('huggingface.co')
-    return true
-  } catch {
-    return false
-  }
-}
-
-async function downloadLLM(retryWithMirror = false) {
+async function downloadLLM() {
   try {
     LogHelper.info('Downloading LLM...')
@@ -61,22 +49,20 @@
     }

     if (!manifest || manifest.version !== LLM_VERSION) {
-      const downloadURL =
-        (await canAccessHuggingFace()) && !retryWithMirror
-          ? LLM_HF_DOWNLOAD_URL
-          : LLM_MIRROR_DOWNLOAD_URL
-
       // Just in case the LLM file already exists, delete it first
       if (fs.existsSync(LLM_PATH)) {
         await fs.promises.unlink(LLM_PATH)
       }

       LogHelper.info(
-        `Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`
+        `Downloading ${LLM_NAME_WITH_VERSION} from ${LLM_HF_DOWNLOAD_URL}...`
       )

       const llmWriter = fs.createWriteStream(LLM_PATH)
-      const response = await FileHelper.downloadFile(downloadURL, 'stream')
+      const response = await FileHelper.downloadFile(
+        LLM_HF_DOWNLOAD_URL,
+        'stream'
+      )

       response.data.pipe(llmWriter)
       await stream.promises.finished(llmWriter)
@@ -102,13 +88,6 @@ async function downloadLLM() {
     }
   } catch (e) {
     LogHelper.error(`Failed to download LLM: ${e}`)
-
-    if (e.code === 'EAI_AGAIN') {
-      LogHelper.warning(
-        'Failed to download from Hugging Face, retrying from mirror...'
-      )
-
-      await downloadLLM(true)
-    }
   }
 }


@@ -1,5 +1,6 @@
 import fs from 'node:fs'
+import path from 'node:path'
 import stream from 'node:stream'
 import { command } from 'execa'
@@ -9,12 +10,19 @@ import {
   FR_SPACY_MODEL_NAME,
   FR_SPACY_MODEL_VERSION,
   PYTHON_BRIDGE_SRC_PATH,
-  PYTHON_TCP_SERVER_SRC_PATH
+  PYTHON_TCP_SERVER_SRC_PATH,
+  PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH,
+  PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU
 } from '@/constants'
 import { CPUArchitectures, OSTypes } from '@/types'
 import { LogHelper } from '@/helpers/log-helper'
 import { LoaderHelper } from '@/helpers/loader-helper'
 import { SystemHelper } from '@/helpers/system-helper'
+import { FileHelper } from '@/helpers/file-helper'

 /**
  * Set up development environment according to the given setup target
@@ -42,6 +50,19 @@ function getModelInstallationFileUrl(model, mirror = undefined) {
   return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
 }

+const ASR_GPU_MODEL_FILES = [
+  'config.json',
+  'preprocessor_config.json',
+  'tokenizer.json',
+  'vocabulary.json',
+  'model.bin'
+]
+const ASR_CPU_MODEL_FILES = [
+  'config.json',
+  'tokenizer.json',
+  'vocabulary.txt',
+  'model.bin'
+]
 const SETUP_TARGETS = new Map()
 const SPACY_MODELS = new Map()
@@ -160,32 +181,6 @@ SPACY_MODELS.set('fr', {
         stdio: 'inherit'
       })
       LogHelper.success('PyTorch with CUDA support installed')
-
-      if (osType === OSTypes.Linux) {
-        LogHelper.info(
-          'Exporting LD_LIBRARY_PATH to map NVIDIA libs as it is needed by Whisper Faster. Cf. https://github.com/SYSTRAN/faster-whisper/issues/153...'
-        )
-        try {
-          await command(
-            // eslint-disable-next-line no-useless-escape
-            'export LD_LIBRARY_PATH=`pipenv run python -c "import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))"`',
-            {
-              shell: true,
-              stdio: 'inherit'
-            }
-          )
-          await command('echo $LD_LIBRARY_PATH', {
-            shell: true,
-            stdio: 'inherit'
-          })
-          LogHelper.success('LD_LIBRARY_PATH exported')
-        } catch (e) {
-          LogHelper.error(`Failed to export LD_LIBRARY_PATH: ${e}`)
-          process.exit(1)
-        }
-      }
     } catch (e) {
       LogHelper.error(`Failed to install PyTorch with CUDA support: ${e}`)
       process.exit(1)
@@ -325,6 +320,85 @@ SPACY_MODELS.set('fr', {
       process.exit(1)
     }
   }

+  const installTTSModel = async () => {
+    try {
+      LogHelper.info('Installing TTS model...')
+
+      const destPath = fs.createWriteStream(
+        PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
+      )
+
+      LogHelper.info(`Downloading TTS model...`)
+      const response = await FileHelper.downloadFile(
+        PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
+        'stream'
+      )
+
+      response.data.pipe(destPath)
+      await stream.promises.finished(destPath)
+
+      LogHelper.success(`TTS model downloaded at ${destPath.path}`)
+    } catch (e) {
+      LogHelper.error(`Failed to install TTS model: ${e}`)
+      process.exit(1)
+    }
+  }
+  const installASRModelForGPU = async () => {
+    try {
+      LogHelper.info('Installing ASR model for GPU...')
+
+      for (const modelFile of ASR_GPU_MODEL_FILES) {
+        const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
+        const destPath = fs.createWriteStream(
+          path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU, modelFile)
+        )
+
+        LogHelper.info(`Downloading ${modelFile}...`)
+        const response = await FileHelper.downloadFile(
+          modelInstallationFileURL,
+          'stream'
+        )
+
+        response.data.pipe(destPath)
+        await stream.promises.finished(destPath)
+
+        LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
+      }
+
+      LogHelper.success('ASR model for GPU installed')
+    } catch (e) {
+      LogHelper.error(`Failed to install ASR model for GPU: ${e}`)
+      process.exit(1)
+    }
+  }
+  const installASRModelForCPU = async () => {
+    try {
+      LogHelper.info('Installing ASR model for CPU...')
+
+      for (const modelFile of ASR_CPU_MODEL_FILES) {
+        const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
+        const destPath = fs.createWriteStream(
+          path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU, modelFile)
+        )
+
+        LogHelper.info(`Downloading ${modelFile}...`)
+        const response = await FileHelper.downloadFile(
+          modelInstallationFileURL,
+          'stream'
+        )
+
+        response.data.pipe(destPath)
+        await stream.promises.finished(destPath)
+
+        LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
+      }
+
+      LogHelper.success('ASR model for CPU installed')
+    } catch (e) {
+      LogHelper.error(`Failed to install ASR model for CPU: ${e}`)
+      process.exit(1)
+    }
+  }

   LogHelper.info('Checking whether all spaCy models are installed...')
@@ -349,6 +423,47 @@ SPACY_MODELS.set('fr', {
     LogHelper.info('Not all spaCy models are installed')
     await installSpacyModels()
   }
+
+  LogHelper.info('Checking whether the TTS model is installed...')
+  const isTTSModelInstalled = fs.existsSync(
+    PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
+  )
+
+  if (!isTTSModelInstalled) {
+    LogHelper.info('TTS model is not installed')
+    await installTTSModel()
+  } else {
+    LogHelper.success('TTS model is already installed')
+  }
+
+  LogHelper.info('Checking whether the ASR model for GPU is installed...')
+  // Check if model.bin file exists in directory (last file in the list)
+  const isASRModelForGPUInstalled = fs.existsSync(
+    path.join(
+      PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
+      ASR_GPU_MODEL_FILES[ASR_GPU_MODEL_FILES.length - 1]
+    )
+  )
+
+  if (!isASRModelForGPUInstalled) {
+    LogHelper.info('ASR model for GPU is not installed')
+    await installASRModelForGPU()
+  } else {
+    LogHelper.success('ASR model for GPU is already installed')
+  }
+
+  LogHelper.info('Checking whether the ASR model for CPU is installed...')
+  // Check if model.bin file exists in directory (last file in the list)
+  const isASRModelForCPUInstalled = fs.existsSync(
+    path.join(
+      PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
+      ASR_CPU_MODEL_FILES[ASR_CPU_MODEL_FILES.length - 1]
+    )
+  )
+
+  if (!isASRModelForCPUInstalled) {
+    LogHelper.info('ASR model for CPU is not installed')
+    await installASRModelForCPU()
+  } else {
+    LogHelper.success('ASR model for CPU is already installed')
+  }
 }

 LogHelper.success(`${setupTarget} development environment ready`)
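
The two ASR installers above differ only in their file list, URL prefix, and destination directory. A minimal sketch of a shared helper that could replace both — the `installASRModel` name and `ASRModelTarget` shape are illustrative, not part of the commit:

    import fs from 'node:fs'
    import path from 'node:path'
    import stream from 'node:stream'
    import { FileHelper } from '@/helpers/file-helper'
    import { LogHelper } from '@/helpers/log-helper'

    interface ASRModelTarget {
      label: string // e.g. 'GPU' or 'CPU'
      files: string[] // e.g. ASR_GPU_MODEL_FILES
      urlPrefix: string // e.g. PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL
      destDir: string // e.g. PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU
    }

    // Download every file of one ASR model target, mirroring the per-file
    // loop used by installASRModelForGPU and installASRModelForCPU above
    async function installASRModel(target: ASRModelTarget): Promise<void> {
      LogHelper.info(`Installing ASR model for ${target.label}...`)

      for (const modelFile of target.files) {
        const url = `${target.urlPrefix}/${modelFile}?download=true`
        const destPath = fs.createWriteStream(
          path.join(target.destDir, modelFile)
        )

        LogHelper.info(`Downloading ${modelFile}...`)
        const response = await FileHelper.downloadFile(url, 'stream')

        response.data.pipe(destPath)
        await stream.promises.finished(destPath)
      }

      LogHelper.success(`ASR model for ${target.label} installed`)
    }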


@@ -5,6 +5,7 @@ import dotenv from 'dotenv'

 import type { LongLanguageCode } from '@/types'
 import { SystemHelper } from '@/helpers/system-helper'
+import { NetworkHelper } from '@/helpers/network-helper'

 dotenv.config()
@@ -45,6 +46,41 @@ export const PYTHON_TCP_SERVER_SRC_PATH = path.join(
   PYTHON_TCP_SERVER_ROOT_PATH,
   'src'
 )
+export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME =
+  'EN-Leon-V1-G_699000.pth'
+export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH = path.join(
+  PYTHON_TCP_SERVER_SRC_PATH,
+  'lib',
+  'tts',
+  'models',
+  PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH = path.join(
+  PYTHON_TCP_SERVER_SRC_PATH,
+  'lib',
+  'asr',
+  'models'
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU = path.join(
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
+  'gpu'
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU = path.join(
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
+  'cpu'
+)
+export const PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    `https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME}?download=true`
+  )
+export const PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    'https://huggingface.co/Systran/faster-distil-whisper-large-v3/resolve/main'
+  )
+export const PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    'https://huggingface.co/Systran/faster-whisper-medium/resolve/main'
+  )

 const NODEJS_BRIDGE_VERSION_FILE_PATH = path.join(
   NODEJS_BRIDGE_SRC_PATH,
@@ -73,7 +109,8 @@ export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge'
 export const PYTHON_TCP_SERVER_BIN_NAME = 'leon-tcp-server'

 /**
- * NVIDIA libraries paths for CUDA. Needed by Whisper Faster
+ * NVIDIA libraries paths for CUDA. Needed by Whisper Faster.
+ * Otherwise, an error similar to "libcudnn_ops_infer.so.8: cannot open shared object file" occurs.
  * @see https://github.com/SYSTRAN/faster-whisper/issues/153
  */
 export const PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH = path.join(
@@ -216,38 +253,30 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 8
 export const LLM_MINIMUM_FREE_RAM = 8
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
-export const LLM_HF_DOWNLOAD_URL =
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'
+)*/
+export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
   'https://huggingface.co/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
-export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
+)
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'
+)*/

 /**
  * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
  */
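
For context on the NVIDIA library constants above: the setup script no longer exports LD_LIBRARY_PATH through a shell one-liner (see the block removed from the Python setup script earlier in this commit). One way such constants can be applied is by prepending them to the environment of the spawned TCP server process. This is a sketch only — the spawn call, the binary name, and the PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH constant are assumptions (only the cuBLAS constant is visible in this hunk):

    import { spawn } from 'node:child_process'
    // Both constants are assumed to be exported from '@/constants';
    // only the cuBLAS one appears in the hunk above
    import {
      PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH,
      PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH
    } from '@/constants'

    // Prepend the NVIDIA lib directories so faster-whisper can dlopen
    // libcudnn/libcublas shared objects at runtime
    const LD_LIBRARY_PATH = [
      PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH,
      PYTHON_TCP_SERVER_NVIDIA_CUDNN_LIB_PATH,
      process.env.LD_LIBRARY_PATH ?? ''
    ].join(':')

    // The binary name is illustrative (PYTHON_TCP_SERVER_BIN_NAME suggests it)
    spawn('leon-tcp-server', [], {
      env: { ...process.env, LD_LIBRARY_PATH },
      stdio: 'inherit'
    })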


@@ -0,0 +1,34 @@
import axios from 'axios'

const HUGGING_FACE_URL = 'https://huggingface.co'
const HUGGING_FACE_MIRROR_URL = 'https://hf-mirror.com'

export class NetworkHelper {
  /**
   * Check if the current network can access Hugging Face
   * @example canAccessHuggingFace() // true
   */
  public static async canAccessHuggingFace(): Promise<boolean> {
    try {
      await axios.head(HUGGING_FACE_URL)
      return true
    } catch (e) {
      return false
    }
  }

  /**
   * Set the Hugging Face URL based on the network access
   * @param url The URL to set
   * @example setHuggingFaceURL('https://huggingface.co') // https://hf-mirror.com
   */
  public static setHuggingFaceURL(url: string): string {
    const canAccess = NetworkHelper.canAccessHuggingFace()

    if (!canAccess) {
      return url.replace(HUGGING_FACE_URL, HUGGING_FACE_MIRROR_URL)
    }

    return url
  }
}
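
Note that `setHuggingFaceURL` calls the async `canAccessHuggingFace()` without awaiting it, so `canAccess` holds a pending Promise, which is always truthy: the mirror substitution can never trigger. A minimal corrected sketch — the async `resolveHuggingFaceURL` name is hypothetical, and callers such as `constants.ts` would then need to await it instead of assigning synchronously:

    import axios from 'axios'

    const HUGGING_FACE_URL = 'https://huggingface.co'
    const HUGGING_FACE_MIRROR_URL = 'https://hf-mirror.com'

    // Awaits the reachability check before choosing between the official
    // host and the mirror, unlike the synchronous setHuggingFaceURL above
    async function resolveHuggingFaceURL(url: string): Promise<string> {
      try {
        await axios.head(HUGGING_FACE_URL)
        return url
      } catch {
        return url.replace(HUGGING_FACE_URL, HUGGING_FACE_MIRROR_URL)
      }
    }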


@@ -5,12 +5,15 @@ import torch
 import numpy as np
 from faster_whisper import WhisperModel

+from ..constants import ASR_MODEL_PATH_FOR_GPU, ASR_MODEL_PATH_FOR_CPU
+

 class ASR:
     def __init__(self,
                  device='auto',
                  transcription_callback=None,
                  wake_word_callback=None,
                  end_of_owner_speech_callback=None):
+        tic = time.perf_counter()
         self.log('Loading model...')

         if device == 'auto':
@@ -54,7 +57,6 @@
         self.chunk = 4096
         self.threshold = 200
         self.silence_duration = 1  # duration of silence in seconds
-        self.model_size = "distil-large-v3"
         self.buffer_size = 64  # Size of the circular buffer

         self.audio = pyaudio.PyAudio()
@@ -62,20 +64,27 @@
         self.model = None
         if self.device == 'cpu':
+            model_path = ASR_MODEL_PATH_FOR_CPU
             self.model = WhisperModel(
-                self.model_size,
+                model_path,
                 device=self.device,
                 compute_type=self.compute_type,
+                local_files_only=True,
                 cpu_threads=4
             )
         else:
+            model_path = ASR_MODEL_PATH_FOR_GPU
             self.model = WhisperModel(
-                self.model_size,
+                model_path,
                 device=self.device,
-                compute_type=self.compute_type
+                compute_type=self.compute_type,
+                local_files_only=True
             )

         self.log('Model loaded')
+        toc = time.perf_counter()
+        self.log(f"Time taken to load model: {toc - tic:0.4f} seconds")

     def detect_wake_word(self, speech: str) -> bool:
         lowercased_speech = speech.lower().strip()
@@ -90,14 +99,17 @@
                 self.circular_buffer.pop(0)

             audio_data = np.concatenate(self.circular_buffer)
-            segments, info = self.model.transcribe(
-                audio_data,
-                beam_size=5,
-                language="en",
-                task="transcribe",
-                condition_on_previous_text=False,
-                hotwords="talking to Leon"
-            )
+            transcribe_params = {
+                "beam_size": 5,
+                "language": "en",
+                "task": "transcribe",
+                "condition_on_previous_text": False,
+                "hotwords": "talking to Leon"
+            }
+            if self.device == 'cpu':
+                transcribe_params["temperature"] = 0
+            segments, info = self.model.transcribe(audio_data, **transcribe_params)

             for segment in segments:
                 words = segment.text.split()
                 self.segment_text += ' '.join(words) + ' '


@@ -0,0 +1,31 @@
{
"alignment_heads": [
[13, 15],
[15, 4],
[15, 15],
[16, 1],
[20, 0],
[23, 4]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362
],
"suppress_ids_begin": [220, 50257]
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,46 @@
{
"alignment_heads": [
[1, 0],
[1, 1],
[1, 2],
[1, 3],
[1, 4],
[1, 5],
[1, 6],
[1, 7],
[1, 8],
[1, 9],
[1, 10],
[1, 11],
[1, 12],
[1, 13],
[1, 14],
[1, 15],
[1, 16],
[1, 17],
[1, 18],
[1, 19]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357,
50358
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50359, 50360, 50361, 50362, 50363
],
"suppress_ids_begin": [220, 50257]
}


@@ -0,0 +1,14 @@
{
"chunk_length": 30,
"feature_extractor_type": "WhisperFeatureExtractor",
"feature_size": 128,
"hop_length": 160,
"n_fft": 400,
"n_samples": 480000,
"nb_max_frames": 3000,
"padding_side": "right",
"padding_value": 0.0,
"processor_class": "WhisperProcessor",
"return_attention_mask": false,
"sampling_rate": 16000
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -23,4 +23,8 @@ TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, TTS_MODEL_FILE_NAME)
 IS_TTS_ENABLED = os.environ.get('LEON_TTS', 'true') == 'true'

 # ASR
 ASR_LIB_PATH = os.path.join(LIB_PATH, 'asr')
+ASR_MODEL_FOLDER_PATH = os.path.join(ASR_LIB_PATH, 'models')
+ASR_MODEL_PATH_FOR_GPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'gpu')
+ASR_MODEL_PATH_FOR_CPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'cpu')
 IS_ASR_ENABLED = os.environ.get('LEON_STT', 'true') == 'true'


@@ -7,9 +7,15 @@ import re
 import string

 import lib.nlp as nlp
-from .asr import ASR
+from .asr.api import ASR
 from .tts.api import TTS
-from .constants import TTS_MODEL_CONFIG_PATH, TTS_MODEL_PATH, IS_TTS_ENABLED, TMP_PATH, IS_ASR_ENABLED
+from .constants import (
+    TTS_MODEL_CONFIG_PATH,
+    TTS_MODEL_PATH,
+    IS_TTS_ENABLED,
+    TMP_PATH,
+    IS_ASR_ENABLED
+)


class TCPServer:
@@ -98,7 +104,6 @@
             }
         })

-        # TODO: local model path
         self.asr = ASR(device='auto',
                        transcription_callback=transcription_callback,
                        wake_word_callback=wake_word_callback,