mirror of https://github.com/leon-ai/leon.git
synced 2024-11-28 04:04:58 +03:00
feat(server): always load ASR model from local
This commit is contained in:
parent 5855990bd5
commit b02d510490
1 .gitignore vendored
@@ -25,6 +25,7 @@ leon.json
bridges/python/src/Pipfile.lock
tcp_server/src/Pipfile.lock
tcp_server/src/lib/tts/models/*.pth
+tcp_server/src/lib/asr/models/**/*.bin
!tcp_server/**/.gitkeep
!bridges/python/**/.gitkeep
!bridges/nodejs/**/.gitkeep
@@ -98,7 +98,9 @@ export default class Client {
    this.socket.on('asr-end-of-owner-speech', () => {
      console.log('End of owner speech')
+      setTimeout(() => {
        this.send('utterance')
+      }, 300)
    })

  /**
@@ -1,6 +1,5 @@
import fs from 'node:fs'
import path from 'node:path'
-import dns from 'node:dns'
import stream from 'node:stream'

import { command } from 'execa'
@@ -13,7 +12,6 @@ import {
  LLM_PATH,
  LLM_VERSION,
  LLM_HF_DOWNLOAD_URL,
-  LLM_MIRROR_DOWNLOAD_URL,
  LLM_LLAMA_CPP_RELEASE_TAG
} from '@/constants'
import { OSTypes, CPUArchitectures } from '@/types'
@@ -37,17 +35,7 @@ function checkMinimumHardwareRequirements() {
  return SystemHelper.getTotalRAM() >= LLM_MINIMUM_TOTAL_RAM
}

-async function canAccessHuggingFace() {
-  try {
-    await dns.promises.resolve('huggingface.co')
-
-    return true
-  } catch {
-    return false
-  }
-}
-
-async function downloadLLM(retryWithMirror = false) {
+async function downloadLLM() {
  try {
    LogHelper.info('Downloading LLM...')
@@ -61,22 +49,20 @@ async function downloadLLM(retryWithMirror = false) {
    }

    if (!manifest || manifest.version !== LLM_VERSION) {
-      const downloadURL =
-        (await canAccessHuggingFace()) && !retryWithMirror
-          ? LLM_HF_DOWNLOAD_URL
-          : LLM_MIRROR_DOWNLOAD_URL
-
      // Just in case the LLM file already exists, delete it first
      if (fs.existsSync(LLM_PATH)) {
        await fs.promises.unlink(LLM_PATH)
      }

      LogHelper.info(
-        `Downloading ${LLM_NAME_WITH_VERSION} from ${downloadURL}...`
+        `Downloading ${LLM_NAME_WITH_VERSION} from ${LLM_HF_DOWNLOAD_URL}...`
      )

      const llmWriter = fs.createWriteStream(LLM_PATH)
-      const response = await FileHelper.downloadFile(downloadURL, 'stream')
+      const response = await FileHelper.downloadFile(
+        LLM_HF_DOWNLOAD_URL,
+        'stream'
+      )

      response.data.pipe(llmWriter)
      await stream.promises.finished(llmWriter)
@@ -102,13 +88,6 @@ async function downloadLLM(retryWithMirror = false) {
    }
  } catch (e) {
    LogHelper.error(`Failed to download LLM: ${e}`)
-
-    if (e.code === 'EAI_AGAIN') {
-      LogHelper.warning(
-        'Failed to download from Hugging Face, retrying from mirror...'
-      )
-
-      await downloadLLM(true)
-    }
  }
}
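Note: `FileHelper.downloadFile(url, 'stream')` above is consumed as an axios-style stream response (`response.data.pipe(...)` followed by `stream.promises.finished(...)`). A minimal sketch of that download pattern, assuming axios; the actual FileHelper implementation is not part of this diff:

import fs from 'node:fs'
import stream from 'node:stream'
import axios from 'axios'

// Hypothetical sketch, not the repo's FileHelper: download a URL to disk
// as a stream, resolving once the file has been fully flushed.
async function downloadToFile(url: string, destPath: string): Promise<void> {
  const response = await axios.get(url, { responseType: 'stream' })
  const writer = fs.createWriteStream(destPath)

  response.data.pipe(writer)
  await stream.promises.finished(writer)
}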
@@ -1,5 +1,6 @@
import fs from 'node:fs'
+import path from 'node:path'
import stream from 'node:stream'

import { command } from 'execa'
@@ -9,12 +10,19 @@ import {
  FR_SPACY_MODEL_NAME,
  FR_SPACY_MODEL_VERSION,
  PYTHON_BRIDGE_SRC_PATH,
-  PYTHON_TCP_SERVER_SRC_PATH
+  PYTHON_TCP_SERVER_SRC_PATH,
+  PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH,
+  PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL,
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU
} from '@/constants'
import { CPUArchitectures, OSTypes } from '@/types'
import { LogHelper } from '@/helpers/log-helper'
import { LoaderHelper } from '@/helpers/loader-helper'
import { SystemHelper } from '@/helpers/system-helper'
+import { FileHelper } from '@/helpers/file-helper'

/**
 * Set up development environment according to the given setup target
@@ -42,6 +50,19 @@ function getModelInstallationFileUrl(model, mirror = undefined) {
  return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
}

+const ASR_GPU_MODEL_FILES = [
+  'config.json',
+  'preprocessor_config.json',
+  'tokenizer.json',
+  'vocabulary.json',
+  'model.bin'
+]
+const ASR_CPU_MODEL_FILES = [
+  'config.json',
+  'tokenizer.json',
+  'vocabulary.txt',
+  'model.bin'
+]
const SETUP_TARGETS = new Map()
const SPACY_MODELS = new Map()
@@ -160,32 +181,6 @@ SPACY_MODELS.set('fr', {
        stdio: 'inherit'
      })
      LogHelper.success('PyTorch with CUDA support installed')
-
-      if (osType === OSTypes.Linux) {
-        LogHelper.info(
-          'Exporting LD_LIBRARY_PATH to map NVIDIA libs as it is needed by Whisper Faster. Cf. https://github.com/SYSTRAN/faster-whisper/issues/153...'
-        )
-
-        try {
-          await command(
-            // eslint-disable-next-line no-useless-escape
-            'export LD_LIBRARY_PATH=`pipenv run python -c "import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))"`',
-            {
-              shell: true,
-              stdio: 'inherit'
-            }
-          )
-          await command('echo $LD_LIBRARY_PATH', {
-            shell: true,
-            stdio: 'inherit'
-          })
-
-          LogHelper.success('LD_LIBRARY_PATH exported')
-        } catch (e) {
-          LogHelper.error(`Failed to export LD_LIBRARY_PATH: ${e}`)
-          process.exit(1)
-        }
-      }
    } catch (e) {
      LogHelper.error(`Failed to install PyTorch with CUDA support: ${e}`)
      process.exit(1)
@@ -325,6 +320,85 @@ SPACY_MODELS.set('fr', {
      process.exit(1)
    }
  }
+  const installTTSModel = async () => {
+    try {
+      LogHelper.info('Installing TTS model...')
+
+      const destPath = fs.createWriteStream(
+        PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
+      )
+
+      LogHelper.info(`Downloading TTS model...`)
+      const response = await FileHelper.downloadFile(
+        PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
+        'stream'
+      )
+
+      response.data.pipe(destPath)
+      await stream.promises.finished(destPath)
+
+      LogHelper.success(`TTS model downloaded at ${destPath.path}`)
+    } catch (e) {
+      LogHelper.error(`Failed to install TTS model: ${e}`)
+      process.exit(1)
+    }
+  }
+  const installASRModelForGPU = async () => {
+    try {
+      LogHelper.info('Installing ASR model for GPU...')
+
+      for (const modelFile of ASR_GPU_MODEL_FILES) {
+        const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
+        const destPath = fs.createWriteStream(
+          path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU, modelFile)
+        )
+
+        LogHelper.info(`Downloading ${modelFile}...`)
+        const response = await FileHelper.downloadFile(
+          modelInstallationFileURL,
+          'stream'
+        )
+
+        response.data.pipe(destPath)
+        await stream.promises.finished(destPath)
+
+        LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
+      }
+
+      LogHelper.success('ASR model for GPU installed')
+    } catch (e) {
+      LogHelper.error(`Failed to install ASR model for GPU: ${e}`)
+      process.exit(1)
+    }
+  }
+  const installASRModelForCPU = async () => {
+    try {
+      LogHelper.info('Installing ASR model for CPU...')
+
+      for (const modelFile of ASR_CPU_MODEL_FILES) {
+        const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
+        const destPath = fs.createWriteStream(
+          path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU, modelFile)
+        )
+
+        LogHelper.info(`Downloading ${modelFile}...`)
+        const response = await FileHelper.downloadFile(
+          modelInstallationFileURL,
+          'stream'
+        )
+
+        response.data.pipe(destPath)
+        await stream.promises.finished(destPath)
+
+        LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
+      }
+
+      LogHelper.success('ASR model for CPU installed')
+    } catch (e) {
+      LogHelper.error(`Failed to install ASR model for CPU: ${e}`)
+      process.exit(1)
+    }
+  }

  LogHelper.info('Checking whether all spaCy models are installed...')

@@ -349,6 +423,47 @@ SPACY_MODELS.set('fr', {
    LogHelper.info('Not all spaCy models are installed')
    await installSpacyModels()
  }

+  LogHelper.info('Checking whether the TTS model is installed...')
+  const isTTSModelInstalled = fs.existsSync(
+    PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
+  )
+  if (!isTTSModelInstalled) {
+    LogHelper.info('TTS model is not installed')
+    await installTTSModel()
+  } else {
+    LogHelper.success('TTS model is already installed')
+  }
+
+  LogHelper.info('Checking whether the ASR model for GPU is installed...')
+  // Check if model.bin file exists in directory (last file in the list)
+  const isASRModelForGPUInstalled = fs.existsSync(
+    path.join(
+      PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
+      ASR_GPU_MODEL_FILES[ASR_GPU_MODEL_FILES.length - 1]
+    )
+  )
+  if (!isASRModelForGPUInstalled) {
+    LogHelper.info('ASR model for GPU is not installed')
+    await installASRModelForGPU()
+  } else {
+    LogHelper.success('ASR model for GPU is already installed')
+  }
+
+  LogHelper.info('Checking whether the ASR model for CPU is installed...')
+  // Check if model.bin file exists in directory (last file in the list)
+  const isASRModelForCPUInstalled = fs.existsSync(
+    path.join(
+      PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
+      ASR_CPU_MODEL_FILES[ASR_CPU_MODEL_FILES.length - 1]
+    )
+  )
+  if (!isASRModelForCPUInstalled) {
+    LogHelper.info('ASR model for CPU is not installed')
+    await installASRModelForCPU()
+  } else {
+    LogHelper.success('ASR model for CPU is already installed')
+  }
}

LogHelper.success(`${setupTarget} development environment ready`)
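Each of the installation checks above only tests for `model.bin`, the last entry in the corresponding file list, on the grounds that it is downloaded last by the sequential loop. A stricter variant (hypothetical, not part of this commit) would verify every expected file:

import fs from 'node:fs'
import path from 'node:path'

// Hypothetical stricter check: verify that every expected ASR model file
// exists on disk, instead of only the last one (model.bin).
function areAllModelFilesInstalled(modelDir: string, files: string[]): boolean {
  return files.every((file) => fs.existsSync(path.join(modelDir, file)))
}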
@@ -5,6 +5,7 @@ import dotenv from 'dotenv'

import type { LongLanguageCode } from '@/types'
import { SystemHelper } from '@/helpers/system-helper'
+import { NetworkHelper } from '@/helpers/network-helper'

dotenv.config()
@@ -45,6 +46,41 @@ export const PYTHON_TCP_SERVER_SRC_PATH = path.join(
  PYTHON_TCP_SERVER_ROOT_PATH,
  'src'
)
+export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME =
+  'EN-Leon-V1-G_699000.pth'
+export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH = path.join(
+  PYTHON_TCP_SERVER_SRC_PATH,
+  'lib',
+  'tts',
+  'models',
+  PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH = path.join(
+  PYTHON_TCP_SERVER_SRC_PATH,
+  'lib',
+  'asr',
+  'models'
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU = path.join(
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
+  'gpu'
+)
+export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU = path.join(
+  PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
+  'cpu'
+)
+export const PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    `https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME}?download=true`
+  )
+export const PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    'https://huggingface.co/Systran/faster-distil-whisper-large-v3/resolve/main'
+  )
+export const PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL =
+  NetworkHelper.setHuggingFaceURL(
+    'https://huggingface.co/Systran/faster-whisper-medium/resolve/main'
+  )

const NODEJS_BRIDGE_VERSION_FILE_PATH = path.join(
  NODEJS_BRIDGE_SRC_PATH,
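The two `*_PREFIX_DOWNLOAD_URL` constants above are prefixes rather than full file URLs: the setup script appends each model file name plus `?download=true` to them. Schematically (illustration only, not a named helper in the repo):

// Illustration of how the setup script composes a full model file URL
// from a Hugging Face prefix constant.
const buildModelFileURL = (prefix: string, fileName: string): string =>
  `${prefix}/${fileName}?download=true`

// e.g. buildModelFileURL(PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL, 'model.bin')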
@@ -73,7 +109,8 @@ export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge'
export const PYTHON_TCP_SERVER_BIN_NAME = 'leon-tcp-server'

/**
- * NVIDIA libraries paths for CUDA. Needed by Whisper Faster
+ * NVIDIA libraries paths for CUDA. Needed by Whisper Faster.
+ * Otherwise, an error similar to "libcudnn_ops_infer.so.8: cannot open shared object file" occurs.
 * @see https://github.com/SYSTRAN/faster-whisper/issues/153
 */
export const PYTHON_TCP_SERVER_NVIDIA_CUBLAS_LIB_PATH = path.join(
@@ -216,38 +253,30 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
export const LLM_MINIMUM_TOTAL_RAM = 8
export const LLM_MINIMUM_FREE_RAM = 8
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
-export const LLM_HF_DOWNLOAD_URL =
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'
+)*/
+export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
  'https://huggingface.co/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'*/
-/*export const LLM_HF_DOWNLOAD_URL =
-  'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
-export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'*/
-/*export const LLM_MIRROR_DOWNLOAD_URL =
-  'https://hf-mirror.com/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
+)
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/resolve/main/Phi-3-mini-128k-instruct.Q5_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf?download=true'
+)*/
+/*export const LLM_HF_DOWNLOAD_URL = NetworkHelper.setHuggingFaceURL(
+  'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'
+)*/
/**
 * @see llama.cpp releases: https://github.com/ggerganov/llama.cpp/releases
 */
34 server/src/helpers/network-helper.ts Normal file
@@ -0,0 +1,34 @@
import axios from 'axios'

const HUGGING_FACE_URL = 'https://huggingface.co'
const HUGGING_FACE_MIRROR_URL = 'https://hf-mirror.com'

export class NetworkHelper {
  /**
   * Check if the current network can access Hugging Face
   * @example canAccessHuggingFace() // true
   */
  public static async canAccessHuggingFace(): Promise<boolean> {
    try {
      await axios.head(HUGGING_FACE_URL)

      return true
    } catch (e) {
      return false
    }
  }

  /**
   * Set the Hugging Face URL based on the network access
   * @param url The URL to set
   * @example setHuggingFaceURL('https://huggingface.co') // https://hf-mirror.com
   */
  public static setHuggingFaceURL(url: string): string {
    const canAccess = NetworkHelper.canAccessHuggingFace()

    if (!canAccess) {
      return url.replace(HUGGING_FACE_URL, HUGGING_FACE_MIRROR_URL)
    }

    return url
  }
}
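One thing worth flagging in this new helper: `canAccessHuggingFace()` is async, but `setHuggingFaceURL` calls it without `await`, so `canAccess` holds a Promise, which is always truthy, and the mirror branch cannot be taken from there. An awaited variant would look like this (a sketch, not part of the commit):

// Hypothetical awaited variant of the URL selection: the network check
// is resolved before deciding between the main host and the mirror.
async function resolveHuggingFaceURL(url: string): Promise<string> {
  const canAccess = await NetworkHelper.canAccessHuggingFace()

  return canAccess
    ? url
    : url.replace('https://huggingface.co', 'https://hf-mirror.com')
}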
@@ -5,12 +5,15 @@ import torch
import numpy as np
from faster_whisper import WhisperModel

+from ..constants import ASR_MODEL_PATH_FOR_GPU, ASR_MODEL_PATH_FOR_CPU
+

class ASR:
    def __init__(self,
                 device='auto',
                 transcription_callback=None,
                 wake_word_callback=None,
                 end_of_owner_speech_callback=None):
        tic = time.perf_counter()
        self.log('Loading model...')

        if device == 'auto':
@@ -54,7 +57,6 @@ class ASR:
        self.chunk = 4096
        self.threshold = 200
        self.silence_duration = 1  # duration of silence in seconds
-        self.model_size = "distil-large-v3"
        self.buffer_size = 64  # Size of the circular buffer

        self.audio = pyaudio.PyAudio()
@@ -62,20 +64,27 @@
        self.model = None

        if self.device == 'cpu':
+            model_path = ASR_MODEL_PATH_FOR_CPU
            self.model = WhisperModel(
-                self.model_size,
+                model_path,
                device=self.device,
                compute_type=self.compute_type,
+                local_files_only=True,
                cpu_threads=4
            )
        else:
+            model_path = ASR_MODEL_PATH_FOR_GPU
            self.model = WhisperModel(
-                self.model_size,
+                model_path,
                device=self.device,
-                compute_type=self.compute_type
+                compute_type=self.compute_type,
+                local_files_only=True
            )

        self.log('Model loaded')
        toc = time.perf_counter()

        self.log(f"Time taken to load model: {toc - tic:0.4f} seconds")

    def detect_wake_word(self, speech: str) -> bool:
        lowercased_speech = speech.lower().strip()
@@ -90,14 +99,17 @@ class ASR:
            self.circular_buffer.pop(0)

        audio_data = np.concatenate(self.circular_buffer)
-        segments, info = self.model.transcribe(
-            audio_data,
-            beam_size=5,
-            language="en",
-            task="transcribe",
-            condition_on_previous_text=False,
-            hotwords="talking to Leon"
-        )
+        transcribe_params = {
+            "beam_size": 5,
+            "language": "en",
+            "task": "transcribe",
+            "condition_on_previous_text": False,
+            "hotwords": "talking to Leon"
+        }
+        if self.device == 'cpu':
+            transcribe_params["temperature"] = 0
+
+        segments, info = self.model.transcribe(audio_data, **transcribe_params)
        for segment in segments:
            words = segment.text.split()
            self.segment_text += ' '.join(words) + ' '
0
tcp_server/src/lib/asr/models/cpu/.gitkeep
Normal file
0
tcp_server/src/lib/asr/models/cpu/.gitkeep
Normal file
31
tcp_server/src/lib/asr/models/cpu/config.json
Normal file
31
tcp_server/src/lib/asr/models/cpu/config.json
Normal file
@ -0,0 +1,31 @@
|
||||
{
  "alignment_heads": [
    [13, 15],
    [15, 4],
    [15, 15],
    [16, 1],
    [20, 0],
    [23, 4]
  ],
  "lang_ids": [
    50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
    50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
    50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
    50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
    50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
    50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
    50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
    50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
    50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357
  ],
  "suppress_ids": [
    1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
    91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
    1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
    6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
    14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
    26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
    49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362
  ],
  "suppress_ids_begin": [220, 50257]
}
101330 tcp_server/src/lib/asr/models/cpu/tokenizer.json Normal file
File diff suppressed because it is too large
51865 tcp_server/src/lib/asr/models/cpu/vocabulary.txt Normal file
File diff suppressed because it is too large
0 tcp_server/src/lib/asr/models/gpu/.gitkeep Normal file
46 tcp_server/src/lib/asr/models/gpu/config.json Normal file
@@ -0,0 +1,46 @@
{
  "alignment_heads": [
    [1, 0],
    [1, 1],
    [1, 2],
    [1, 3],
    [1, 4],
    [1, 5],
    [1, 6],
    [1, 7],
    [1, 8],
    [1, 9],
    [1, 10],
    [1, 11],
    [1, 12],
    [1, 13],
    [1, 14],
    [1, 15],
    [1, 16],
    [1, 17],
    [1, 18],
    [1, 19]
  ],
  "lang_ids": [
    50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
    50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
    50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
    50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
    50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
    50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
    50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
    50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
    50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357,
    50358
  ],
  "suppress_ids": [
    1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
    91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
    1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
    6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
    14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
    26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
    49870, 50254, 50258, 50359, 50360, 50361, 50362, 50363
  ],
  "suppress_ids_begin": [220, 50257]
}
14 tcp_server/src/lib/asr/models/gpu/preprocessor_config.json Normal file
@@ -0,0 +1,14 @@
{
  "chunk_length": 30,
  "feature_extractor_type": "WhisperFeatureExtractor",
  "feature_size": 128,
  "hop_length": 160,
  "n_fft": 400,
  "n_samples": 480000,
  "nb_max_frames": 3000,
  "padding_side": "right",
  "padding_value": 0.0,
  "processor_class": "WhisperProcessor",
  "return_attention_mask": false,
  "sampling_rate": 16000
}
114849 tcp_server/src/lib/asr/models/gpu/tokenizer.json Normal file
File diff suppressed because it is too large
51868 tcp_server/src/lib/asr/models/gpu/vocabulary.json Normal file
File diff suppressed because it is too large
@@ -23,4 +23,8 @@ TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, TTS_MODEL_FILE_NAME)
IS_TTS_ENABLED = os.environ.get('LEON_TTS', 'true') == 'true'

# ASR
+ASR_LIB_PATH = os.path.join(LIB_PATH, 'asr')
+ASR_MODEL_FOLDER_PATH = os.path.join(ASR_LIB_PATH, 'models')
+ASR_MODEL_PATH_FOR_GPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'gpu')
+ASR_MODEL_PATH_FOR_CPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'cpu')
IS_ASR_ENABLED = os.environ.get('LEON_STT', 'true') == 'true'
@@ -7,9 +7,15 @@ import re
import string

import lib.nlp as nlp
-from .asr import ASR
+from .asr.api import ASR
from .tts.api import TTS
-from .constants import TTS_MODEL_CONFIG_PATH, TTS_MODEL_PATH, IS_TTS_ENABLED, TMP_PATH, IS_ASR_ENABLED
+from .constants import (
+    TTS_MODEL_CONFIG_PATH,
+    TTS_MODEL_PATH,
+    IS_TTS_ENABLED,
+    TMP_PATH,
+    IS_ASR_ENABLED
+)


class TCPServer:
@@ -98,7 +104,6 @@ class TCPServer:
            }
        })

-        # TODO: local model path
        self.asr = ASR(device='auto',
                       transcription_callback=transcription_callback,
                       wake_word_callback=wake_word_callback,