mirror of https://github.com/leon-ai/leon.git

feat(python tcp server): embed new text-to-speech engine in TCP server binary

louistiti 2024-05-18 01:11:12 +08:00
parent e455a9d96b
commit a7cab344f8
No known key found for this signature in database
GPG Key ID: 92CD6A2E497E1669
15 changed files with 77 additions and 78 deletions

View File

@@ -54,7 +54,7 @@
"build:nodejs-bridge": "tsx scripts/build-binaries.js nodejs-bridge",
"build:python-bridge": "tsx scripts/build-binaries.js python-bridge",
"build:tcp-server": "tsx scripts/build-binaries.js tcp-server",
"start:tcp-server": "cross-env PIPENV_PIPFILE=tcp_server/src/Pipfile pipenv run python tcp_server/src/main.py",
"start:tcp-server": "cross-env HF_HUB_VERBOSITY=debug PIPENV_PIPFILE=tcp_server/src/Pipfile pipenv run python tcp_server/src/main.py",
"start": "cross-env LEON_NODE_ENV=production node server/dist/pre-check.js && node server/dist/index.js",
"python-bridge": "cross-env PIPENV_PIPFILE=bridges/python/src/Pipfile pipenv run python bridges/python/src/main.py server/src/intent-object.sample.json",
"train": "tsx scripts/train/run-train.js",

View File

@@ -19,7 +19,6 @@ import {
} from '@/constants'
import { OSTypes } from '@/types'
import { LogHelper } from '@/helpers/log-helper'
- import { LoaderHelper } from '@/helpers/loader-helper'
import { SystemHelper } from '@/helpers/system-helper'
/**
@@ -57,8 +56,6 @@ BUILD_TARGETS.set('tcp-server', {
dotVenvPath: path.join(PYTHON_TCP_SERVER_SRC_PATH, '.venv')
})
;(async () => {
- LoaderHelper.start()
const { argv } = process
const givenBuildTarget = argv[2].toLowerCase()
@@ -117,8 +114,12 @@
process.env.PIPENV_PIPFILE = pipfilePath
process.env.PIPENV_VENV_IN_PROJECT = true
+ /**
+  * cx_Freeze usage
+  * @see https://cx-freeze.readthedocs.io/en/latest/setup_script.html#build-exe
+  */
await command(
-   `pipenv run python ${setupFilePath} build --build-exe ${buildPath}`,
+   `pipenv run python ${setupFilePath} build_exe --build-exe ${buildPath}`,
{
shell: true,
stdio: 'inherit'

View File

@@ -131,26 +131,37 @@ SPACY_MODELS.set('fr', {
const hasDotVenv = fs.existsSync(dotVenvPath)
const { type: osType, cpuArchitecture } = SystemHelper.getInformation()
/**
- * Install PyTorch nightly to support CUDA 12.4
- * as it is required by the latest NVIDIA drivers for CUDA runtime APIs
+ * Install PyTorch with CUDA support
+ * as it is required by the latest NVIDIA drivers for CUDA runtime APIs.
+ * PyTorch will automatically download nvidia-* packages and bundle them.
+ *
+ * It is important to specify the "--ignore-installed" flag to make sure the
+ * "~/.pyenv/versions/3.9.10/lib/python3.9/site-packages" path is not used in case
+ * NVIDIA deps are already installed. Otherwise, pip won't install them in our
+ * TCP server .venv, as they are already installed (satisfied) in
+ * the path mentioned above.
+ *
+ * @see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
* @see https://pytorch.org/get-started/locally/
* @see https://stackoverflow.com/a/76972265/1768162
+ * @see https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html
*/
const installPytorch = async () => {
- LogHelper.info('Installing PyTorch nightly with CUDA support...')
+ LogHelper.info('Installing PyTorch with CUDA support...')
try {
-   await command(
-     'pipenv run pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124',
-     {
-       shell: true,
-       stdio: 'inherit'
-     }
-   )
-   LogHelper.success('PyTorch nightly with CUDA support installed')
+   // There is no CUDA support on macOS
+   const commandToExecute =
+     osType === OSTypes.MacOS
+       ? 'pipenv run pip install --ignore-installed torch==2.3.0'
+       : 'pipenv run pip install --ignore-installed torch==2.3.0 --index-url https://download.pytorch.org/whl/cu121'
+   await command(commandToExecute, {
+     shell: true,
+     stdio: 'inherit'
+   })
+   LogHelper.success('PyTorch with CUDA support installed')
} catch (e) {
-   LogHelper.error(
-     `Failed to install PyTorch nightly with CUDA support: ${e}`
-   )
+   LogHelper.error(`Failed to install PyTorch with CUDA support: ${e}`)
process.exit(1)
}
}
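
To confirm the pinned wheel actually ships with CUDA support, a quick check can be run in the same Pipenv environment (a sketch, not part of this commit):

import torch

# e.g. "2.3.0+cu121" on Linux/Windows, plain "2.3.0" on macOS
print(torch.__version__)
# True only when a compatible NVIDIA driver and CUDA runtime are present
print(torch.cuda.is_available())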

View File

@@ -10,21 +10,16 @@ python_version = "3.9.10"
# Setup
setuptools = "*"
wheel = "*"
# Build
cx-freeze = "==6.11.1"
cx-freeze = "==7.0.0"
# Common
python-dotenv = "==0.19.2"
# TCP server
spacy = "==3.5.4"
geonamescache = "==1.6.0"
# TCP server; TTS
# PyTorch is installed via the setup script
# torch = "*"
# TTS
transformers = "==4.27.4"
g2p-en = "==2.1.0"

View File

@@ -1,6 +1,9 @@
import os
+ import sys
- SRC_PATH = os.path.join(os.getcwd(), 'tcp_server', 'src')
+ IS_RAN_FROM_BINARY = getattr(sys, 'frozen', False)
+ SRC_PATH = os.path.join(os.getcwd(), 'tcp_server', 'src') if not IS_RAN_FROM_BINARY else '.'
# TTS
TTS_MODEL_VERSION = 'V1'
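
cx_Freeze sets sys.frozen on the packaged executable, which is what the getattr(sys, 'frozen', False) check relies on. A minimal sketch of the resulting path resolution (the resolve_resource helper is hypothetical, not part of this commit):

import os
import sys

IS_RAN_FROM_BINARY = getattr(sys, 'frozen', False)

def resolve_resource(*parts: str) -> str:
    # Hypothetical helper: resolve next to the frozen binary when packaged,
    # or under the source tree during development
    if IS_RAN_FROM_BINARY:
        base = os.path.dirname(sys.executable)
    else:
        base = os.path.join(os.getcwd(), 'tcp_server', 'src')
    return os.path.join(base, *parts)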

View File

@@ -1,6 +1,7 @@
import copy
from sys import argv
import spacy
+ import time
from geonamescache import GeonamesCache
lang = argv[1] or 'en'
@@ -41,9 +42,12 @@ def load_spacy_model() -> None:
model = spacy_model_mapping[lang]['model']
exclude = spacy_model_mapping[lang]['exclude']
+ tic = time.perf_counter()
log(f'Loading {model} spaCy model...')
spacy_nlp = spacy.load(model, exclude=exclude)
log('spaCy model loaded')
+ toc = time.perf_counter()
+ log(f"Time taken to load spaCy model: {toc - tic:0.4f} seconds")
def delete_unneeded_country_data(data: dict) -> None:
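
The tic/toc pattern with time.perf_counter() introduced here is repeated in the TCP server and the TTS API below. A reusable alternative would be a small context manager (a sketch, not what the commit does):

import time
from contextlib import contextmanager

@contextmanager
def timed(label: str):
    # Log the wall-clock time spent inside the block on exit
    tic = time.perf_counter()
    try:
        yield
    finally:
        toc = time.perf_counter()
        print(f'Time taken to {label}: {toc - tic:0.4f} seconds')

# Usage: with timed('load spaCy model'): spacy_nlp = spacy.load(model, exclude=exclude)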

View File

@@ -2,6 +2,7 @@ import socket
import json
import os
from typing import Union
+ import time
import lib.nlp as nlp
from .tts.api import TTS
@@ -47,7 +48,11 @@ class TCPServer:
output_path = 'output.wav'
speed = 1.0
+ tic = time.perf_counter()
self.tts.tts_to_file(text, speaker_ids['EN-Leon-V1'], output_path, speed=speed)
+ toc = time.perf_counter()
+ self.log(f"Time taken to generate audio: {toc - tic:0.4f} seconds")
def init(self):
# Make sure to establish the TCP connection by reusing the address so it does not conflict with a port already in use

View File

@@ -4,11 +4,14 @@ import numpy as np
import torch.nn as nn
from tqdm import tqdm
import torch
+ import time
from . import utils
from .models import SynthesizerTrn
from .split_utils import split_sentence
+ # torch.backends.cudnn.enabled = False
class TTS(nn.Module):
def __init__(self,
language,
@@ -18,6 +21,7 @@
ckpt_path=None):
super().__init__()
+ tic = time.perf_counter()
self.log('Loading model...')
if device == 'auto':
@@ -62,6 +66,9 @@
self.language = 'ZH_MIX_EN' if language == 'ZH' else language # we support a ZH_MIX_EN model
self.log('Model loaded')
+ toc = time.perf_counter()
+ self.log(f"Time taken to load model: {toc - tic:0.4f} seconds")
@staticmethod
def audio_numpy_concat(segment_data_list, sr, speed=1.):

View File

@@ -24,16 +24,6 @@ class LayerNorm(nn.Module):
return x.transpose(1, -1)
-@torch.jit.script
-def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
-    n_channels_int = n_channels[0]
-    in_act = input_a + input_b
-    t_act = torch.tanh(in_act[:, :n_channels_int, :])
-    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
-    acts = t_act * s_act
-    return acts
class Encoder(nn.Module):
def __init__(
self,

View File

@@ -97,16 +97,6 @@ def subsequent_mask(length):
return mask
-@torch.jit.script
-def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
-    n_channels_int = n_channels[0]
-    in_act = input_a + input_b
-    t_act = torch.tanh(in_act[:, :n_channels_int, :])
-    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
-    acts = t_act * s_act
-    return acts
def convert_pad_shape(pad_shape):
layer = pad_shape[::-1]
pad_shape = [item for sublist in layer for item in sublist]
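
Both removed copies of fused_add_tanh_sigmoid_multiply implement the WaveNet-style gated activation: the summed inputs are split along the channel axis, the first half goes through tanh, the second half through sigmoid, and the two are multiplied. An illustrative plain-torch version of the same computation:

import torch

a = torch.randn(1, 8, 16)  # (batch, 2 * n_channels, time)
b = torch.randn(1, 8, 16)
n = 4  # number of gated output channels
s = a + b
# tanh "filter" on the first half of the channels, sigmoid "gate" on the rest
out = torch.tanh(s[:, :n, :]) * torch.sigmoid(s[:, n:, :])
print(out.shape)  # torch.Size([1, 4, 16])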

View File

@@ -182,33 +182,6 @@ class WN(torch.nn.Module):
res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
self.res_skip_layers.append(res_skip_layer)
-    def forward(self, x, x_mask, g=None, **kwargs):
-        output = torch.zeros_like(x)
-        n_channels_tensor = torch.IntTensor([self.hidden_channels])
-        if g is not None:
-            g = self.cond_layer(g)
-        for i in range(self.n_layers):
-            x_in = self.in_layers[i](x)
-            if g is not None:
-                cond_offset = i * 2 * self.hidden_channels
-                g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
-            else:
-                g_l = torch.zeros_like(x_in)
-            acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
-            acts = self.drop(acts)
-            res_skip_acts = self.res_skip_layers[i](acts)
-            if i < self.n_layers - 1:
-                res_acts = res_skip_acts[:, : self.hidden_channels, :]
-                x = (x + res_acts) * x_mask
-                output = output + res_skip_acts[:, self.hidden_channels :, :]
-            else:
-                output = output + res_skip_acts
-        return output * x_mask
def remove_weight_norm(self):
if self.gin_channels != 0:
torch.nn.utils.remove_weight_norm(self.cond_layer)

View File

@@ -2,6 +2,7 @@ import pickle
import os
import re
from g2p_en import G2p
+ from transformers import AutoTokenizer
from . import symbols
@@ -9,7 +10,6 @@ from .english_utils.abbreviations import expand_abbreviations
from .english_utils.time_norm import expand_time_english
from .english_utils.number_norm import normalize_numbers
- from transformers import AutoTokenizer
current_file_path = os.path.dirname(__file__)
CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")

View File

@@ -2,6 +2,7 @@ import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM
import sys
model_id = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = None

View File

@@ -1,6 +1,7 @@
+ from transformers import AutoTokenizer
from .fr_phonemizer import cleaner as fr_cleaner
from .fr_phonemizer import fr_to_ipa
- from transformers import AutoTokenizer
def distribute_phone(n_phone, n_word):

View File

@@ -1,27 +1,45 @@
from cx_Freeze import setup, Executable
import sysconfig
+ import sys
from version import __version__
"""
Increase the recursion limit to avoid RecursionError
@see: https://github.com/marcelotduarte/cx_Freeze/issues/2240
"""
sys.setrecursionlimit(sys.getrecursionlimit() * 10)
"""
Instead of injecting everything from a package,
it's recommended to only include the necessary files via the
"include_files" property.
"""
options = {
'build_exe': {
'packages': [
'spacy',
'torch',
'en_core_web_trf',
-      'fr_core_news_md'
+      'fr_core_news_md',
+      'pycrfsuite'
],
'includes': [
'srsly.msgpack.util',
'blis',
'cymem'
],
+    'include_files': [
+      ('tcp_server/src/.venv/lib/python3.9/site-packages/nvidia/cudnn/lib', 'lib/nvidia/cudnn/lib')
+    ]
}
}
# Include private libraries from the tokenizers package for Linux
if 'linux' in sysconfig.get_platform():
-  options['build_exe']['include_files'] = [('tcp_server/src/.venv/lib/python3.9/site-packages/tokenizers.libs', 'lib/tokenizers.libs')]
+  options['build_exe']['include_files'] = [
+    *options['build_exe']['include_files'],
+    ('tcp_server/src/.venv/lib/python3.9/site-packages/tokenizers.libs', 'lib/tokenizers.libs')
+  ]
executables = [
Executable(