1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-09-11 10:25:40 +03:00

feat: Apple Silicon support for voice models

This commit is contained in:
louistiti 2024-06-18 09:02:09 +08:00
parent c9afb25ed3
commit 32dc3ced0b
25 changed files with 295 additions and 320654 deletions

8
.gitignore vendored
View File

@ -24,21 +24,17 @@ debug.log
leon.json leon.json
bridges/python/src/Pipfile.lock bridges/python/src/Pipfile.lock
tcp_server/src/Pipfile.lock tcp_server/src/Pipfile.lock
tcp_server/src/lib/tts/models/*.pth
tcp_server/src/lib/tts/models/**/*.bin
tcp_server/src/lib/tts/models/**/*.json
tcp_server/src/lib/tts/models/**/*.txt
tcp_server/src/lib/asr/models/**/*.bin
!tcp_server/**/.gitkeep !tcp_server/**/.gitkeep
!bridges/python/**/.gitkeep !bridges/python/**/.gitkeep
!bridges/nodejs/**/.gitkeep !bridges/nodejs/**/.gitkeep
!core/data/models/llm/.gitkeep !core/data/models/**/.gitkeep
!**/*.sample* !**/*.sample*
skills/**/src/settings.json skills/**/src/settings.json
skills/**/memory/*.json skills/**/memory/*.json
core/data/models/*.nlp core/data/models/*.nlp
core/data/models/*.json core/data/models/*.json
core/data/models/llm/* core/data/models/llm/*
core/data/models/audio/**/*
package.json.backup package.json.backup
.python-version .python-version
schemas/**/*.json schemas/**/*.json

View File

@ -9,11 +9,7 @@ export class Settings<T extends Record<string, unknown>> {
constructor() { constructor() {
this.settingsPath = path.join(SKILL_PATH, 'src', 'settings.json') this.settingsPath = path.join(SKILL_PATH, 'src', 'settings.json')
this.settingsSamplePath = path.join( this.settingsSamplePath = path.join(SKILL_PATH, 'src', 'settings.json')
SKILL_PATH,
'src',
'settings.sample.json'
)
} }
/** /**
@ -32,7 +28,7 @@ export class Settings<T extends Record<string, unknown>> {
} }
/** /**
* Clear the settings and set it to the default settings.sample.json file * Clear the settings and set it to the default settings.json file
* @example clear() * @example clear()
*/ */
public async clear(): Promise<void> { public async clear(): Promise<void> {

View File

@ -9,7 +9,7 @@ from ..constants import SKILL_PATH
class Settings: class Settings:
def __init__(self): def __init__(self):
self.settings_path = path.join(SKILL_PATH, 'src', 'settings.json') self.settings_path = path.join(SKILL_PATH, 'src', 'settings.json')
self.settings_sample_path = path.join(SKILL_PATH, 'src', 'settings.sample.json') self.settings_sample_path = path.join(SKILL_PATH, 'src', 'settings.json')
def is_setting_set(self, key: str) -> bool: def is_setting_set(self, key: str) -> bool:
""" """
@ -23,7 +23,7 @@ class Settings:
def clear(self) -> None: def clear(self) -> None:
""" """
Clear the settings and set it to the default settings.sample.json file Clear the settings and set it to the default settings.json file
""" """
settings_sample = self.get_settings_sample() settings_sample = self.get_settings_sample()
self.set(settings_sample) self.set(settings_sample)

View File

@ -1,6 +1,5 @@
import fs from 'node:fs' import fs from 'node:fs'
import path from 'node:path' import path from 'node:path'
import stream from 'node:stream'
import { command } from 'execa' import { command } from 'execa'
@ -10,23 +9,12 @@ import {
FR_SPACY_MODEL_NAME, FR_SPACY_MODEL_NAME,
FR_SPACY_MODEL_VERSION, FR_SPACY_MODEL_VERSION,
PYTHON_BRIDGE_SRC_PATH, PYTHON_BRIDGE_SRC_PATH,
PYTHON_TCP_SERVER_SRC_PATH, PYTHON_TCP_SERVER_SRC_PATH
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
// PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH,
PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL
} from '@/constants' } from '@/constants'
import { CPUArchitectures, OSTypes } from '@/types' import { CPUArchitectures, OSTypes } from '@/types'
import { LogHelper } from '@/helpers/log-helper' import { LogHelper } from '@/helpers/log-helper'
import { LoaderHelper } from '@/helpers/loader-helper' import { LoaderHelper } from '@/helpers/loader-helper'
import { SystemHelper } from '@/helpers/system-helper' import { SystemHelper } from '@/helpers/system-helper'
import { FileHelper } from '@/helpers/file-helper'
/** /**
* Set up development environment according to the given setup target * Set up development environment according to the given setup target
@ -54,32 +42,6 @@ function getModelInstallationFileUrl(model, mirror = undefined) {
return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}` return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
} }
const ASR_GPU_MODEL_FILES = [
'model.bin',
'config.json',
'preprocessor_config.json',
'tokenizer.json',
'vocabulary.json'
]
const ASR_CPU_MODEL_FILES = [
'model.bin',
'config.json',
'tokenizer.json',
'vocabulary.txt'
]
/*const TTS_BERT_FRENCH_MODEL_FILES = [
'pytorch_model.bin', // Not needed? Compare with HF auto download in ~/.cache/huggingface/hub...
'config.json',
'vocab.txt',
'tokenizer_config.json'
]*/
const TTS_BERT_BASE_MODEL_FILES = [
'pytorch_model.bin',
'config.json',
'vocab.txt',
'tokenizer_config.json',
'tokenizer.json'
]
const SETUP_TARGETS = new Map() const SETUP_TARGETS = new Map()
const SPACY_MODELS = new Map() const SPACY_MODELS = new Map()
@ -342,141 +304,6 @@ SPACY_MODELS.set('fr', {
process.exit(1) process.exit(1)
} }
} }
const installTTSModel = async () => {
try {
LogHelper.info('Installing TTS model...')
const destPath = fs.createWriteStream(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
)
LogHelper.info(`Downloading TTS model...`)
const response = await FileHelper.downloadFile(
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`TTS model downloaded at ${destPath.path}`)
} catch (e) {
LogHelper.error(`Failed to install TTS model: ${e}`)
process.exit(1)
}
}
const installASRModelForGPU = async () => {
try {
LogHelper.info('Installing ASR model for GPU...')
for (const modelFile of ASR_GPU_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('ASR model for GPU installed')
} catch (e) {
LogHelper.error(`Failed to install ASR model for GPU: ${e}`)
process.exit(1)
}
}
const installASRModelForCPU = async () => {
try {
LogHelper.info('Installing ASR model for CPU...')
for (const modelFile of ASR_CPU_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('ASR model for CPU installed')
} catch (e) {
LogHelper.error(`Failed to install ASR model for CPU: ${e}`)
process.exit(1)
}
}
/*const installTTSBERTFrenchModel = async () => {
try {
LogHelper.info('Installing TTS BERT French model...')
for (const modelFile of TTS_BERT_FRENCH_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT French model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT French model: ${e}`)
process.exit(1)
}
}*/
const installTTSBERTBaseModel = async () => {
try {
LogHelper.info('Installing TTS BERT base model...')
for (const modelFile of TTS_BERT_BASE_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT base model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT base model: ${e}`)
process.exit(1)
}
}
LogHelper.info('Checking whether all spaCy models are installed...') LogHelper.info('Checking whether all spaCy models are installed...')
@ -501,84 +328,6 @@ SPACY_MODELS.set('fr', {
LogHelper.info('Not all spaCy models are installed') LogHelper.info('Not all spaCy models are installed')
await installSpacyModels() await installSpacyModels()
} }
LogHelper.info(
'Checking whether TTS BERT base language model files are downloaded...'
)
const areTTSBERTBaseFilesDownloaded = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH,
TTS_BERT_BASE_MODEL_FILES[TTS_BERT_BASE_MODEL_FILES.length - 1]
)
)
if (!areTTSBERTBaseFilesDownloaded) {
LogHelper.info('TTS BERT base language model files not downloaded')
await installTTSBERTBaseModel()
} else {
LogHelper.success(
'TTS BERT base language model files are already downloaded'
)
}
// TODO: later when multiple languages are supported
/*LogHelper.info(
'Checking whether TTS BERT French language model files are downloaded...'
)
const areTTSBERTFrenchFilesDownloaded = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH,
TTS_BERT_FRENCH_MODEL_FILES[TTS_BERT_FRENCH_MODEL_FILES.length - 1]
)
)
if (!areTTSBERTFrenchFilesDownloaded) {
LogHelper.info('TTS BERT French language model files not downloaded')
await installTTSBERTFrenchModel()
} else {
LogHelper.success(
'TTS BERT French language model files are already downloaded'
)
}*/
LogHelper.info('Checking whether the TTS model is installed...')
const isTTSModelInstalled = fs.existsSync(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH
)
if (!isTTSModelInstalled) {
LogHelper.info('TTS model is not installed')
await installTTSModel()
} else {
LogHelper.success('TTS model is already installed')
}
LogHelper.info('Checking whether the ASR model for GPU is installed...')
// Check if model.bin file exists in directory (last file in the list)
const isASRModelForGPUInstalled = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU,
ASR_GPU_MODEL_FILES[ASR_GPU_MODEL_FILES.length - 1]
)
)
if (!isASRModelForGPUInstalled) {
LogHelper.info('ASR model for GPU is not installed')
await installASRModelForGPU()
} else {
LogHelper.success('ASR model for GPU is already installed')
}
LogHelper.info('Checking whether the ASR model for CPU is installed...')
// Check if model.bin file exists in directory (last file in the list)
const isASRModelForCPUInstalled = fs.existsSync(
path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU,
ASR_CPU_MODEL_FILES[ASR_CPU_MODEL_FILES.length - 1]
)
)
if (!isASRModelForCPUInstalled) {
LogHelper.info('ASR model for CPU is not installed')
await installASRModelForCPU()
} else {
LogHelper.success('ASR model for CPU is already installed')
}
} }
LogHelper.success(`${setupTarget} development environment ready`) LogHelper.success(`${setupTarget} development environment ready`)

View File

@ -11,7 +11,7 @@ import { LogHelper } from '@/helpers/log-helper'
export default async function (skillFriendlyName, currentSkill) { export default async function (skillFriendlyName, currentSkill) {
const skillSrcPath = path.join(currentSkill.path, 'src') const skillSrcPath = path.join(currentSkill.path, 'src')
const settingsPath = path.join(skillSrcPath, 'settings.json') const settingsPath = path.join(skillSrcPath, 'settings.json')
const settingsSamplePath = path.join(skillSrcPath, 'settings.sample.json') const settingsSamplePath = path.join(skillSrcPath, 'settings.json')
// If there is a bridge set from the skill settings // If there is a bridge set from the skill settings
if (currentSkill.bridge) { if (currentSkill.bridge) {
@ -61,12 +61,12 @@ export default async function (skillFriendlyName, currentSkill) {
} }
} }
} else if (!fs.existsSync(settingsSamplePath)) { } else if (!fs.existsSync(settingsSamplePath)) {
// Stop the setup if the settings.sample.json of the current skill does not exist // Stop the setup if the settings.json of the current skill does not exist
LogHelper.error( LogHelper.error(
`The "${skillFriendlyName}" skill settings file does not exist. Try to pull the project (git pull)` `The "${skillFriendlyName}" skill settings file does not exist. Try to pull the project (git pull)`
) )
} else { } else {
// Duplicate settings.sample.json of the current skill to settings.json // Duplicate settings.json of the current skill to settings.json
fs.createReadStream(settingsSamplePath).pipe( fs.createReadStream(settingsSamplePath).pipe(
fs.createWriteStream(`${skillSrcPath}/settings.json`) fs.createWriteStream(`${skillSrcPath}/settings.json`)
) )

View File

@ -0,0 +1,206 @@
import fs from 'node:fs'
import path from 'node:path'
import stream from 'node:stream'
import {
PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH,
// PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_MODEL_PATH,
PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL,
PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL
} from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { FileHelper } from '@/helpers/file-helper'
// Names of the files that make up the ASR model.
// installASRModel() downloads them one by one, in this order, from the ASR Hugging Face prefix URL
const ASR_MODEL_FILES = [
'model.bin',
'config.json',
'preprocessor_config.json',
'tokenizer.json',
'vocabulary.json'
]
/*const TTS_BERT_FRENCH_MODEL_FILES = [
'pytorch_model.bin', // Not needed? Compare with HF auto download in ~/.cache/huggingface/hub...
'config.json',
'vocab.txt',
'tokenizer_config.json'
]*/
// Names of the files that make up the TTS BERT base language model.
// installTTSBERTBaseModel() downloads them one by one, in this order, from the BERT base Hugging Face prefix URL
const TTS_BERT_BASE_MODEL_FILES = [
'pytorch_model.bin',
'config.json',
'vocab.txt',
'tokenizer_config.json',
'tokenizer.json'
]
/**
 * Download the TTS model file to PYTHON_TCP_SERVER_TTS_MODEL_PATH.
 *
 * The destination file is only opened once the download response is available,
 * and it is removed again if the transfer fails. Otherwise an empty or truncated
 * file would be left behind and later mistaken for an installed model
 * by the `fs.existsSync` check.
 *
 * On failure the error is logged and the process exits with code 1.
 */
async function installTTSModel() {
  // Set once the destination file has been opened so it can be cleaned up on failure
  let isDestFileOpened = false

  try {
    LogHelper.info('Installing TTS model...')

    LogHelper.info(`Downloading TTS model...`)
    const response = await FileHelper.downloadFile(
      PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL,
      'stream'
    )

    isDestFileOpened = true
    const destPath = fs.createWriteStream(PYTHON_TCP_SERVER_TTS_MODEL_PATH)

    // pipeline() rejects when either side fails, whereas pipe() + finished(dest)
    // would wait forever if the download stream errors midway
    await stream.promises.pipeline(response.data, destPath)

    LogHelper.success(`TTS model downloaded at ${destPath.path}`)
  } catch (e) {
    if (isDestFileOpened) {
      try {
        // Best-effort removal of the partial file; the original error is what matters
        fs.rmSync(PYTHON_TCP_SERVER_TTS_MODEL_PATH, { force: true })
      } catch {
        // Ignore cleanup failures
      }
    }

    LogHelper.error(`Failed to install TTS model: ${e}`)
    process.exit(1)
  }
}
/**
 * Download every file listed in ASR_MODEL_FILES into PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH.
 *
 * Files are fetched sequentially, in list order. Each destination file is only
 * opened once its download response is available, and a file whose transfer fails
 * is removed so that no truncated file is left on disk.
 *
 * On failure the error is logged and the process exits with code 1.
 */
async function installASRModel() {
  // Path of the file currently being written (empty when none) so it can be cleaned up on failure
  let fileBeingWrittenPath = ''

  try {
    LogHelper.info('Installing ASR model...')

    for (const modelFile of ASR_MODEL_FILES) {
      const modelInstallationFileURL = `${PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
      const modelFilePath = path.join(
        PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH,
        modelFile
      )

      LogHelper.info(`Downloading ${modelFile}...`)
      const response = await FileHelper.downloadFile(
        modelInstallationFileURL,
        'stream'
      )

      fileBeingWrittenPath = modelFilePath
      const destPath = fs.createWriteStream(modelFilePath)

      // pipeline() rejects when either side fails, whereas pipe() + finished(dest)
      // would wait forever if the download stream errors midway
      await stream.promises.pipeline(response.data, destPath)
      fileBeingWrittenPath = ''

      LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
    }

    LogHelper.success('ASR model installed')
  } catch (e) {
    if (fileBeingWrittenPath !== '') {
      try {
        // Best-effort removal of the partial file; the original error is what matters
        fs.rmSync(fileBeingWrittenPath, { force: true })
      } catch {
        // Ignore cleanup failures
      }
    }

    LogHelper.error(`Failed to install ASR model: ${e}`)
    process.exit(1)
  }
}
/*async function installTTSBERTFrenchModel() {
try {
LogHelper.info('Installing TTS BERT French model...')
for (const modelFile of TTS_BERT_FRENCH_MODEL_FILES) {
const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
const destPath = fs.createWriteStream(
path.join(PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH, modelFile)
)
LogHelper.info(`Downloading ${modelFile}...`)
const response = await FileHelper.downloadFile(
modelInstallationFileURL,
'stream'
)
response.data.pipe(destPath)
await stream.promises.finished(destPath)
LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
}
LogHelper.success('TTS BERT French model installed')
} catch (e) {
LogHelper.error(`Failed to install TTS BERT French model: ${e}`)
process.exit(1)
}
}*/
/**
 * Download every file listed in TTS_BERT_BASE_MODEL_FILES into
 * PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH.
 *
 * Files are fetched sequentially, in list order. Each destination file is only
 * opened once its download response is available, and a file whose transfer fails
 * is removed so that no truncated file is left on disk.
 *
 * On failure the error is logged and the process exits with code 1.
 */
async function installTTSBERTBaseModel() {
  // Path of the file currently being written (empty when none) so it can be cleaned up on failure
  let fileBeingWrittenPath = ''

  try {
    LogHelper.info('Installing TTS BERT base model...')

    for (const modelFile of TTS_BERT_BASE_MODEL_FILES) {
      const modelInstallationFileURL = `${PYTHON_TCP_SERVER_TTS_BERT_BASE_MODEL_HF_PREFIX_DOWNLOAD_URL}/${modelFile}?download=true`
      const modelFilePath = path.join(
        PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH,
        modelFile
      )

      LogHelper.info(`Downloading ${modelFile}...`)
      const response = await FileHelper.downloadFile(
        modelInstallationFileURL,
        'stream'
      )

      fileBeingWrittenPath = modelFilePath
      const destPath = fs.createWriteStream(modelFilePath)

      // pipeline() rejects when either side fails, whereas pipe() + finished(dest)
      // would wait forever if the download stream errors midway
      await stream.promises.pipeline(response.data, destPath)
      fileBeingWrittenPath = ''

      LogHelper.success(`${modelFile} downloaded at ${destPath.path}`)
    }

    LogHelper.success('TTS BERT base model installed')
  } catch (e) {
    if (fileBeingWrittenPath !== '') {
      try {
        // Best-effort removal of the partial file; the original error is what matters
        fs.rmSync(fileBeingWrittenPath, { force: true })
      } catch {
        // Ignore cleanup failures
      }
    }

    LogHelper.error(`Failed to install TTS BERT base model: ${e}`)
    process.exit(1)
  }
}
/**
 * Make sure the voice models needed by the TCP server are available on disk:
 * the TTS BERT base language model files, the TTS model and the ASR model.
 * Each of them is downloaded only when it is missing.
 *
 * A multi-file model is considered present only when every file of its list
 * exists, so an incomplete set of files triggers a new download
 */
export default async () => {
  LogHelper.info(
    'Checking whether TTS BERT base language model files are downloaded...'
  )
  // Every file of the list must exist, not only the last one
  const areTTSBERTBaseFilesDownloaded = TTS_BERT_BASE_MODEL_FILES.every(
    (modelFile) =>
      fs.existsSync(
        path.join(PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH, modelFile)
      )
  )
  if (!areTTSBERTBaseFilesDownloaded) {
    LogHelper.info('TTS BERT base language model files not downloaded')
    await installTTSBERTBaseModel()
  } else {
    LogHelper.success(
      'TTS BERT base language model files are already downloaded'
    )
  }

  // TODO: later when multiple languages are supported
  /*LogHelper.info(
    'Checking whether TTS BERT French language model files are downloaded...'
  )
  const areTTSBERTFrenchFilesDownloaded = fs.existsSync(
    path.join(
      PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH,
      TTS_BERT_FRENCH_MODEL_FILES[TTS_BERT_FRENCH_MODEL_FILES.length - 1]
    )
  )
  if (!areTTSBERTFrenchFilesDownloaded) {
    LogHelper.info('TTS BERT French language model files not downloaded')
    await installTTSBERTFrenchModel()
  } else {
    LogHelper.success(
      'TTS BERT French language model files are already downloaded'
    )
  }*/

  LogHelper.info('Checking whether the TTS model is installed...')
  const isTTSModelInstalled = fs.existsSync(PYTHON_TCP_SERVER_TTS_MODEL_PATH)
  if (!isTTSModelInstalled) {
    LogHelper.info('TTS model is not installed')
    await installTTSModel()
  } else {
    LogHelper.success('TTS model is already installed')
  }

  LogHelper.info('Checking whether the ASR model is installed...')
  // The ASR model is installed only when all of its files exist in the directory
  const isASRModelInstalled = ASR_MODEL_FILES.every((modelFile) =>
    fs.existsSync(path.join(PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH, modelFile))
  )
  if (!isASRModelInstalled) {
    LogHelper.info('ASR model is not installed')
    await installASRModel()
  } else {
    LogHelper.success('ASR model is already installed')
  }
}

View File

@ -11,6 +11,7 @@ import setupCore from './setup-core'
import setupSkills from './setup-skills/setup-skills' import setupSkills from './setup-skills/setup-skills'
import setupLLM from './setup-llm' import setupLLM from './setup-llm'
import setupBinaries from './setup-binaries' import setupBinaries from './setup-binaries'
import setupTCPServerModels from './setup-tcp-server-models'
import createInstanceID from './create-instance-id' import createInstanceID from './create-instance-id'
// Do not load ".env" file because it is not created yet // Do not load ".env" file because it is not created yet
@ -32,6 +33,7 @@ import createInstanceID from './create-instance-id'
} }
await setupBinaries() await setupBinaries()
await setupTCPServerModels()
await generateHTTPAPIKey() await generateHTTPAPIKey()
await generateJSONSchemas() await generateJSONSchemas()
LoaderHelper.start() LoaderHelper.start()

View File

@ -15,6 +15,37 @@ const TESTING_ENV = 'testing'
export const GITHUB_URL = 'https://github.com/leon-ai/leon' export const GITHUB_URL = 'https://github.com/leon-ai/leon'
/**
* Environments
*/
export const LEON_NODE_ENV = process.env['LEON_NODE_ENV'] || PRODUCTION_ENV
export const IS_PRODUCTION_ENV = LEON_NODE_ENV === PRODUCTION_ENV
export const IS_DEVELOPMENT_ENV = LEON_NODE_ENV === DEVELOPMENT_ENV
export const IS_TESTING_ENV = LEON_NODE_ENV === TESTING_ENV
/**
* Paths
*/
export const BIN_PATH = path.join(process.cwd(), 'bin')
export const LOGS_PATH = path.join(process.cwd(), 'logs')
export const SKILLS_PATH = path.join(process.cwd(), 'skills')
export const GLOBAL_DATA_PATH = path.join(process.cwd(), 'core', 'data')
export const MODELS_PATH = path.join(GLOBAL_DATA_PATH, 'models')
export const AUDIO_MODELS_PATH = path.join(MODELS_PATH, 'audio')
export const VOICE_CONFIG_PATH = path.join(
process.cwd(),
'core',
'config',
'voice'
)
export const SERVER_PATH = path.join(
process.cwd(),
'server',
IS_PRODUCTION_ENV ? 'dist' : 'src'
)
export const TMP_PATH = path.join(SERVER_PATH, 'tmp')
export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/** /**
* Binaries / distribution * Binaries / distribution
*/ */
@ -46,52 +77,43 @@ export const PYTHON_TCP_SERVER_SRC_PATH = path.join(
PYTHON_TCP_SERVER_ROOT_PATH, PYTHON_TCP_SERVER_ROOT_PATH,
'src' 'src'
) )
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME = export const PYTHON_TCP_SERVER_SETTINGS_PATH = path.join(
'EN-Leon-V1_1-G_600000.pth' PYTHON_TCP_SERVER_ROOT_PATH,
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH = path.join( 'settings.json'
PYTHON_TCP_SERVER_SRC_PATH,
'lib',
'tts',
'models'
) )
export const PYTHON_TCP_SERVER_SRC_TTS_MODEL_PATH = path.join( export const PYTHON_TCP_SERVER_SETTINGS = JSON.parse(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH, fs.readFileSync(PYTHON_TCP_SERVER_SETTINGS_PATH, 'utf8')
PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME
) )
export const PYTHON_TCP_SERVER_SRC_TTS_BERT_FRENCH_DIR_PATH = path.join( export const PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME =
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH, PYTHON_TCP_SERVER_SETTINGS.tts.model_file_name
export const PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH = path.join(
AUDIO_MODELS_PATH,
'tts'
)
export const PYTHON_TCP_SERVER_TTS_MODEL_PATH = path.join(
PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME
)
export const PYTHON_TCP_SERVER_TTS_BERT_FRENCH_DIR_PATH = path.join(
PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
'bert-base-french-europeana-cased' 'bert-base-french-europeana-cased'
) )
export const PYTHON_TCP_SERVER_SRC_TTS_BERT_BASE_DIR_PATH = path.join( export const PYTHON_TCP_SERVER_TTS_BERT_BASE_DIR_PATH = path.join(
PYTHON_TCP_SERVER_SRC_TTS_MODEL_DIR_PATH, PYTHON_TCP_SERVER_TTS_MODEL_DIR_PATH,
'bert-base-uncased' 'bert-base-uncased'
) )
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH = path.join( export const PYTHON_TCP_SERVER_ASR_MODEL_DIR_PATH = path.join(
PYTHON_TCP_SERVER_SRC_PATH, AUDIO_MODELS_PATH,
'lib', 'asr'
'asr',
'models'
)
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_GPU = path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
'gpu'
)
export const PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH_FOR_CPU = path.join(
PYTHON_TCP_SERVER_SRC_ASR_MODEL_PATH,
'cpu'
) )
export const PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL = export const PYTHON_TCP_SERVER_TTS_MODEL_HF_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL( NetworkHelper.setHuggingFaceURL(
`https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_SRC_TTS_MODEL_FILE_NAME}?download=true` `https://huggingface.co/Louistiti/Voice-EN-Leon-V1/resolve/main/${PYTHON_TCP_SERVER_TTS_MODEL_FILE_NAME}?download=true`
) )
export const PYTHON_TCP_SERVER_ASR_MODEL_GPU_HF_PREFIX_DOWNLOAD_URL = export const PYTHON_TCP_SERVER_ASR_MODEL_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL( NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/Systran/faster-distil-whisper-large-v3/resolve/main' 'https://huggingface.co/Systran/faster-distil-whisper-large-v3/resolve/main'
) )
export const PYTHON_TCP_SERVER_ASR_MODEL_CPU_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/Systran/faster-whisper-medium/resolve/main'
)
export const PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL = export const PYTHON_TCP_SERVER_TTS_BERT_FRENCH_MODEL_HF_PREFIX_DOWNLOAD_URL =
NetworkHelper.setHuggingFaceURL( NetworkHelper.setHuggingFaceURL(
'https://huggingface.co/dbmdz/bert-base-french-europeana-cased/resolve/main' 'https://huggingface.co/dbmdz/bert-base-french-europeana-cased/resolve/main'
@ -179,14 +201,6 @@ export const EN_SPACY_MODEL_VERSION = '3.4.0'
export const FR_SPACY_MODEL_NAME = 'fr_core_news_md' export const FR_SPACY_MODEL_NAME = 'fr_core_news_md'
export const FR_SPACY_MODEL_VERSION = '3.4.0' export const FR_SPACY_MODEL_VERSION = '3.4.0'
/**
* Environments
*/
export const LEON_NODE_ENV = process.env['LEON_NODE_ENV'] || PRODUCTION_ENV
export const IS_PRODUCTION_ENV = LEON_NODE_ENV === PRODUCTION_ENV
export const IS_DEVELOPMENT_ENV = LEON_NODE_ENV === DEVELOPMENT_ENV
export const IS_TESTING_ENV = LEON_NODE_ENV === TESTING_ENV
/** /**
* Leon environment preferences * Leon environment preferences
*/ */
@ -215,28 +229,6 @@ export const PYTHON_TCP_SERVER_PORT = Number(
export const IS_TELEMETRY_ENABLED = process.env['LEON_TELEMETRY'] === 'true' export const IS_TELEMETRY_ENABLED = process.env['LEON_TELEMETRY'] === 'true'
/**
* Paths
*/
export const BIN_PATH = path.join(process.cwd(), 'bin')
export const LOGS_PATH = path.join(process.cwd(), 'logs')
export const SKILLS_PATH = path.join(process.cwd(), 'skills')
export const GLOBAL_DATA_PATH = path.join(process.cwd(), 'core', 'data')
export const MODELS_PATH = path.join(GLOBAL_DATA_PATH, 'models')
export const VOICE_CONFIG_PATH = path.join(
process.cwd(),
'core',
'config',
'voice'
)
export const SERVER_PATH = path.join(
process.cwd(),
'server',
IS_PRODUCTION_ENV ? 'dist' : 'src'
)
export const TMP_PATH = path.join(SERVER_PATH, 'tmp')
export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/** /**
* NLP models paths * NLP models paths
*/ */

View File

@ -3,6 +3,8 @@
"rms_threshold": 196, "rms_threshold": 196,
"device": "auto" "device": "auto"
}, },
"tts": {}, "tts": {
"model_file_name": "EN-Leon-V1_1-G_600000.pth"
},
"wake_word": {} "wake_word": {}
} }

View File

@ -5,12 +5,14 @@ import torch
import numpy as np import numpy as np
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
from ..constants import ASR_MODEL_PATH_FOR_GPU, ASR_MODEL_PATH_FOR_CPU from ..constants import ASR_MODEL_PATH
from ..utils import ThrottledCallback, is_macos, get_settings from ..utils import ThrottledCallback, is_macos, get_settings
class ASR: class ASR:
def __init__(self, def __init__(self,
# @see https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py
# auto, cpu, cuda
device='auto', device='auto',
interrupt_leon_speech_callback=None, interrupt_leon_speech_callback=None,
transcribed_callback=None, transcribed_callback=None,
@ -66,7 +68,7 @@ class ASR:
self.channels = 1 self.channels = 1
self.rate = 16000 self.rate = 16000
self.frames_per_buffer = 1024 self.frames_per_buffer = 1024
self.rms_threshold = get_settings('asr')[''] self.rms_threshold = get_settings('asr')['rms_threshold']
# Duration of silence after which the audio data is considered as a new utterance (in seconds) # Duration of silence after which the audio data is considered as a new utterance (in seconds)
self.silence_duration = 1 self.silence_duration = 1
""" """
@ -80,23 +82,16 @@ class ASR:
self.stream = None self.stream = None
self.model = None self.model = None
model_params = {
'model_size_or_path': ASR_MODEL_PATH,
'device': self.device,
'compute_type': self.compute_type,
'local_files_only': True
}
if self.device == 'cpu': if self.device == 'cpu':
model_path = ASR_MODEL_PATH_FOR_CPU model_params['cpu_threads'] = 4
self.model = WhisperModel(
model_path, self.model = WhisperModel(**model_params)
device=self.device,
compute_type=self.compute_type,
local_files_only=True,
cpu_threads=4
)
else:
model_path = ASR_MODEL_PATH_FOR_GPU
self.model = WhisperModel(
model_path,
device=self.device,
compute_type=self.compute_type,
local_files_only=True
)
self.log('Model loaded') self.log('Model loaded')
toc = time.perf_counter() toc = time.perf_counter()

View File

@ -1,31 +0,0 @@
{
"alignment_heads": [
[13, 15],
[15, 4],
[15, 15],
[16, 1],
[20, 0],
[23, 4]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362
],
"suppress_ids_begin": [220, 50257]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,46 +0,0 @@
{
"alignment_heads": [
[1, 0],
[1, 1],
[1, 2],
[1, 3],
[1, 4],
[1, 5],
[1, 6],
[1, 7],
[1, 8],
[1, 9],
[1, 10],
[1, 11],
[1, 12],
[1, 13],
[1, 14],
[1, 15],
[1, 16],
[1, 17],
[1, 18],
[1, 19]
],
"lang_ids": [
50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269,
50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280,
50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291,
50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302,
50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313,
50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324,
50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335,
50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346,
50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357,
50358
],
"suppress_ids": [
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90,
91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853,
1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585,
6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793,
14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520,
26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425,
49870, 50254, 50258, 50359, 50360, 50361, 50362, 50363
],
"suppress_ids_begin": [220, 50257]
}

View File

@ -1,14 +0,0 @@
{
"chunk_length": 30,
"feature_extractor_type": "WhisperFeatureExtractor",
"feature_size": 128,
"hop_length": 160,
"n_fft": 400,
"n_samples": 480000,
"nb_max_frames": 3000,
"padding_side": "right",
"padding_value": 0.0,
"processor_class": "WhisperProcessor",
"return_attention_mask": false,
"sampling_rate": 16000
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -10,24 +10,16 @@ if IS_RAN_FROM_BINARY:
LIB_PATH = os.path.join(os.path.dirname(sys.executable), 'lib', 'lib') LIB_PATH = os.path.join(os.path.dirname(sys.executable), 'lib', 'lib')
TMP_PATH = os.path.join(LIB_PATH, 'tmp') TMP_PATH = os.path.join(LIB_PATH, 'tmp')
AUDIO_MODELS_PATH = os.path.join(os.getcwd(), 'core', 'data', 'models', 'audio')
SETTINGS_PATH = os.path.join(os.getcwd(), 'tcp_server', 'settings.json') SETTINGS_PATH = os.path.join(os.getcwd(), 'tcp_server', 'settings.json')
# TTS # TTS
TTS_MODEL_VERSION = 'V1_1' TTS_MODEL_FOLDER_PATH = os.path.join(AUDIO_MODELS_PATH, 'tts')
TTS_MODEL_ITERATION = '600000'
TTS_MODEL_NAME = f'EN-Leon-{TTS_MODEL_VERSION}-G_{TTS_MODEL_ITERATION}'
TTS_MODEL_FILE_NAME = f'{TTS_MODEL_NAME}.pth'
TTS_LIB_PATH = os.path.join(LIB_PATH, 'tts')
TTS_MODEL_FOLDER_PATH = os.path.join(TTS_LIB_PATH, 'models')
TTS_BERT_FRENCH_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-case-french-europeana-cased') TTS_BERT_FRENCH_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-case-french-europeana-cased')
TTS_BERT_BASE_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-base-uncased') TTS_BERT_BASE_MODEL_DIR_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'bert-base-uncased')
TTS_MODEL_CONFIG_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'config.json') TTS_MODEL_CONFIG_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, 'config.json')
TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, TTS_MODEL_FILE_NAME)
IS_TTS_ENABLED = os.environ.get('LEON_TTS', 'true') == 'true' IS_TTS_ENABLED = os.environ.get('LEON_TTS', 'true') == 'true'
# ASR # ASR
ASR_LIB_PATH = os.path.join(LIB_PATH, 'asr') ASR_MODEL_PATH = os.path.join(AUDIO_MODELS_PATH, 'asr')
ASR_MODEL_FOLDER_PATH = os.path.join(ASR_LIB_PATH, 'models')
ASR_MODEL_PATH_FOR_GPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'gpu')
ASR_MODEL_PATH_FOR_CPU = os.path.join(ASR_MODEL_FOLDER_PATH, 'cpu')
IS_ASR_ENABLED = os.environ.get('LEON_STT', 'true') == 'true' IS_ASR_ENABLED = os.environ.get('LEON_STT', 'true') == 'true'

View File

@ -13,12 +13,14 @@ from .asr.api import ASR
from .tts.api import TTS from .tts.api import TTS
from .constants import ( from .constants import (
TTS_MODEL_CONFIG_PATH, TTS_MODEL_CONFIG_PATH,
TTS_MODEL_PATH, TTS_MODEL_FOLDER_PATH,
IS_TTS_ENABLED, IS_TTS_ENABLED,
TMP_PATH, TMP_PATH,
IS_ASR_ENABLED IS_ASR_ENABLED
) )
TTS_MODEL_PATH = os.path.join(TTS_MODEL_FOLDER_PATH, get_settings('tts')['model_file_name'])
class TCPServer: class TCPServer:
def __init__(self, host: str, port: Union[str, int]): def __init__(self, host: str, port: Union[str, int]):

View File

@ -1,288 +0,0 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 52,
"epochs": 10000,
"learning_rate": 0.0003,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 8,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 16384,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0,
"skip_optimizer": true
},
"data": {
"training_files": "/output/V1/model/train.list",
"validation_files": "/output/V1/model/val.list",
"max_wav_value": 32768.0,
"sampling_rate": 44100,
"filter_length": 2048,
"hop_length": 512,
"win_length": 2048,
"n_mel_channels": 128,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 1,
"cleaned_text": true,
"spk2id": {
"EN-Leon-V1_1": 0
}
},
"model": {
"use_spk_conditioned_encoder": true,
"use_noise_scaled_mas": true,
"use_mel_posterior_encoder": false,
"use_duration_discriminator": true,
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"n_layers_trans_flow": 3,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3, 7, 11],
"resblock_dilation_sizes": [
[1, 3, 5],
[1, 3, 5],
[1, 3, 5]
],
"upsample_rates": [8, 8, 2, 2, 2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16, 16, 8, 2, 2],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"num_languages": 8,
"num_tones": 16,
"symbols": [
"_",
"\"",
"(",
")",
"*",
"/",
":",
"AA",
"E",
"EE",
"En",
"N",
"OO",
"Q",
"V",
"[",
"\\",
"]",
"^",
"a",
"a:",
"aa",
"ae",
"ah",
"ai",
"an",
"ang",
"ao",
"aw",
"ay",
"b",
"by",
"c",
"ch",
"d",
"dh",
"dy",
"e",
"e:",
"eh",
"ei",
"en",
"eng",
"er",
"ey",
"f",
"g",
"gy",
"h",
"hh",
"hy",
"i",
"i0",
"i:",
"ia",
"ian",
"iang",
"iao",
"ie",
"ih",
"in",
"ing",
"iong",
"ir",
"iu",
"iy",
"j",
"jh",
"k",
"ky",
"l",
"m",
"my",
"n",
"ng",
"ny",
"o",
"o:",
"ong",
"ou",
"ow",
"oy",
"p",
"py",
"q",
"r",
"ry",
"s",
"sh",
"t",
"th",
"ts",
"ty",
"u",
"u:",
"ua",
"uai",
"uan",
"uang",
"uh",
"ui",
"un",
"uo",
"uw",
"v",
"van",
"ve",
"vn",
"w",
"x",
"y",
"z",
"zh",
"zy",
"~",
"æ",
"ç",
"ð",
"ø",
"ŋ",
"œ",
"ɐ",
"ɑ",
"ɒ",
"ɔ",
"ɕ",
"ə",
"ɛ",
"ɜ",
"ɡ",
"ɣ",
"ɥ",
"ɦ",
"ɪ",
"ɫ",
"ɬ",
"ɭ",
"ɯ",
"ɲ",
"ɵ",
"ɸ",
"ɹ",
"ɾ",
"ʁ",
"ʃ",
"ʊ",
"ʌ",
"ʎ",
"ʏ",
"ʑ",
"ʒ",
"ʝ",
"ʲ",
"ˈ",
"ˌ",
"ː",
"̃",
"̩",
"β",
"θ",
"ᄀ",
"ᄁ",
"ᄂ",
"ᄃ",
"ᄄ",
"ᄅ",
"ᄆ",
"ᄇ",
"ᄈ",
"ᄉ",
"ᄊ",
"ᄋ",
"ᄌ",
"ᄍ",
"ᄎ",
"ᄏ",
"ᄐ",
"ᄑ",
"ᄒ",
"ᅡ",
"ᅢ",
"ᅣ",
"ᅤ",
"ᅥ",
"ᅦ",
"ᅧ",
"ᅨ",
"ᅩ",
"ᅪ",
"ᅫ",
"ᅬ",
"ᅭ",
"ᅮ",
"ᅯ",
"ᅰ",
"ᅱ",
"ᅲ",
"ᅳ",
"ᅴ",
"ᅵ",
"ᆨ",
"ᆫ",
"ᆮ",
"ᆯ",
"ᆷ",
"ᆸ",
"ᆼ",
"ㄸ",
"!",
"?",
"…",
",",
".",
"'",
"-",
"¿",
"¡",
"SP",
"UNK"
]
}