mirror of
https://github.com/leon-ai/leon.git
synced 2024-11-28 12:43:35 +03:00
refactor(server): TTS Flite synthesizer + synthesizer parent class
This commit is contained in:
parent
157e28a063
commit
1146322196
@ -2,16 +2,14 @@ import type { Stream } from 'node:stream'
|
||||
import path from 'node:path'
|
||||
import fs from 'node:fs'
|
||||
|
||||
import Ffmpeg from 'fluent-ffmpeg'
|
||||
import { Polly, SynthesizeSpeechCommand } from '@aws-sdk/client-polly'
|
||||
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
|
||||
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
|
||||
|
||||
import type { LongLanguageCode } from '@/types'
|
||||
import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
|
||||
import type { AmazonVoiceConfiguration } from '@/schemas/voice-config-schemas'
|
||||
import { LANG, VOICE_CONFIG_PATH, TMP_PATH } from '@/constants'
|
||||
import { TTS } from '@/core'
|
||||
import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
|
||||
import { LogHelper } from '@/helpers/log-helper'
|
||||
import { StringHelper } from '@/helpers/string-helper'
|
||||
|
||||
@ -24,12 +22,14 @@ const VOICES = {
|
||||
}
|
||||
}
|
||||
|
||||
export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
|
||||
export class AmazonPollyTTSSynthesizer extends TTSSynthesizerBase implements TTSSynthesizerFacade {
|
||||
private readonly name = 'Amazon Polly TTS Synthesizer'
|
||||
private readonly client: Polly | undefined = undefined
|
||||
private readonly lang: LongLanguageCode = LANG as LongLanguageCode
|
||||
|
||||
constructor(lang: LongLanguageCode) {
|
||||
super()
|
||||
|
||||
LogHelper.title(this.name)
|
||||
LogHelper.success('New instance')
|
||||
|
||||
@ -79,13 +79,7 @@ export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
|
||||
wStream.on('error', reject)
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const ffmpeg = new (Ffmpeg as any)()
|
||||
ffmpeg.setFfmpegPath(ffmpegPath)
|
||||
ffmpeg.setFfprobePath(ffprobePath)
|
||||
|
||||
const data = await ffmpeg.input(audioFilePath).ffprobe()
|
||||
const duration = data.streams[0].duration * 1_000
|
||||
const duration = await this.getAudioDuration(audioFilePath)
|
||||
|
||||
TTS.em.emit('saved', duration)
|
||||
|
||||
|
83
server/src/core/tts/synthesizers/flite-synthesizer.ts
Normal file
83
server/src/core/tts/synthesizers/flite-synthesizer.ts
Normal file
@ -0,0 +1,83 @@
|
||||
import path from 'node:path'
|
||||
import fs from 'node:fs'
|
||||
import { spawn } from 'node:child_process'
|
||||
|
||||
import type { LongLanguageCode } from '@/types'
|
||||
import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
|
||||
import { LANG, TMP_PATH, BIN_PATH } from '@/constants'
|
||||
import { TTS } from '@/core'
|
||||
import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
|
||||
import { LogHelper } from '@/helpers/log-helper'
|
||||
import { StringHelper } from '@/helpers/string-helper'
|
||||
|
||||
/**
 * Voice tuning values for Flite.
 * Each entry is forwarded to the binary as a `--setf key=value` argument.
 */
const FLITE_CONFIG = {
  int_f0_target_mean: 115.0, // Intonation (85-180 Hz men; 165-255 Hz women)
  f0_shift: 1.0, // Low or high
  duration_stretch: 1.0, // Speed (lower = faster)
  int_f0_target_stddev: 15.0 // Pitch variability (lower = more flat)
} as const
|
||||
|
||||
/**
 * Text-to-speech synthesizer that shells out to the Flite binary.
 *
 * Each `synthesize` call spawns the binary found at `binPath`, asks it to
 * write a WAV file into `TMP_PATH`, and reports that file with its duration.
 */
export class FliteTTSSynthesizer
  extends TTSSynthesizerBase
  implements TTSSynthesizerFacade
{
  private readonly name = 'Flite TTS Synthesizer'
  private readonly binPath = path.join(BIN_PATH, 'flite', 'flite')
  private readonly lang: LongLanguageCode = LANG as LongLanguageCode

  /**
   * @param lang Language requested for the synthesizer. A warning is logged
   * for anything other than "en-US".
   */
  constructor(lang: LongLanguageCode) {
    super()

    LogHelper.title(this.name)
    LogHelper.success('New instance')

    this.lang = lang

    if (this.lang !== 'en-US') {
      LogHelper.warning(
        'The Flite synthesizer only accepts the "en-US" language at the moment'
      )
    }

    if (!fs.existsSync(this.binPath)) {
      LogHelper.error(
        `Cannot find ${this.binPath} You can set up the offline TTS by running: "npm run setup:offline-tts"`
      )
    }
  }

  /**
   * Synthesize `speech` into a WAV file.
   *
   * Emits `saved` on `TTS.em` with the duration once the file has been
   * written and probed.
   *
   * @param speech Text to speak
   * @returns The generated file path and its duration, or `null` when the
   * binary could not be run, exited with a non-zero code, or the duration
   * could not be read (the failure is logged in each case).
   */
  public async synthesize(speech: string): Promise<SynthesizeResult | null> {
    const audioFilePath = path.join(
      TMP_PATH,
      `${Date.now()}-${StringHelper.random(4)}.wav`
    )
    const args = [
      speech,
      '--setf',
      `int_f0_target_mean=${FLITE_CONFIG.int_f0_target_mean}`,
      '--setf',
      `f0_shift=${FLITE_CONFIG.f0_shift}`,
      '--setf',
      `duration_stretch=${FLITE_CONFIG.duration_stretch}`,
      '--setf',
      `int_f0_target_stddev=${FLITE_CONFIG.int_f0_target_stddev}`,
      '-o',
      audioFilePath
    ]

    // The audio file is only complete once the child process has closed
    const hasSucceeded = await this.runFlite(args)

    if (!hasSucceeded) {
      return null
    }

    try {
      const duration = await this.getAudioDuration(audioFilePath)

      TTS.em.emit('saved', duration)

      return {
        audioFilePath,
        duration
      }
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e)

      LogHelper.error(
        `${this.name} - Failed to get the audio duration: ${reason}`
      )

      return null
    }
  }

  /**
   * Run the Flite binary and wait for it to finish.
   *
   * @param args Command-line arguments handed to the binary
   * @returns `true` when the process closed with exit code 0, else `false`
   */
  private runFlite(args: string[]): Promise<boolean> {
    return new Promise((resolve) => {
      // Named `flite` rather than `process` to avoid shadowing the Node.js global
      const flite = spawn(this.binPath, args)
      let hasSpawnFailed = false

      // Handle error
      flite.stderr.on('data', (data: Buffer) => {
        LogHelper.error(data.toString())
      })

      // Without this listener a missing binary raises an unhandled "error" event
      flite.on('error', (error) => {
        hasSpawnFailed = true
        LogHelper.error(
          `${this.name} - Failed to run ${this.binPath}: ${error.message}`
        )
        resolve(false)
      })

      flite.on('close', (code) => {
        if (hasSpawnFailed) {
          // Already reported and resolved by the "error" listener
          return
        }

        if (code !== 0) {
          LogHelper.error(`${this.name} - Flite exited with code ${String(code)}`)
          resolve(false)

          return
        }

        resolve(true)
      })
    })
  }
}
|
17
server/src/core/tts/tts-synthesizer-base.ts
Normal file
17
server/src/core/tts/tts-synthesizer-base.ts
Normal file
@ -0,0 +1,17 @@
|
||||
import Ffmpeg from 'fluent-ffmpeg'
|
||||
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
|
||||
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
|
||||
|
||||
/**
 * Parent class holding behavior shared by the TTS synthesizers.
 */
export class TTSSynthesizerBase {
  /**
   * Probe an audio file with ffprobe and return the duration of its first stream.
   *
   * fluent-ffmpeg's `ffprobe` reports its result through a callback, so it is
   * wrapped in a Promise here instead of being awaited directly.
   *
   * @param audioFilePath Path of the audio file to probe
   * @returns Duration in milliseconds
   * @throws Rejects when ffprobe fails or reports no numeric duration for the
   * first stream
   */
  protected getAudioDuration(audioFilePath: string): Promise<number> {
    // The fluent-ffmpeg export is a factory; it can be called without `new`
    const command = Ffmpeg()
    command.setFfmpegPath(ffmpegPath)
    command.setFfprobePath(ffprobePath)

    return new Promise((resolve, reject) => {
      command.input(audioFilePath).ffprobe((err: unknown, data) => {
        if (err) {
          reject(err instanceof Error ? err : new Error(String(err)))

          return
        }

        // A missing stream or a non-numeric value ends up as NaN
        const seconds = Number(data.streams[0]?.duration)

        if (!Number.isFinite(seconds)) {
          reject(new Error(`Cannot read the audio duration of ${audioFilePath}`))

          return
        }

        resolve(seconds * 1_000)
      })
    })
  }
}
|
@ -1,4 +1,5 @@
|
||||
import type { AmazonPollyTTSSynthesizer } from '@/core/tts/synthesizers/amazon-polly-synthesizer'
|
||||
import type { FliteTTSSynthesizer } from '@/core/tts/synthesizers/flite-synthesizer'
|
||||
|
||||
export enum TTSProviders {
|
||||
AmazonPolly = 'amazon-polly',
|
||||
@ -21,7 +22,7 @@ export type SynthesizeResult = {
|
||||
|
||||
// TODO
|
||||
// export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | GoogleCloudTTSSynthesizer | WatsonTTSSynthesizer | undefined
|
||||
export type TTSSynthesizer = AmazonPollyTTSSynthesizer | undefined
|
||||
export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | undefined
|
||||
|
||||
export interface TTSSynthesizerFacade {
|
||||
synthesize(speech: string): Promise<SynthesizeResult | null>
|
||||
|
Loading…
Reference in New Issue
Block a user