1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-11-28 12:43:35 +03:00

refactor(server): TTS Flite synthesizer + synthesizer parent class

This commit is contained in:
louistiti 2023-02-07 21:26:14 +08:00
parent 157e28a063
commit 1146322196
4 changed files with 107 additions and 12 deletions

View File

@ -2,16 +2,14 @@ import type { Stream } from 'node:stream'
import path from 'node:path'
import fs from 'node:fs'
import Ffmpeg from 'fluent-ffmpeg'
import { Polly, SynthesizeSpeechCommand } from '@aws-sdk/client-polly'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
import type { LongLanguageCode } from '@/types'
import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
import type { AmazonVoiceConfiguration } from '@/schemas/voice-config-schemas'
import { LANG, VOICE_CONFIG_PATH, TMP_PATH } from '@/constants'
import { TTS } from '@/core'
import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
@ -24,12 +22,14 @@ const VOICES = {
}
}
export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
export class AmazonPollyTTSSynthesizer extends TTSSynthesizerBase implements TTSSynthesizerFacade {
private readonly name = 'Amazon Polly TTS Synthesizer'
private readonly client: Polly | undefined = undefined
private readonly lang: LongLanguageCode = LANG as LongLanguageCode
constructor(lang: LongLanguageCode) {
super()
LogHelper.title(this.name)
LogHelper.success('New instance')
@ -79,13 +79,7 @@ export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
wStream.on('error', reject)
})
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const ffmpeg = new (Ffmpeg as any)()
ffmpeg.setFfmpegPath(ffmpegPath)
ffmpeg.setFfprobePath(ffprobePath)
const data = await ffmpeg.input(audioFilePath).ffprobe()
const duration = data.streams[0].duration * 1_000
const duration = await this.getAudioDuration(audioFilePath)
TTS.em.emit('saved', duration)

View File

@ -0,0 +1,83 @@
import path from 'node:path'
import fs from 'node:fs'
import { spawn } from 'node:child_process'
import type { LongLanguageCode } from '@/types'
import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
import { LANG, TMP_PATH, BIN_PATH } from '@/constants'
import { TTS } from '@/core'
import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
import { LogHelper } from '@/helpers/log-helper'
import { StringHelper } from '@/helpers/string-helper'
/**
 * Voice tuning values forwarded to the Flite binary as `--setf key=value`
 * feature overrides.
 */
const FLITE_CONFIG = {
  // Intonation (85-180 Hz men; 165-255 Hz women)
  int_f0_target_mean: 115,
  // Low or high
  f0_shift: 1,
  // Speed (lower = faster)
  duration_stretch: 1,
  // Pitch variability (lower = more flat)
  int_f0_target_stddev: 15
}
/**
 * Offline TTS synthesizer that runs the Flite binary located at
 * `<BIN_PATH>/flite/flite` and writes the generated speech to a WAV file
 * in `TMP_PATH`.
 */
export class FliteTTSSynthesizer
  extends TTSSynthesizerBase
  implements TTSSynthesizerFacade
{
  private readonly name = 'Flite TTS Synthesizer'
  private readonly binPath = path.join(BIN_PATH, 'flite', 'flite')
  private readonly lang: LongLanguageCode = LANG as LongLanguageCode

  /**
   * Logs the instantiation, warns when the requested language is not
   * "en-US" and reports a missing Flite binary.
   *
   * @param lang Language the synthesizer is asked to speak
   */
  constructor(lang: LongLanguageCode) {
    super()

    LogHelper.title(this.name)
    LogHelper.success('New instance')

    this.lang = lang

    if (this.lang !== 'en-US') {
      LogHelper.warning(
        'The Flite synthesizer only accepts the "en-US" language at the moment'
      )
    }

    if (!fs.existsSync(this.binPath)) {
      LogHelper.error(
        `Cannot find ${this.binPath} You can set up the offline TTS by running: "npm run setup:offline-tts"`
      )
    }
  }

  /**
   * Synthesizes `speech` into a WAV file with Flite.
   *
   * The returned promise settles only once the Flite process has finished
   * and the audio duration has been measured, so callers actually receive
   * the result. On success the `saved` event is emitted on `TTS.em` with
   * the duration.
   *
   * @param speech Text to synthesize
   * @returns The audio file path and its duration, or `null` when the
   * binary could not be run, exited with a failure code, or the duration
   * could not be read
   */
  public async synthesize(speech: string): Promise<SynthesizeResult | null> {
    const audioFilePath = path.join(
      TMP_PATH,
      `${Date.now()}-${StringHelper.random(4)}.wav`
    )
    // NOTE(review): `speech` is passed as Flite's positional argument;
    // confirm how Flite treats text without spaces or starting with "-"
    const fliteArgs = [
      speech,
      '--setf',
      `int_f0_target_mean=${FLITE_CONFIG.int_f0_target_mean}`,
      '--setf',
      `f0_shift=${FLITE_CONFIG.f0_shift}`,
      '--setf',
      `duration_stretch=${FLITE_CONFIG.duration_stretch}`,
      '--setf',
      `int_f0_target_stddev=${FLITE_CONFIG.int_f0_target_stddev}`,
      '-o',
      audioFilePath
    ]

    return new Promise<SynthesizeResult | null>((resolve) => {
      // Both "error" and "close" can fire for the same failure; only the
      // first outcome is reported
      let isSettled = false
      const settle = (result: SynthesizeResult | null): void => {
        if (!isSettled) {
          isSettled = true
          resolve(result)
        }
      }

      // Named so that it does not shadow the Node.js global `process`
      const fliteProcess = spawn(this.binPath, fliteArgs)

      // Forward whatever Flite prints on stderr to the logs
      fliteProcess.stderr.on('data', (data) => {
        LogHelper.error(data.toString())
      })

      // Without this listener a failed spawn (e.g. missing binary) would
      // raise an unhandled "error" event
      fliteProcess.on('error', (error) => {
        if (!isSettled) {
          LogHelper.error(`Failed to run ${this.binPath}: ${error.message}`)
        }
        settle(null)
      })

      // "close" fires once the process has exited and its stdio streams
      // are closed, i.e. the output file is complete
      fliteProcess.on('close', (code) => {
        if (isSettled) {
          return
        }

        if (code !== 0) {
          LogHelper.error(`${this.binPath} exited with code ${code}`)
          settle(null)
          return
        }

        this.getAudioDuration(audioFilePath)
          .then((duration) => {
            TTS.em.emit('saved', duration)
            settle({ audioFilePath, duration })
          })
          .catch((error: unknown) => {
            LogHelper.error(
              `Failed to get the duration of ${audioFilePath}: ${String(error)}`
            )
            settle(null)
          })
      })
    })
  }
}

View File

@ -0,0 +1,17 @@
import Ffmpeg from 'fluent-ffmpeg'
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
/**
 * Parent class of the TTS synthesizers, holding the helpers they share.
 */
export class TTSSynthesizerBase {
  /**
   * Probes an audio file with ffprobe (through fluent-ffmpeg) and returns
   * its duration.
   *
   * fluent-ffmpeg's `ffprobe` reports its result through a callback and
   * does not return a promise, so the call is wrapped in one here.
   *
   * @param audioFilePath Path of the audio file to probe
   * @returns The duration in milliseconds
   * @throws Rejects when ffprobe fails or reports no numeric duration
   */
  protected async getAudioDuration(audioFilePath: string): Promise<number> {
    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
    // @ts-ignore
    const ffmpeg = new Ffmpeg()
    ffmpeg.setFfmpegPath(ffmpegPath)
    ffmpeg.setFfprobePath(ffprobePath)

    return new Promise<number>((resolve, reject) => {
      ffmpeg.input(audioFilePath).ffprobe(
        (
          err: Error | null | undefined,
          data?: {
            streams?: { duration?: number | string }[]
            format?: { duration?: number | string }
          }
        ) => {
          if (err) {
            reject(err)
            return
          }

          // Prefer the first stream's duration; fall back to the
          // container-level one when the stream does not carry a number
          const seconds = [
            data?.streams?.[0]?.duration,
            data?.format?.duration
          ]
            .map((rawDuration) => Number(rawDuration))
            .find((candidate) => Number.isFinite(candidate))

          if (seconds === undefined) {
            reject(
              new Error(`Cannot read the audio duration of ${audioFilePath}`)
            )
            return
          }

          // ffprobe durations are in seconds; callers expect milliseconds
          resolve(seconds * 1_000)
        }
      )
    })
  }
}

View File

@ -1,4 +1,5 @@
import type { AmazonPollyTTSSynthesizer } from '@/core/tts/synthesizers/amazon-polly-synthesizer'
import type { FliteTTSSynthesizer } from '@/core/tts/synthesizers/flite-synthesizer'
export enum TTSProviders {
AmazonPolly = 'amazon-polly',
@ -21,7 +22,7 @@ export type SynthesizeResult = {
// TODO
// export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | GoogleCloudTTSSynthesizer | WatsonTTSSynthesizer | undefined
export type TTSSynthesizer = AmazonPollyTTSSynthesizer | undefined
export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | undefined
export interface TTSSynthesizerFacade {
synthesize(speech: string): Promise<SynthesizeResult | null>