refactor(server): TTS Flite synthesizer + synthesizer parent class

2024-11-28 12:43:35 +03:00 · 2023-02-07 21:26:14 +08:00 · 2023-02-07 21:26:14 +08:00 · 1146322196
commit 1146322196
parent 157e28a063
4 changed files with 107 additions and 12 deletions
--- a/server/src/core/tts/synthesizers/amazon-polly-synthesizer.ts
+++ b/server/src/core/tts/synthesizers/amazon-polly-synthesizer.ts
@ -2,16 +2,14 @@ import type { Stream } from 'node:stream'
 import path from 'node:path'
 import fs from 'node:fs'

-import Ffmpeg from 'fluent-ffmpeg'
 import { Polly, SynthesizeSpeechCommand } from '@aws-sdk/client-polly'
-import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
-import { path as ffprobePath } from '@ffprobe-installer/ffprobe'

 import type { LongLanguageCode } from '@/types'
 import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
 import type { AmazonVoiceConfiguration } from '@/schemas/voice-config-schemas'
 import { LANG, VOICE_CONFIG_PATH, TMP_PATH } from '@/constants'
 import { TTS } from '@/core'
+import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
 import { LogHelper } from '@/helpers/log-helper'
 import { StringHelper } from '@/helpers/string-helper'

@ -24,12 +22,14 @@ const VOICES = {
  }
 }

-export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
+export class AmazonPollyTTSSynthesizer extends TTSSynthesizerBase implements TTSSynthesizerFacade {
  private readonly name = 'Amazon Polly TTS Synthesizer'
  private readonly client: Polly | undefined = undefined
  private readonly lang: LongLanguageCode = LANG as LongLanguageCode

  constructor(lang: LongLanguageCode) {
+    super()
+
    LogHelper.title(this.name)
    LogHelper.success('New instance')

@ -79,13 +79,7 @@ export class AmazonPollyTTSSynthesizer implements TTSSynthesizerFacade {
          wStream.on('error', reject)
        })

-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        const ffmpeg = new (Ffmpeg as any)()
-        ffmpeg.setFfmpegPath(ffmpegPath)
-        ffmpeg.setFfprobePath(ffprobePath)
-
-        const data = await ffmpeg.input(audioFilePath).ffprobe()
-        const duration = data.streams[0].duration * 1_000
+        const duration = await this.getAudioDuration(audioFilePath)

        TTS.em.emit('saved', duration)

--- a/server/src/core/tts/synthesizers/flite-synthesizer.ts
+++ b/server/src/core/tts/synthesizers/flite-synthesizer.ts
@ -0,0 +1,83 @@
+import path from 'node:path'
+import fs from 'node:fs'
+import { spawn } from 'node:child_process'
+
+import type { LongLanguageCode } from '@/types'
+import type { TTSSynthesizerFacade, SynthesizeResult } from '@/core/tts/types'
+import { LANG, TMP_PATH, BIN_PATH } from '@/constants'
+import { TTS } from '@/core'
+import { TTSSynthesizerBase } from '@/core/tts/tts-synthesizer-base'
+import { LogHelper } from '@/helpers/log-helper'
+import { StringHelper } from '@/helpers/string-helper'
+
+// Voice tuning values; each one is handed to the Flite binary through a "--setf" flag
+const FLITE_CONFIG = {
+  // Intonation (85-180 Hz men; 165-255 Hz women)
+  int_f0_target_mean: 115.0,
+  // Low or high
+  f0_shift: 1.0,
+  // Speed (lower = faster)
+  duration_stretch: 1.0,
+  // Pitch variability (lower = more flat)
+  int_f0_target_stddev: 15.0
+}
+
+/**
+ * Offline TTS synthesizer that shells out to the Flite binary located under
+ * `BIN_PATH/flite/flite` and writes the generated speech to a WAV file in
+ * `TMP_PATH`.
+ */
+export class FliteTTSSynthesizer extends TTSSynthesizerBase implements TTSSynthesizerFacade {
+  private readonly name = 'Flite TTS Synthesizer'
+  private readonly binPath = path.join(BIN_PATH, 'flite', 'flite')
+  private readonly lang: LongLanguageCode = LANG as LongLanguageCode
+
+  /**
+   * Logs the instantiation, warns when the language is not "en-US" and
+   * reports a missing Flite binary.
+   *
+   * @param lang Language the synthesizer is created for
+   */
+  constructor(lang: LongLanguageCode) {
+    super()
+
+    LogHelper.title(this.name)
+    LogHelper.success('New instance')
+
+    this.lang = lang
+
+    if (this.lang !== 'en-US') {
+      LogHelper.warning(
+        'The Flite synthesizer only accepts the "en-US" language at the moment'
+      )
+    }
+
+    if (!fs.existsSync(this.binPath)) {
+      LogHelper.error(
+        `Cannot find ${this.binPath} You can set up the offline TTS by running: "npm run setup:offline-tts"`
+      )
+    }
+  }
+
+  /**
+   * Runs Flite on the given speech and waits for the process to finish.
+   *
+   * Emits the "saved" event on `TTS.em` with the audio duration once the
+   * file has been probed.
+   *
+   * @param speech Text to synthesize
+   * @returns The audio file path and its duration, or `null` when Flite
+   * could not be run, exited with a failure, or the duration could not be read
+   */
+  public synthesize(speech: string): Promise<SynthesizeResult | null> {
+    const audioFilePath = path.join(
+      TMP_PATH,
+      `${Date.now()}-${StringHelper.random(4)}.wav`
+    )
+
+    // The result only exists once the child process is done, hence the
+    // explicit promise: returning from inside an event listener is a no-op
+    return new Promise((resolve) => {
+      // Not named "process" to avoid shadowing the Node.js global
+      const fliteProcess = spawn(this.binPath, [
+        speech,
+        '--setf',
+        `int_f0_target_mean=${FLITE_CONFIG.int_f0_target_mean}`,
+        '--setf',
+        `f0_shift=${FLITE_CONFIG.f0_shift}`,
+        '--setf',
+        `duration_stretch=${FLITE_CONFIG.duration_stretch}`,
+        '--setf',
+        `int_f0_target_stddev=${FLITE_CONFIG.int_f0_target_stddev}`,
+        '-o',
+        audioFilePath
+      ])
+
+      // Spawn failures (e.g. missing binary) are emitted as an "error" event;
+      // left unhandled they would be thrown as an uncaught exception
+      fliteProcess.on('error', (error) => {
+        LogHelper.error(`Failed to run ${this.binPath}: ${error.message}`)
+        resolve(null)
+      })
+
+      // Handle error
+      fliteProcess.stderr.on('data', (data) => {
+        LogHelper.error(data.toString())
+      })
+
+      // "close" fires after the process has ended and its stdio streams are closed
+      fliteProcess.on('close', (code) => {
+        if (code !== 0) {
+          LogHelper.error(`${this.binPath} exited with code ${String(code)}`)
+          resolve(null)
+
+          return
+        }
+
+        this.getAudioDuration(audioFilePath)
+          .then((duration) => {
+            TTS.em.emit('saved', duration)
+
+            resolve({
+              audioFilePath,
+              duration
+            })
+          })
+          .catch((error: unknown) => {
+            LogHelper.error(
+              `Failed to get the duration of ${audioFilePath}: ${String(error)}`
+            )
+            resolve(null)
+          })
+      })
+    })
+  }
+}
--- a/server/src/core/tts/tts-synthesizer-base.ts
+++ b/server/src/core/tts/tts-synthesizer-base.ts
@ -0,0 +1,17 @@
+import Ffmpeg from 'fluent-ffmpeg'
+import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg'
+import { path as ffprobePath } from '@ffprobe-installer/ffprobe'
+
+/**
+ * Parent class of the TTS synthesizers; holds the audio helper they share.
+ */
+export class TTSSynthesizerBase {
+  /**
+   * Reads the duration of an audio file with ffprobe.
+   *
+   * @param audioFilePath Path of the audio file to probe
+   * @returns Duration of the first stream in milliseconds (the value
+   * reported by ffprobe multiplied by 1,000)
+   * @throws When ffprobe fails or reports no stream for the file
+   */
+  protected async getAudioDuration(audioFilePath: string): Promise<number> {
+    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+    // @ts-ignore
+    const ffmpeg = new Ffmpeg()
+    ffmpeg.setFfmpegPath(ffmpegPath)
+    ffmpeg.setFfprobePath(ffprobePath)
+
+    // ffprobe() is callback-based and returns nothing, so awaiting it
+    // directly yields undefined; wrap the callback in a promise instead
+    return new Promise<number>((resolve, reject) => {
+      ffmpeg.input(audioFilePath).ffprobe(
+        (
+          err: unknown,
+          data?: { streams?: Array<{ duration?: number | string }> }
+        ) => {
+          if (err) {
+            reject(err instanceof Error ? err : new Error(String(err)))
+
+            return
+          }
+
+          const stream = data?.streams?.[0]
+
+          if (!stream) {
+            reject(new Error(`No audio stream found in ${audioFilePath}`))
+
+            return
+          }
+
+          resolve(Number(stream.duration) * 1_000)
+        }
+      )
+    })
+  }
+}
--- a/server/src/core/tts/types.ts
+++ b/server/src/core/tts/types.ts
@ -1,4 +1,5 @@
 import type { AmazonPollyTTSSynthesizer } from '@/core/tts/synthesizers/amazon-polly-synthesizer'
+import type { FliteTTSSynthesizer } from '@/core/tts/synthesizers/flite-synthesizer'

 export enum TTSProviders {
  AmazonPolly = 'amazon-polly',
@ -21,7 +22,7 @@ export type SynthesizeResult = {

 // TODO
 // export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | GoogleCloudTTSSynthesizer | WatsonTTSSynthesizer | undefined
-export type TTSSynthesizer = AmazonPollyTTSSynthesizer | undefined
+export type TTSSynthesizer = AmazonPollyTTSSynthesizer | FliteTTSSynthesizer | undefined

 export interface TTSSynthesizerFacade {
  synthesize(speech: string): Promise<SynthesizeResult | null>