Add vad_filter to ctranslate transcriber

Helps us correctly detect the language when there is no speech in the
first 30 seconds.

Also helps reduce hallucinations.
This commit is contained in:
Chocobozzz 2024-07-03 15:07:01 +02:00
parent fb5236f2af
commit c289c86741
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
2 changed files with 7 additions and 1 deletions

View File

@@ -28,7 +28,11 @@ export function getCustomModelPath (modelName: CustomModelName) {
// ---------------------------------------------------------------------------
export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
export async function checkAutoCaption (
servers: PeerTubeServer[],
uuid: string,
captionContains = new RegExp('^WEBVTT\\n\\n00:00.\\d{3} --> 00:')
) {
for (const server of servers) {
const body = await server.captions.list({ videoId: uuid })
expect(body.total).to.equal(1)

View File

@@ -35,6 +35,8 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
...modelArgs,
'--word_timestamps',
'True',
'--vad_filter',
'true',
'--output_format',
'all',
'--output_dir',