Mirror of https://github.com/StanGirard/quivr.git (synced 2025-01-03 08:45:26 +03:00)
feat(example): Quivr whisper (#3495)
# Description

Talk with quivr hands-free via TTS (text-to-speech) and STT (speech-to-text).

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

![image](https://github.com/user-attachments/assets/1c169e80-45ce-4541-b244-5f3b85b866f2)
Parent: e68b4f4569
Commit: d20f58c147
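For context, the round trip this example wires up (record → STT → quivr-core Brain → TTS → playback) can also be exercised without the browser UI. Below is a minimal sketch against the `/upload` and `/ask` endpoints added in `app.py` in this diff; the host/port, the sample file names, and the use of a `requests` session (so the cookie set by `/upload` is sent back to `/ask`) are assumptions for illustration, not part of this PR:

```python
# Hypothetical smoke test for the /upload and /ask endpoints added below.
# Assumes the Flask dev server is running locally and the sample files exist.
import base64
import requests

BASE_URL = "http://localhost:5000"  # assumed Flask dev address
s = requests.Session()  # carries the session cookie that links /upload to /ask

# 1. Upload a .txt file; the server builds a quivr-core Brain from it.
with open("sample.txt", "rb") as f:
    s.post(f"{BASE_URL}/upload", files={"file": f}).raise_for_status()

# 2. Send recorded speech; the server transcribes it with Whisper, asks the
#    Brain, synthesizes the answer with OpenAI TTS, and returns base64 WAV.
with open("question.wav", "rb") as f:
    resp = s.post(f"{BASE_URL}/ask", files={"audio_data": f})
    resp.raise_for_status()

with open("answer.wav", "wb") as out:
    out.write(base64.b64decode(resp.json()["audio_base64"]))
```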
3 .vscode/settings.json vendored
@@ -44,5 +44,6 @@
     "reportUnusedImport": "warning",
     "reportGeneralTypeIssues": "warning"
   },
-  "makefile.configureOnOpen": false
+  "makefile.configureOnOpen": false,
+  "djlint.showInstallError": false
 }
1 examples/quivr-whisper/.gitignore vendored
@@ -1 +1,2 @@
 .env
+uploads
examples/quivr-whisper/app.py

@@ -1,27 +1,32 @@
-from flask import Flask, render_template, request, jsonify
+from flask import Flask, render_template, request, jsonify, session
 import openai
 import base64
 import os
 import requests
 from dotenv import load_dotenv
+from quivr_core import Brain
+from quivr_core.rag.entities.config import RetrievalConfig
 from tempfile import NamedTemporaryFile
+from werkzeug.utils import secure_filename
+from asyncio import to_thread
+import asyncio
+
+
+UPLOAD_FOLDER = "uploads"
+ALLOWED_EXTENSIONS = {"txt"}
+
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
 app = Flask(__name__)
+app.secret_key = "secret"
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
+app.config["CACHE_TYPE"] = "SimpleCache"  # In-memory cache for development
+app.config["CACHE_DEFAULT_TIMEOUT"] = 60 * 60  # 1 hour cache timeout
 load_dotenv()
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-quivr_token = os.getenv("QUIVR_API_KEY", "")
-quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "")
-quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "")
-quivr_url = (
-    os.getenv("QUIVR_URL", "https://api.quivr.app")
-    + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}"
-)
-
-headers = {
-    "Content-Type": "application/json",
-    "Authorization": f"Bearer {quivr_token}",
-}
+brains = {}
 
 
 @app.route("/")
@@ -29,12 +34,83 @@ def index():
     return render_template("index.html")
 
 
-@app.route("/transcribe", methods=["POST"])
-def transcribe_audio():
+def run_in_event_loop(func, *args, **kwargs):
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    if asyncio.iscoroutinefunction(func):
+        result = loop.run_until_complete(func(*args, **kwargs))
+    else:
+        result = func(*args, **kwargs)
+    loop.close()
+    return result
+
+
+def allowed_file(filename):
+    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
+
+
+@app.route("/upload", methods=["POST"])
+async def upload_file():
+    if "file" not in request.files:
+        return "No file part", 400
+
+    file = request.files["file"]
+
+    if file.filename == "":
+        return "No selected file", 400
+    if not (file and file.filename and allowed_file(file.filename)):
+        return "Invalid file type", 400
+
+    filename = secure_filename(file.filename)
+    filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+    file.save(filepath)
+
+    print(f"File uploaded and saved at: {filepath}")
+
+    print("Creating brain instance...")
+
+    brain: Brain = await to_thread(
+        run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]
+    )
+
+    # Store brain instance in cache
+    session_id = session.sid if hasattr(session, "sid") else os.urandom(16).hex()
+    session["session_id"] = session_id
+    # cache.set(session_id, brain)  # Store the brain instance in the cache
+    brains[session_id] = brain
+    print(f"Brain instance created and stored in cache for session ID: {session_id}")
+
+    return jsonify({"message": "Brain created successfully"})
+
+
+@app.route("/ask", methods=["POST"])
+async def ask():
+    if "audio_data" not in request.files:
+        return "Missing audio data", 400
+
+    # Retrieve the brain instance from the cache using the session ID
+    session_id = session.get("session_id")
+    if not session_id:
+        return "Session ID not found. Upload a file first.", 400
+
+    brain = brains.get(session_id)
+    if not brain:
+        return "Brain instance not found in dict. Upload a file first.", 400
+
+    print("Brain instance loaded from cache.")
+
+    print("Speech to text...")
     audio_file = request.files["audio_data"]
     transcript = transcribe_audio_file(audio_file)
-    quivr_response = ask_quivr_question(transcript)
-    audio_base64 = synthesize_speech(quivr_response)
+    print("Transcript result: ", transcript)
+
+    print("Getting response...")
+    quivr_response = await to_thread(run_in_event_loop, brain.ask, transcript)
+
+    print("Text to speech...")
+    audio_base64 = synthesize_speech(quivr_response.answer)
+
+    print("Done")
     return jsonify({"audio_base64": audio_base64})
 
 
@@ -55,16 +131,6 @@ def transcribe_audio_file(audio_file):
     return transcript
 
 
-def ask_quivr_question(transcript):
-    response = requests.post(quivr_url, headers=headers, json={"question": transcript})
-    if response.status_code == 200:
-        quivr_response = response.json().get("assistant")
-        return quivr_response
-    else:
-        print(f"Error from Quivr API: {response.status_code}, {response.text}")
-        return "Sorry, I couldn't understand that."
-
-
 def synthesize_speech(text):
     speech_response = openai.audio.speech.create(
         model="tts-1", voice="nova", input=text
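A note on the `to_thread` + `run_in_event_loop` combination used in `/upload` and `/ask` above: the views are `async`, so a request-handling event loop is already running, and quivr-core's `Brain.from_files` / `brain.ask` apparently drive asyncio work of their own; running them on a worker thread with a private event loop avoids a nested-loop clash. A stripped-down, self-contained sketch of the same pattern (with `slow_call` standing in for the quivr-core call; the `try/finally` around `loop.close()` is a hardening added here, not in the diff):

```python
# Sketch: call a loop-owning function from an async view via a worker thread.
import asyncio


def run_in_event_loop(func, *args, **kwargs):
    loop = asyncio.new_event_loop()  # private loop for this worker thread
    asyncio.set_event_loop(loop)
    try:
        if asyncio.iscoroutinefunction(func):
            return loop.run_until_complete(func(*args, **kwargs))
        return func(*args, **kwargs)
    finally:
        loop.close()


async def view():
    def slow_call():  # stand-in for Brain.from_files / brain.ask
        return "answer"

    # to_thread keeps the request loop responsive while the call runs elsewhere
    return await asyncio.to_thread(run_in_event_loop, slow_call)


print(asyncio.run(view()))  # -> answer
```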
examples/quivr-whisper/pyproject.toml

@@ -6,9 +6,10 @@ authors = [
     { name = "Stan Girard", email = "stan@quivr.app" }
 ]
 dependencies = [
-    "flask>=3.1.0",
+    "flask[async]>=3.1.0",
     "openai>=1.54.5",
     "quivr-core>=0.0.24",
+    "flask-caching>=2.3.0",
 ]
 readme = "README.md"
 requires-python = ">= 3.11"
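Aside: the switch to `flask[async]` is what makes the new `async def` views in `app.py` legal; without the `asgiref` extra (note `asgiref==3.8.1` appearing in the lock files below), Flask raises a `RuntimeError` at request time when an async view is dispatched. A minimal reproduction, assuming only a local Flask install:

```python
# Minimal async view; running this without the flask[async] extra installed
# fails at request time with "Install Flask with the 'async' extra".
from flask import Flask

app = Flask(__name__)


@app.route("/ping")
async def ping():
    return {"ok": True}


if __name__ == "__main__":
    app.run(debug=True)
```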
examples/quivr-whisper/requirements-dev.lock

@@ -32,6 +32,8 @@ anyio==4.6.2.post1
     # via httpx
     # via openai
     # via starlette
+asgiref==3.8.1
+    # via flask
 attrs==24.2.0
     # via aiohttp
 backoff==2.2.1
@@ -42,6 +44,8 @@ beautifulsoup4==4.12.3
     # via unstructured
 blinker==1.9.0
     # via flask
+cachelib==0.9.0
+    # via flask-caching
 cachetools==5.5.0
     # via google-auth
 certifi==2024.8.30
@@ -112,6 +116,9 @@ filetype==1.2.0
     # via llama-index-core
     # via unstructured
 flask==3.1.0
+    # via flask-caching
+    # via quivr-whisper
+flask-caching==2.3.0
     # via quivr-whisper
 flatbuffers==24.3.25
     # via onnxruntime
examples/quivr-whisper/requirements.lock

@@ -32,6 +32,8 @@ anyio==4.6.2.post1
     # via httpx
     # via openai
     # via starlette
+asgiref==3.8.1
+    # via flask
 attrs==24.2.0
     # via aiohttp
 backoff==2.2.1
@@ -42,6 +44,8 @@ beautifulsoup4==4.12.3
     # via unstructured
 blinker==1.9.0
     # via flask
+cachelib==0.9.0
+    # via flask-caching
 cachetools==5.5.0
     # via google-auth
 certifi==2024.8.30
@@ -112,6 +116,9 @@ filetype==1.2.0
     # via llama-index-core
     # via unstructured
 flask==3.1.0
+    # via flask-caching
+    # via quivr-whisper
+flask-caching==2.3.0
     # via quivr-whisper
 flatbuffers==24.3.25
     # via onnxruntime
examples/quivr-whisper/static/app.js

@@ -1,209 +1,359 @@
-const recordBtn = document.getElementById('record-btn');
-const audioVisualizer = document.getElementById('audio-visualizer');
-const audioPlayback = document.getElementById('audio-playback');
-const canvasCtx = audioVisualizer.getContext('2d');
-
-let isRecording = false;
-let mediaRecorder;
-let audioChunks = [];
-let audioContext;
-let analyser;
-let dataArray;
-let bufferLength;
-let lastAudioLevel = 0;
-let silenceTimer;
-
-recordBtn.addEventListener('click', toggleRecording);
-
-function toggleRecording() {
-    if (!isRecording) {
-        recordBtn.classList.add('hidden');
-        audioVisualizer.classList.remove('hidden');
-        startRecording();
-    } else {
-        audioVisualizer.classList.add('hidden');
-        stopRecording();
-    }
-}
-
-function drawWaveform() {
-    if (!analyser) return;
-
-    requestAnimationFrame(drawWaveform);
-
-    analyser.getByteTimeDomainData(dataArray);
-
-    canvasCtx.fillStyle = 'rgb(255, 255, 255)';
-    canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
-
-    canvasCtx.lineWidth = 2;
-    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
-
-    canvasCtx.beginPath();
-
-    let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
-    let x = 0;
-
-    let sum = 0;
-
-    for (let i = 0; i < bufferLength; i++) {
-        let v = dataArray[i] / 128.0;
-        let y = v * audioVisualizer.height / 2;
-
-        sum += v;
-
-        if (i === 0) {
-            canvasCtx.moveTo(x, y);
-        } else {
-            canvasCtx.lineTo(x, y);
-        }
-
-        x += sliceWidth;
-    }
-
-    canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
-    canvasCtx.stroke();
-
-    let currentAudioLevel = sum / bufferLength;
-
-    if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) {
-        if (!silenceTimer) {
-            silenceTimer = setTimeout(stopRecording, 1000);
-        }
-    } else {
-        clearTimeout(silenceTimer);
-        silenceTimer = null;
-    }
-
-    lastAudioLevel = currentAudioLevel;
-}
-
-async function startRecording() {
-    audioChunks = [];
-    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-    mediaRecorder = new MediaRecorder(stream);
-    mediaRecorder.ondataavailable = event => {
-        audioChunks.push(event.data);
-    };
-    mediaRecorder.start();
-    isRecording = true;
-
-    audioContext = new (window.AudioContext || window.webkitAudioContext)();
-    analyser = audioContext.createAnalyser();
-    const source = audioContext.createMediaStreamSource(stream);
-
-    source.connect(analyser);
-    analyser.fftSize = 2048;
-    bufferLength = analyser.frequencyBinCount;
-    dataArray = new Uint8Array(bufferLength);
-
-    drawWaveform();
-}
-
-function stopRecording() {
-    mediaRecorder.stop();
-    mediaRecorder.onstop = async () => {
-        // The mediaRecorder has stopped; now we can process the chunks
-        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
-        const formData = new FormData();
-        formData.append('audio_data', audioBlob);
-
-        // Now we're sending the audio to the server and waiting for a response
-        try {
-            const response = await fetch('/transcribe', {
-                method: 'POST',
-                body: formData
-            });
-            const data = await response.json();
-
-            // Once we have the response, we can source the playback element and play it
-            audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64;
-            audioPlayback.classList.remove('hidden');
-            audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response
-            setupAIResponseVisualization();
-            audioPlayback.onloadedmetadata = () => {
-                // When metadata is loaded, start playback
-                audioPlayback.play();
-                visualizeAIResponse();
-            };
-
-            // We only reset the UI after the audio has finished playing
-            // audioPlayback.onended = () => {
-            //     resetUI();
-            // };
-        } catch (error) {
-            console.error('Error during fetch/transcription:', error);
-            resetUI();
-        } finally {
-            if (analyser) {
-                analyser.disconnect();
-                analyser = null;
-            }
-            isRecording = false;
-        }
-    };
-}
-function resetUI() {
-    document.getElementById('record-btn').classList.remove('hidden');
-    document.getElementById('audio-visualizer').classList.add('hidden');
-    document.getElementById('audio-playback').classList.add('hidden');
-    // Reset any other UI elements as necessary
-}
-
-function setupAIResponseVisualization() {
-    try {
-        // Create a new audio context for playback if it doesn't exist
-        if (!audioContext) {
-            audioContext = new (window.AudioContext || window.webkitAudioContext)();
-        }
-        // Resume the audio context in case it's in a suspended state
-        audioContext.resume().then(() => {
-            analyser = audioContext.createAnalyser();
-            const source = audioContext.createMediaElementSource(audioPlayback);
-            source.connect(analyser);
-            analyser.connect(audioContext.destination);
-            analyser.fftSize = 2048;
-            bufferLength = analyser.frequencyBinCount;
-            dataArray = new Uint8Array(bufferLength);
-        });
-    } catch (error) {
-        console.error('Error setting up AI response visualization:', error);
-    }
-}
-
-function visualizeAIResponse() {
-    const draw = () => {
-        requestAnimationFrame(draw);
-
-        analyser.getByteTimeDomainData(dataArray);
-
-        canvasCtx.fillStyle = 'rgb(255, 255, 255)';
-        canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
-
-        canvasCtx.lineWidth = 2;
-        canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
-
-        canvasCtx.beginPath();
-
-        let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
-        let x = 0;
-
-        for (let i = 0; i < bufferLength; i++) {
-            let v = dataArray[i] / 128.0;
-            let y = v * audioVisualizer.height / 2;
-
-            if (i === 0) {
-                canvasCtx.moveTo(x, y);
-            } else {
-                canvasCtx.lineTo(x, y);
-            }
-
-            x += sliceWidth;
-        }
-
-        canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
-        canvasCtx.stroke();
-    };
-
-    draw();
-}
+// DOM Elements
+const recordBtn = document.getElementById("record-btn");
+const fileInput = document.getElementById("fileInput");
+const fileInputContainer = document.querySelector(".custom-file-input");
+const fileName = document.getElementById("fileName");
+
+const audioVisualizer = document.getElementById("audio-visualizer");
+const audioPlayback = document.getElementById("audio-playback");
+const canvasCtx = audioVisualizer.getContext("2d");
+
+window.addEventListener("load", () => {
+  audioVisualizer.width = window.innerWidth;
+  audioVisualizer.height = window.innerHeight;
+});
+
+window.addEventListener("resize", (e) => {
+  audioVisualizer.width = window.innerWidth;
+  audioVisualizer.height = window.innerHeight;
+});
+
+fileInput.addEventListener("change", () => {
+  fileName.textContent =
+    fileInput.files.length > 0 ? fileInput.files[0].name : "No file chosen";
+  fileName.classList.toggle("file-selected", fileInput.files.length > 0);
+});
+
+// Configuration
+const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle)
+const SILENCE_DURATION = 1500;
+const FFT_SIZE = 2048;
+
+// State
+const state = {
+  isRecording: false,
+  isVisualizing: false,
+  chunks: [],
+  silenceTimer: null,
+  lastAudioLevel: 0,
+};
+
+// Audio Analysis
+class AudioAnalyzer {
+  constructor() {
+    this.reset();
+  }
+
+  reset() {
+    this.analyser = null;
+    this.dataArray = null;
+    this.bufferLength = null;
+    this.source = null;
+    this.cleanup();
+  }
+
+  setup(source, audioContext) {
+    this.cleanup();
+
+    this.analyser = this._createAnalyser(audioContext);
+    source.connect(this.analyser);
+
+    this._initializeBuffer();
+    return this.analyser;
+  }
+
+  setupForPlayback(audioElement, audioContext, connectToDestination = true) {
+    // Reuse existing MediaElementSourceNode if it already exists for this audio element
+    if (!this.source || this.source.mediaElement !== audioElement) {
+      this.cleanup(); // Ensure any previous connections are cleaned up
+      this.source = audioContext.createMediaElementSource(audioElement);
+    }
+
+    this.analyser = this._createAnalyser(audioContext);
+
+    this.source.connect(this.analyser);
+
+    if (connectToDestination) {
+      this.analyser.connect(audioContext.destination);
+    }
+
+    this._initializeBuffer();
+    return this.analyser;
+  }
+
+  cleanup() {
+    if (this.source) {
+      this._safeDisconnect(this.source);
+    }
+    if (this.analyser) {
+      this._safeDisconnect(this.analyser);
+    }
+  }
+
+  _createAnalyser(audioContext) {
+    const analyser = audioContext.createAnalyser();
+    analyser.fftSize = FFT_SIZE;
+    return analyser;
+  }
+
+  _initializeBuffer() {
+    this.bufferLength = this.analyser.frequencyBinCount;
+    this.dataArray = new Uint8Array(this.bufferLength);
+  }
+
+  _safeDisconnect(node) {
+    if (node) {
+      try {
+        node.disconnect();
+      } catch {
+        // Ignore disconnect errors
+      }
+    }
+  }
+}
+
+// Visualization
+class Visualizer {
+  constructor(canvas, analyzer) {
+    this.canvas = canvas;
+    this.ctx = canvas.getContext("2d");
+    this.analyzer = analyzer;
+  }
+
+  draw(currentAnalyser, onSilence) {
+    if (!currentAnalyser || this.analyzer.dataArray === null) return;
+
+    requestAnimationFrame(() => this.draw(currentAnalyser, onSilence));
+
+    // Use getByteTimeDomainData instead of getFloatTimeDomainData
+    currentAnalyser.getByteTimeDomainData(this.analyzer.dataArray);
+
+    // Clear canvas
+    this.ctx.fillStyle = "#252525";
+    this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height);
+    if (!state.isVisualizing) return;
+
+    this.ctx.lineWidth = 2;
+    this.ctx.strokeStyle = "#6142d4";
+    this.ctx.beginPath();
+
+    const sliceWidth = (this.canvas.width * 1) / this.analyzer.bufferLength;
+    let x = 0;
+
+    let sum = 0;
+
+    // Draw waveform
+    for (let i = 0; i < this.analyzer.bufferLength; i++) {
+      // Scale byte data (0-255) to canvas height
+      const v = this.analyzer.dataArray[i] / 128.0; // normalize to 0-2
+      const y = (v - 1) * (this.canvas.height / 2) + this.canvas.height / 2;
+
+      sum += Math.abs(v - 1); // Calculate distance from center (128)
+
+      if (i === 0) {
+        this.ctx.moveTo(x, y);
+      } else {
+        this.ctx.lineTo(x, y);
+      }
+
+      x += sliceWidth;
+    }
+
+    this.ctx.lineTo(this.canvas.width, this.canvas.height / 2);
+    this.ctx.stroke();
+
+    // Check for silence during recording with adjusted thresholds for byte data
+    if (state.isRecording) {
+      const averageAmplitude = sum / this.analyzer.bufferLength;
+      if (averageAmplitude < 0.1) {
+        // Adjusted threshold for normalized data
+        // Reset silence timer if we detect sound
+        if (averageAmplitude > 0.05) {
+          clearTimeout(state.silenceTimer);
+          state.silenceTimer = null;
+        } else {
+          onSilence();
+        }
+      }
+    }
+  }
+}
+
+// Recording Handler
+class RecordingHandler {
+  constructor() {
+    this.mediaRecorder = null;
+    this.audioAnalyzer = new AudioAnalyzer();
+    this.visualizer = new Visualizer(audioVisualizer, this.audioAnalyzer);
+    this.audioContext = null;
+  }
+
+  async initialize() {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      this.mediaRecorder = new MediaRecorder(stream);
+      this.setupRecordingEvents();
+      if (!this.audioContext)
+        this.audioContext = new (window.AudioContext ||
+          window.webkitAudioContext)();
+    } catch (err) {
+      console.error(`Media device error: ${err}`);
+    }
+  }
+
+  setupRecordingEvents() {
+    this.mediaRecorder.ondataavailable = (e) => {
+      state.chunks.push(e.data);
+    };
+
+    this.mediaRecorder.onstop = async () => {
+      await this.handleRecordingStop();
+    };
+  }
+
+  startRecording() {
+    state.isVisualizing = true;
+    state.chunks = [];
+    state.isRecording = true;
+    this.mediaRecorder.start();
+
+    const source = this.audioContext.createMediaStreamSource(
+      this.mediaRecorder.stream
+    );
+
+    const analyser = this.audioAnalyzer.setup(source, this.audioContext);
+    audioVisualizer.classList.remove("hidden");
+
+    this.visualizer.draw(analyser, () => {
+      if (!state.silenceTimer) {
+        state.silenceTimer = setTimeout(
+          () => this.stopRecording(),
+          SILENCE_DURATION
+        );
+      }
+    });
+
+    recordBtn.dataset.recording = true;
+    recordBtn.classList.add("processing");
+  }
+
+  stopRecording() {
+    if (state.isRecording) {
+      state.isVisualizing = false;
+      state.isRecording = false;
+      this.mediaRecorder.stop();
+      clearTimeout(state.silenceTimer);
+      state.silenceTimer = null;
+      recordBtn.dataset.recording = false;
+    }
+  }
+
+  async handleRecordingStop() {
+    console.log("Processing recording...");
+    recordBtn.dataset.pending = true;
+    recordBtn.disabled = true;
+
+    const audioBlob = new Blob(state.chunks, { type: "audio/wav" });
+    if (!fileInput.files.length) {
+      recordBtn.dataset.pending = false;
+      recordBtn.disabled = false;
+      alert("Please select a file.");
+      return;
+    }
+
+    const formData = new FormData();
+    formData.append("audio_data", audioBlob);
+    formData.append("file", fileInput.files[0]);
+
+    try {
+      await this.processRecording(formData);
+    } catch (error) {
+      console.error("Processing error:", error);
+    } finally {
+      this.audioAnalyzer.cleanup();
+    }
+  }
+
+  async processRecording(formData) {
+    const response = await fetch("/ask", {
+      method: "POST",
+      body: formData,
+    });
+    const data = await response.json();
+
+    await this.handleResponse(data);
+  }
+
+  async handleResponse(data) {
+    audioPlayback.src = "data:audio/wav;base64," + data.audio_base64;
+
+    audioPlayback.onloadedmetadata = () => {
+      const analyser = this.audioAnalyzer.setupForPlayback(
+        audioPlayback,
+        this.audioContext
+      );
+      audioVisualizer.classList.remove("hidden");
+
+      this.visualizer.draw(analyser, () => {});
+      audioPlayback.play();
+      state.isVisualizing = true;
+    };
+
+    audioPlayback.onended = () => {
+      this.audioAnalyzer.cleanup();
+      recordBtn.dataset.pending = false;
+      recordBtn.disabled = false;
+      state.isVisualizing = false;
+    };
+  }
+}
+
+const uploadFile = async (e) => {
+  uploadBtn.innerText = "Uploading File...";
+  e.preventDefault();
+  const file = fileInput.files[0];
+
+  if (!file) {
+    alert("Please select a file.");
+    return;
+  }
+  const formData = new FormData();
+  formData.append("file", file);
+  try {
+    await fetch("/upload", {
+      method: "POST",
+      body: formData,
+    });
+    recordBtn.classList.remove("hidden");
+    fileInputContainer.classList.add("hidden");
+  } catch (error) {
+    recordBtn.classList.add("hidden");
+    fileInputContainer.classList.remove("hidden");
+    console.error("Error uploading file:", error);
+    uploadBtn.innerText = "Upload Failed. Try again";
+  }
+};
+
+const uploadBtn = document.getElementById("upload-btn");
+uploadBtn.addEventListener("click", uploadFile);
+
+// Main initialization
+async function initializeApp() {
+  if (!navigator.mediaDevices) {
+    console.error("Media devices not supported");
+    return;
+  }
+
+  const recorder = new RecordingHandler();
+  await recorder.initialize();
+
+  recordBtn.onclick = () => {
+    if (recorder.mediaRecorder.state === "inactive") {
+      recorder.startRecording();
+    } else if (recorder.mediaRecorder.state === "recording") {
+      recorder.stopRecording();
+    }
+  };
+}
+
+// Start the application
+initializeApp();
1 examples/quivr-whisper/static/loader.svg Normal file (406 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-loader-pinwheel"><path d="M22 12a1 1 0 0 1-10 0 1 1 0 0 0-10 0"/><path d="M7 20.7a1 1 0 1 1 5-8.7 1 1 0 1 0 5-8.6"/><path d="M7 3.3a1 1 0 1 1 5 8.6 1 1 0 1 0 5 8.6"/><circle cx="12" cy="12" r="10"/></svg>
1 examples/quivr-whisper/static/mic-off.svg Normal file (456 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-mic-off"><line x1="2" x2="22" y1="2" y2="22"/><path d="M18.89 13.23A7.12 7.12 0 0 0 19 12v-2"/><path d="M5 10v2a7 7 0 0 0 12 5"/><path d="M15 9.34V5a3 3 0 0 0-5.68-1.33"/><path d="M9 9v3a3 3 0 0 0 5.12 2.12"/><line x1="12" x2="12" y1="19" y2="22"/></svg>
1 examples/quivr-whisper/static/mic.svg Normal file (354 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-mic"><path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" x2="12" y1="19" y2="22"/></svg>
examples/quivr-whisper/static/styles.css

@@ -1,3 +1,151 @@
+* {
+  box-sizing: border-box;
+  margin: 0;
+  padding: 0;
+}
+
+body {
+  color: #f4f4f4;
+  background-color: #252525;
+  display: flex;
+  gap: 1rem;
+  align-items: center;
+  flex-direction: column;
+  justify-content: center;
+  min-height: 100vh;
+}
+
+.primary {
+  background-color: #6142d4;
+}
+
+button {
+  background-color: #6142d4;
+  border: none;
+  padding: .75rem 2rem;
+  border-radius: 0.5rem;
+  color: #f4f4f4;
+  cursor: pointer;
+}
+
+canvas {
+  position: absolute;
+  width: 100%;
+  height: 100%;
+  top: 0;
+  left: 0;
+  background-color: #252525;
+  z-index: -1;
+}
+
+.record-btn {
+  background-color: #f5f5f5;
+  border: none;
+  outline: none;
+  width: 256px;
+  height: 256px;
+  background-repeat: no-repeat;
+  background-position: center;
+  border-radius: 50%;
+  background-size: 50%;
+  transition: background-color 200ms ease-in, transform 200ms ease-out;
+}
+
+.record-btn:hover {
+  background-color: #fff;
+  transform: scale(1.025);
+}
+
+.record-btn:active {
+  background-color: #e2e2e2;
+  transform: scale(0.975);
+}
+
+.record-btn[data-recording="true"] {
+  background-image: url("./mic.svg");
+}
+
+.record-btn[data-recording="false"] {
+  background-image: url("./mic-off.svg");
+}
+
+.record-btn[data-pending="true"] {
+  background-image: url("./loader.svg") !important;
+  animation: spin 1s linear infinite;
+}
+
+.hidden {
+  display: none !important;
+  visibility: hidden;
+}
+
+.custom-file-input {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 10px;
+}
+
+.custom-file-input input[type="file"] {
+  display: none;
+}
+
+.custom-file-input label {
+  border: solid 2px #6142d4;
+  color: white;
+  padding: 8px 16px;
+  border-radius: 4px;
+  cursor: pointer;
+  font-size: 14px;
+  font-weight: bold;
+  transition: background-color 0.3s;
+}
+
+.custom-file-input label:hover {
+  background-color: #6142d4;
+}
+
+.custom-file-input span {
+  font-size: 14px;
+  color: #f4f4f4;
+}
+
+/* Adjust appearance when a file is selected */
+.custom-file-input span.file-selected {
+  color: #ffffff;
+  font-weight: bold;
+}
+
+/*
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+background = "#fcfcfc"
+paper = "#f8f8f8"
+
+[UI.theme.light.primary]
+main = "#6142d4"
+dark = "#6e53cf"
+light = "#6e53cf30"
+[UI.theme.light.text]
+primary = "#1f1f1f"
+secondary = "#818080"
+
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+background = "#252525"
+paper = "#1f1f1f"
+
+[UI.theme.dark.primary]
+main = "#6142d4"
+dark = "#6e53cf"
+light = "#6e53cf30"
+[UI.theme.dark.text]
+primary = "#f4f4f4"
+secondary = "#c8c8c8"
+
+*/
+
 .loader {
   border: 4px solid #f3f3f3;
   border-radius: 50%;
examples/quivr-whisper/templates/index.html

@@ -1,26 +1,37 @@
-<!doctype html>
+<!DOCTYPE html>
 <html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Audio Interaction WebApp</title>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
-</head>
-<body class="bg-gray-100 flex flex-col items-center justify-center h-screen">
-    <h1 class="text-6xl font-bold mb-8">Quivr.app</h1>
-    <div id="app" class="text-center">
-        <button id="record-btn"
-            class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-full cursor-pointer">
-            Ask a question to Quivr
-        </button>
-        <canvas id="audio-visualizer" width="640" height="100"
-            class="hidden bg-white rounded-lg cursor-pointer"></canvas>
-        <audio id="audio-playback" controls class="hidden mt-4"></audio>
-    </div>
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+
+    <title>Audio Interaction WebApp</title>
+    <link
+      rel="stylesheet"
+      href="{{ url_for('static', filename='styles.css') }}"
+    />
+  </head>
+
+  <body>
+    <button
+      type="button"
+      id="record-btn"
+      class="record-btn hidden"
+      data-recording="false"
+      data-pending="false"
+    ></button>
+    <div class="custom-file-input">
+      <label for="fileInput">Choose a file</label>
+      <input
+        type="file"
+        accept="text/plain"
+        name="fileInput"
+        required
+        id="fileInput"
+      />
+      <span id="fileName">No file chosen</span>
+      <button id="upload-btn" class="upload-btn">Upload</button>
+    </div>
+    <canvas id="audio-visualizer" class=""></canvas>
+    <audio id="audio-playback" controls class="hidden"></audio>
     <script src="{{ url_for('static', filename='app.js') }}"></script>
-</body>
+  </body>
 </html>