From d20f58c1471e17855465049041781fb8fb0782c2 Mon Sep 17 00:00:00 2001 From: Aditya Nandan <61308761+adityanandanx@users.noreply.github.com> Date: Tue, 26 Nov 2024 18:20:14 +0530 Subject: [PATCH] feat(example): Quivr whisper (#3495) # Description Talk with quivr handsfree via tts and stt. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate): ![image](https://github.com/user-attachments/assets/1c169e80-45ce-4541-b244-5f3b85b866f2) --- .vscode/settings.json | 3 +- examples/quivr-whisper/.gitignore | 1 + examples/quivr-whisper/app.py | 120 ++++- examples/quivr-whisper/pyproject.toml | 3 +- examples/quivr-whisper/requirements-dev.lock | 7 + examples/quivr-whisper/requirements.lock | 7 + examples/quivr-whisper/static/app.js | 512 ++++++++++++------- examples/quivr-whisper/static/loader.svg | 1 + examples/quivr-whisper/static/mic-off.svg | 1 + examples/quivr-whisper/static/mic.svg | 1 + examples/quivr-whisper/static/styles.css | 148 ++++++ examples/quivr-whisper/templates/index.html | 53 +- 12 files changed, 626 insertions(+), 231 deletions(-) create mode 100644 examples/quivr-whisper/static/loader.svg create mode 100644 examples/quivr-whisper/static/mic-off.svg create mode 100644 examples/quivr-whisper/static/mic.svg diff --git a/.vscode/settings.json b/.vscode/settings.json index 86370d352..bb7120ff5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -44,5 +44,6 @@ "reportUnusedImport": "warning", "reportGeneralTypeIssues": "warning" }, - "makefile.configureOnOpen": false + "makefile.configureOnOpen": false, + "djlint.showInstallError": false } diff --git a/examples/quivr-whisper/.gitignore b/examples/quivr-whisper/.gitignore index 4c49bd78f..727370b46 100644 --- a/examples/quivr-whisper/.gitignore +++ b/examples/quivr-whisper/.gitignore @@ -1 +1,2 @@ .env +uploads \ No newline at end of file diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py index 1ae27eac2..79031b901 100644 --- a/examples/quivr-whisper/app.py +++ b/examples/quivr-whisper/app.py @@ -1,27 +1,32 @@ -from flask import Flask, render_template, request, jsonify +from flask import Flask, render_template, request, jsonify, session import openai import base64 import os import requests from dotenv import load_dotenv +from quivr_core import Brain +from quivr_core.rag.entities.config import RetrievalConfig from tempfile import NamedTemporaryFile +from werkzeug.utils import secure_filename +from asyncio import to_thread +import asyncio + + +UPLOAD_FOLDER = "uploads" +ALLOWED_EXTENSIONS = {"txt"} + +os.makedirs(UPLOAD_FOLDER, exist_ok=True) app = Flask(__name__) +app.secret_key = "secret" +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER +app.config["CACHE_TYPE"] = "SimpleCache" # In-memory cache for development +app.config["CACHE_DEFAULT_TIMEOUT"] = 60 * 60 # 1 hour cache timeout load_dotenv() + openai.api_key = os.getenv("OPENAI_API_KEY") -quivr_token = os.getenv("QUIVR_API_KEY", "") -quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "") -quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "") -quivr_url = ( - os.getenv("QUIVR_URL", "https://api.quivr.app") - + 
f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}" -) - -headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {quivr_token}", -} +brains = {} @app.route("/") @@ -29,12 +34,83 @@ def index(): return render_template("index.html") -@app.route("/transcribe", methods=["POST"]) -def transcribe_audio(): +def run_in_event_loop(func, *args, **kwargs): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + if asyncio.iscoroutinefunction(func): + result = loop.run_until_complete(func(*args, **kwargs)) + else: + result = func(*args, **kwargs) + loop.close() + return result + + +def allowed_file(filename): + return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route("/upload", methods=["POST"]) +async def upload_file(): + if "file" not in request.files: + return "No file part", 400 + + file = request.files["file"] + + if file.filename == "": + return "No selected file", 400 + if not (file and file.filename and allowed_file(file.filename)): + return "Invalid file type", 400 + + filename = secure_filename(file.filename) + filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename) + file.save(filepath) + + print(f"File uploaded and saved at: {filepath}") + + print("Creating brain instance...") + + brain: Brain = await to_thread( + run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath] + ) + + # Store brain instance in cache + session_id = session.sid if hasattr(session, "sid") else os.urandom(16).hex() + session["session_id"] = session_id + # cache.set(session_id, brain) # Store the brain instance in the cache + brains[session_id] = brain + print(f"Brain instance created and stored in cache for session ID: {session_id}") + + return jsonify({"message": "Brain created successfully"}) + + +@app.route("/ask", methods=["POST"]) +async def ask(): + if "audio_data" not in request.files: + return "Missing audio data", 400 + + # Retrieve the brain instance from the cache using the session ID + session_id = session.get("session_id") + if not session_id: + return "Session ID not found. Upload a file first.", 400 + + brain = brains.get(session_id) + if not brain: + return "Brain instance not found in dict. Upload a file first.", 400 + + print("Brain instance loaded from cache.") + + print("Speech to text...") audio_file = request.files["audio_data"] transcript = transcribe_audio_file(audio_file) - quivr_response = ask_quivr_question(transcript) - audio_base64 = synthesize_speech(quivr_response) + print("Transcript result: ", transcript) + + print("Getting response...") + quivr_response = await to_thread(run_in_event_loop, brain.ask, transcript) + + print("Text to speech...") + audio_base64 = synthesize_speech(quivr_response.answer) + + print("Done") return jsonify({"audio_base64": audio_base64}) @@ -55,16 +131,6 @@ def transcribe_audio_file(audio_file): return transcript -def ask_quivr_question(transcript): - response = requests.post(quivr_url, headers=headers, json={"question": transcript}) - if response.status_code == 200: - quivr_response = response.json().get("assistant") - return quivr_response - else: - print(f"Error from Quivr API: {response.status_code}, {response.text}") - return "Sorry, I couldn't understand that." 
- - def synthesize_speech(text): speech_response = openai.audio.speech.create( model="tts-1", voice="nova", input=text diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml index 457e6c90e..3c48b90c6 100644 --- a/examples/quivr-whisper/pyproject.toml +++ b/examples/quivr-whisper/pyproject.toml @@ -6,9 +6,10 @@ authors = [ { name = "Stan Girard", email = "stan@quivr.app" } ] dependencies = [ - "flask>=3.1.0", + "flask[async]>=3.1.0", "openai>=1.54.5", "quivr-core>=0.0.24", + "flask-caching>=2.3.0", ] readme = "README.md" requires-python = ">= 3.11" diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock index 8e93ec1b5..716aa161b 100644 --- a/examples/quivr-whisper/requirements-dev.lock +++ b/examples/quivr-whisper/requirements-dev.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 @@ -42,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -112,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock index 8e93ec1b5..716aa161b 100644 --- a/examples/quivr-whisper/requirements.lock +++ b/examples/quivr-whisper/requirements.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 @@ -42,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -112,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 0d788544d..5f9a7064c 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,209 +1,359 @@ -const recordBtn = document.getElementById('record-btn'); -const audioVisualizer = document.getElementById('audio-visualizer'); -const audioPlayback = document.getElementById('audio-playback'); -const canvasCtx = audioVisualizer.getContext('2d'); +// DOM Elements +const recordBtn = document.getElementById("record-btn"); +const fileInput = document.getElementById("fileInput"); +const fileInputContainer = document.querySelector(".custom-file-input"); +const fileName = document.getElementById("fileName"); -let isRecording = false; -let mediaRecorder; -let audioChunks = []; -let audioContext; -let analyser; -let dataArray; -let bufferLength; -let lastAudioLevel = 0; -let silenceTimer; +const audioVisualizer = document.getElementById("audio-visualizer"); +const audioPlayback = document.getElementById("audio-playback"); +const canvasCtx = audioVisualizer.getContext("2d"); -recordBtn.addEventListener('click', toggleRecording); +window.addEventListener("load", () => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); -function toggleRecording() { - if (!isRecording) { - recordBtn.classList.add('hidden'); - 
audioVisualizer.classList.remove('hidden'); - startRecording(); - } else { - audioVisualizer.classList.add('hidden'); - stopRecording(); +window.addEventListener("resize", (e) => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); + +fileInput.addEventListener("change", () => { + fileName.textContent = + fileInput.files.length > 0 ? fileInput.files[0].name : "No file chosen"; + fileName.classList.toggle("file-selected", fileInput.files.length > 0); +}); + +// Configuration +const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle) +const SILENCE_DURATION = 1500; +const FFT_SIZE = 2048; + +// State +const state = { + isRecording: false, + isVisualizing: false, + chunks: [], + silenceTimer: null, + lastAudioLevel: 0, +}; + +// Audio Analysis +class AudioAnalyzer { + constructor() { + this.reset(); + } + + reset() { + this.analyser = null; + this.dataArray = null; + this.bufferLength = null; + this.source = null; + this.cleanup(); + } + + setup(source, audioContext) { + this.cleanup(); + + this.analyser = this._createAnalyser(audioContext); + source.connect(this.analyser); + + this._initializeBuffer(); + return this.analyser; + } + + setupForPlayback(audioElement, audioContext, connectToDestination = true) { + // Reuse existing MediaElementSourceNode if it already exists for this audio element + if (!this.source || this.source.mediaElement !== audioElement) { + this.cleanup(); // Ensure any previous connections are cleaned up + this.source = audioContext.createMediaElementSource(audioElement); } + + this.analyser = this._createAnalyser(audioContext); + + this.source.connect(this.analyser); + + if (connectToDestination) { + this.analyser.connect(audioContext.destination); + } + + this._initializeBuffer(); + return this.analyser; + } + + cleanup() { + if (this.source) { + this._safeDisconnect(this.source); + } + if (this.analyser) { + this._safeDisconnect(this.analyser); + } + } + + _createAnalyser(audioContext) { + const analyser = audioContext.createAnalyser(); + analyser.fftSize = FFT_SIZE; + return analyser; + } + + _initializeBuffer() { + this.bufferLength = this.analyser.frequencyBinCount; + this.dataArray = new Uint8Array(this.bufferLength); + } + + _safeDisconnect(node) { + if (node) { + try { + node.disconnect(); + } catch { + // Ignore disconnect errors + } + } + } } -function drawWaveform() { - if (!analyser) return; +// Visualization +class Visualizer { + constructor(canvas, analyzer) { + this.canvas = canvas; + this.ctx = canvas.getContext("2d"); + this.analyzer = analyzer; + } - requestAnimationFrame(drawWaveform); + draw(currentAnalyser, onSilence) { + if (!currentAnalyser || this.analyzer.dataArray === null) return; - analyser.getByteTimeDomainData(dataArray); + requestAnimationFrame(() => this.draw(currentAnalyser, onSilence)); - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); + // Use getByteTimeDomainData instead of getFloatTimeDomainData + currentAnalyser.getByteTimeDomainData(this.analyzer.dataArray); - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; + // Clear canvas + this.ctx.fillStyle = "#252525"; + this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height); + if (!state.isVisualizing) return; - canvasCtx.beginPath(); + this.ctx.lineWidth = 2; + this.ctx.strokeStyle = "#6142d4"; + this.ctx.beginPath(); - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; + const sliceWidth = (this.canvas.width * 1) / 
this.analyzer.bufferLength; let x = 0; - let sum = 0; - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; + // Draw waveform + for (let i = 0; i < this.analyzer.bufferLength; i++) { + // Scale byte data (0-255) to canvas height + const v = this.analyzer.dataArray[i] / 128.0; // normalize to 0-2 + const y = (v - 1) * (this.canvas.height / 2) + this.canvas.height / 2; - sum += v; + sum += Math.abs(v - 1); // Calculate distance from center (128) - if (i === 0) { - canvasCtx.moveTo(x, y); + if (i === 0) { + this.ctx.moveTo(x, y); + } else { + this.ctx.lineTo(x, y); + } + + x += sliceWidth; + } + + this.ctx.lineTo(this.canvas.width, this.canvas.height / 2); + this.ctx.stroke(); + + // Check for silence during recording with adjusted thresholds for byte data + if (state.isRecording) { + const averageAmplitude = sum / this.analyzer.bufferLength; + if (averageAmplitude < 0.1) { + // Adjusted threshold for normalized data + // Reset silence timer if we detect sound + if (averageAmplitude > 0.05) { + clearTimeout(state.silenceTimer); + state.silenceTimer = null; } else { - canvasCtx.lineTo(x, y); + onSilence(); } - - x += sliceWidth; + } } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - - let currentAudioLevel = sum / bufferLength; - - if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { - if (!silenceTimer) { - silenceTimer = setTimeout(stopRecording, 1000); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = null; - } - - lastAudioLevel = currentAudioLevel; + } } -async function startRecording() { - audioChunks = []; - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorder = new MediaRecorder(stream); - mediaRecorder.ondataavailable = event => { - audioChunks.push(event.data); - }; - mediaRecorder.start(); - isRecording = true; +// Recording Handler +class RecordingHandler { + constructor() { + this.mediaRecorder = null; + this.audioAnalyzer = new AudioAnalyzer(); + this.visualizer = new Visualizer(audioVisualizer, this.audioAnalyzer); + this.audioContext = null; + } - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaStreamSource(stream); - - source.connect(analyser); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - - drawWaveform(); -} - -function stopRecording() { - mediaRecorder.stop(); - mediaRecorder.onstop = async () => { - // The mediaRecorder has stopped; now we can process the chunks - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - const formData = new FormData(); - formData.append('audio_data', audioBlob); - - // Now we're sending the audio to the server and waiting for a response - try { - const response = await fetch('/transcribe', { - method: 'POST', - body: formData - }); - const data = await response.json(); - - // Once we have the response, we can source the playback element and play it - audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64; - audioPlayback.classList.remove('hidden'); - audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response - setupAIResponseVisualization(); - audioPlayback.onloadedmetadata = () => { - // When metadata is loaded, start playback - audioPlayback.play(); - visualizeAIResponse(); - }; - - // We only reset the UI after the audio 
has finished playing - // audioPlayback.onended = () => { - // resetUI(); - // }; - } catch (error) { - console.error('Error during fetch/transcription:', error); - resetUI(); - } finally { - if (analyser) { - analyser.disconnect(); - analyser = null; - } - isRecording = false; - } - }; -} -function resetUI() { - document.getElementById('record-btn').classList.remove('hidden'); - document.getElementById('audio-visualizer').classList.add('hidden'); - document.getElementById('audio-playback').classList.add('hidden'); - // Reset any other UI elements as necessary -} - -function setupAIResponseVisualization() { + async initialize() { try { - // Create a new audio context for playback if it doesn't exist - if (!audioContext) { - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - } - // Resume the audio context in case it's in a suspended state - audioContext.resume().then(() => { - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaElementSource(audioPlayback); - source.connect(analyser); - analyser.connect(audioContext.destination); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - }); - } catch (error) { - console.error('Error setting up AI response visualization:', error); + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + this.mediaRecorder = new MediaRecorder(stream); + this.setupRecordingEvents(); + if (!this.audioContext) + this.audioContext = new (window.AudioContext || + window.webkitAudioContext)(); + } catch (err) { + console.error(`Media device error: ${err}`); } -} + } -function visualizeAIResponse() { - const draw = () => { - requestAnimationFrame(draw); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; - - canvasCtx.beginPath(); - - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; - let x = 0; - - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; - - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } - - x += sliceWidth; - } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); + setupRecordingEvents() { + this.mediaRecorder.ondataavailable = (e) => { + state.chunks.push(e.data); }; - draw(); -} \ No newline at end of file + this.mediaRecorder.onstop = async () => { + await this.handleRecordingStop(); + }; + } + + startRecording() { + state.isVisualizing = true; + state.chunks = []; + state.isRecording = true; + this.mediaRecorder.start(); + + const source = this.audioContext.createMediaStreamSource( + this.mediaRecorder.stream + ); + + const analyser = this.audioAnalyzer.setup(source, this.audioContext); + audioVisualizer.classList.remove("hidden"); + + this.visualizer.draw(analyser, () => { + if (!state.silenceTimer) { + state.silenceTimer = setTimeout( + () => this.stopRecording(), + SILENCE_DURATION + ); + } + }); + + recordBtn.dataset.recording = true; + recordBtn.classList.add("processing"); + } + + stopRecording() { + if (state.isRecording) { + state.isVisualizing = false; + state.isRecording = false; + this.mediaRecorder.stop(); + clearTimeout(state.silenceTimer); + state.silenceTimer = null; + recordBtn.dataset.recording = false; + } + } + + async handleRecordingStop() { + 
console.log("Processing recording..."); + recordBtn.dataset.pending = true; + recordBtn.disabled = true; + + const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); + if (!fileInput.files.length) { + recordBtn.dataset.pending = false; + recordBtn.disabled = false; + alert("Please select a file."); + return; + } + + const formData = new FormData(); + formData.append("audio_data", audioBlob); + formData.append("file", fileInput.files[0]); + + try { + await this.processRecording(formData); + } catch (error) { + console.error("Processing error:", error); + } finally { + this.audioAnalyzer.cleanup(); + } + } + + async processRecording(formData) { + const response = await fetch("/ask", { + method: "POST", + body: formData, + }); + const data = await response.json(); + + await this.handleResponse(data); + } + + async handleResponse(data) { + audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; + + audioPlayback.onloadedmetadata = () => { + const analyser = this.audioAnalyzer.setupForPlayback( + audioPlayback, + this.audioContext + ); + audioVisualizer.classList.remove("hidden"); + + this.visualizer.draw(analyser, () => {}); + audioPlayback.play(); + state.isVisualizing = true; + }; + + audioPlayback.onended = () => { + this.audioAnalyzer.cleanup(); + recordBtn.dataset.pending = false; + recordBtn.disabled = false; + state.isVisualizing = false; + }; + } +} + +const uploadFile = async (e) => { + uploadBtn.innerText = "Uploading File..."; + e.preventDefault(); + const file = fileInput.files[0]; + + if (!file) { + alert("Please select a file."); + return; + } + const formData = new FormData(); + formData.append("file", file); + try { + await fetch("/upload", { + method: "POST", + body: formData, + }); + recordBtn.classList.remove("hidden"); + fileInputContainer.classList.add("hidden"); + } catch (error) { + recordBtn.classList.add("hidden"); + fileInputContainer.classList.remove("hidden"); + console.error("Error uploading file:", error); + uploadBtn.innerText = "Upload Failed. 
Try again"; + } +}; + +const uploadBtn = document.getElementById("upload-btn"); +uploadBtn.addEventListener("click", uploadFile); + +// Main initialization +async function initializeApp() { + if (!navigator.mediaDevices) { + console.error("Media devices not supported"); + return; + } + + const recorder = new RecordingHandler(); + await recorder.initialize(); + + recordBtn.onclick = () => { + if (recorder.mediaRecorder.state === "inactive") { + recorder.startRecording(); + } else if (recorder.mediaRecorder.state === "recording") { + recorder.stopRecording(); + } + }; +} + +// Start the application +initializeApp(); diff --git a/examples/quivr-whisper/static/loader.svg b/examples/quivr-whisper/static/loader.svg new file mode 100644 index 000000000..1390bc478 --- /dev/null +++ b/examples/quivr-whisper/static/loader.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic-off.svg b/examples/quivr-whisper/static/mic-off.svg new file mode 100644 index 000000000..46d151fca --- /dev/null +++ b/examples/quivr-whisper/static/mic-off.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic.svg b/examples/quivr-whisper/static/mic.svg new file mode 100644 index 000000000..726d9f11b --- /dev/null +++ b/examples/quivr-whisper/static/mic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index b7a0d74d4..e250adda1 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -1,3 +1,151 @@ +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + + +body { + color: #f4f4f4; + background-color: #252525; + display: flex; + gap: 1rem; + align-items: center; + flex-direction: column; + justify-content: center; + min-height: 100vh; +} + +.primary { + background-color: #6142d4; +} + +button { + background-color: #6142d4; + border: none; + padding: .75rem 2rem; + border-radius: 0.5rem; + color: #f4f4f4; + cursor: pointer; +} + +canvas { + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; + background-color: #252525; + z-index: -1; +} + +.record-btn { + background-color: #f5f5f5; + border: none; + outline: none; + width: 256px; + height: 256px; + background-repeat: no-repeat; + background-position: center; + border-radius: 50%; + background-size: 50%; + transition: background-color 200ms ease-in, transform 200ms ease-out; +} + +.record-btn:hover { + background-color: #fff; + transform: scale(1.025); +} + +.record-btn:active { + background-color: #e2e2e2; + transform: scale(0.975); +} + +.record-btn[data-recording="true"] { + background-image: url("./mic.svg"); +} + +.record-btn[data-recording="false"] { + background-image: url("./mic-off.svg"); +} + +.record-btn[data-pending="true"] { + background-image: url("./loader.svg") !important; + animation: spin 1s linear infinite; +} + +.hidden { + display: none !important; + visibility: hidden; +} + +.custom-file-input { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.custom-file-input input[type="file"] { + display: none; +} + +.custom-file-input label { + border: solid 2px #6142d4; + color: white; + padding: 8px 16px; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + font-weight: bold; + transition: background-color 0.3s; +} + +.custom-file-input label:hover { + background-color: #6142d4; +} + +.custom-file-input span { + font-size: 14px; + color: #f4f4f4; +} + +/* Adjust appearance 
when a file is selected */ +.custom-file-input span.file-selected { + color: #ffffff; + font-weight: bold; +} + +/* +# Override default MUI light theme. (Check theme.ts) +[UI.theme.light] + background = "#fcfcfc" + paper = "#f8f8f8" + + [UI.theme.light.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.light.text] + primary = "#1f1f1f" + secondary = "#818080" + +# Override default MUI dark theme. (Check theme.ts) +[UI.theme.dark] + background = "#252525" + paper = "#1f1f1f" + + [UI.theme.dark.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.dark.text] + primary = "#f4f4f4" + secondary = "#c8c8c8" + +*/ + .loader { border: 4px solid #f3f3f3; border-radius: 50%; diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index 6f508034c..d1ae47eb0 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -1,26 +1,37 @@ - + - - - - + + + Audio Interaction WebApp - - - + + - -

+        <h1>Quivr.app</h1>
+        <canvas id="audio-visualizer" class="hidden"></canvas>
+        <div class="custom-file-input">
+            <label for="fileInput">Choose a .txt file</label>
+            <input type="file" id="fileInput" accept=".txt" />
+            <span id="fileName">No file chosen</span>
+            <button id="upload-btn">Upload</button>
+        </div>
+        <button id="record-btn" class="record-btn hidden" data-recording="false" data-pending="false"></button>
+        <audio id="audio-playback" class="hidden"></audio>
+        <script src="{{ url_for('static', filename='app.js') }}"></script>
+    </body>
+</html>
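
For reviewers, a quick way to exercise the two endpoints this patch adds, outside the browser UI. This is a hedged sketch: the host and port assume Flask's default development server (`http://localhost:5000`), and `notes.txt` / `question.wav` are stand-in files you supply; only the routes, the form-field names (`file`, `audio_data`) and the `audio_base64` response key come from the code above.

```python
import base64
import requests  # assumed available in the dev environment

BASE_URL = "http://localhost:5000"  # Flask dev-server default; adjust as needed

with requests.Session() as s:  # one Session so the Flask session cookie links the two calls
    # 1. Upload a .txt document; the server builds a Brain for this session.
    with open("notes.txt", "rb") as f:
        r = s.post(f"{BASE_URL}/upload", files={"file": ("notes.txt", f, "text/plain")})
    r.raise_for_status()
    print(r.json())  # {"message": "Brain created successfully"}

    # 2. Send a recorded question; the server transcribes it, asks the Brain,
    #    and returns the spoken answer as base64-encoded audio.
    with open("question.wav", "rb") as f:
        r = s.post(f"{BASE_URL}/ask", files={"audio_data": ("question.wav", f, "audio/wav")})
    r.raise_for_status()
    with open("answer.wav", "wb") as out:
        out.write(base64.b64decode(r.json()["audio_base64"]))
```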
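A note on the threading pattern used in `/upload` and `/ask`: the quivr-core calls (`Brain.from_files`, `brain.ask`) are treated as blocking here, so the async Flask views hand them to a worker thread with `asyncio.to_thread`, and `run_in_event_loop` gives that thread its own event loop in case the callable is a coroutine function. A stripped-down sketch of the same pattern, with `blocking_work` as an illustrative stand-in for the quivr-core calls:

```python
import asyncio


def run_in_event_loop(func, *args, **kwargs):
    # Give the current worker thread its own event loop, run the callable,
    # and always close the loop afterwards.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        if asyncio.iscoroutinefunction(func):
            return loop.run_until_complete(func(*args, **kwargs))
        return func(*args, **kwargs)
    finally:
        loop.close()


def blocking_work(x: int) -> int:
    return x * 2  # stand-in for Brain.from_files / brain.ask


async def handler() -> int:
    # Offload the blocking call so the request coroutine is not stalled.
    return await asyncio.to_thread(run_in_event_loop, blocking_work, 21)


if __name__ == "__main__":
    print(asyncio.run(handler()))  # prints 42
```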