Mirror of https://github.com/StanGirard/quivr.git, synced 2024-11-21 16:12:42 +03:00
feat(quivr-whisper): add initial project files (#3492)
# Description

Please include a summary of the changes and the related issue. Please also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
parent 58946d01ca
commit 169587b2a3
5 examples/quivr-whisper/.env_example Normal file
@@ -0,0 +1,5 @@
QUIVR_API_KEY=XXXX
QUIVR_CHAT_ID=1XXXX
QUIVR_BRAIN_ID=XXXX
QUIVR_URL=XXXX
OPENAI_API_KEY=XXXX
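These variables are read at startup by `app.py` (added later in this commit) via `python-dotenv`; a minimal sketch of that loading pattern, assuming a `.env` file in the working directory:

```python
import os
from dotenv import load_dotenv

load_dotenv()  # read .env from the current working directory

quivr_api_key = os.getenv("QUIVR_API_KEY", "")
quivr_url = os.getenv("QUIVR_URL", "https://api.quivr.app")  # default used by app.py
```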
1 examples/quivr-whisper/.gitignore vendored Normal file
@@ -0,0 +1 @@
.env
1 examples/quivr-whisper/.python-version Normal file
@@ -0,0 +1 @@
3.11.9
3 examples/quivr-whisper/README.md Normal file
@@ -0,0 +1,3 @@
# quivr-whisper

Describe your project here.
65 examples/quivr-whisper/Readme.md Normal file
@@ -0,0 +1,65 @@
# Quivr-Whisper

Quivr-Whisper is a web application that lets users ask questions via audio input. It uses OpenAI's Whisper model for speech transcription and OpenAI's text-to-speech capabilities to synthesize the answer. The application queries the Quivr API to get a response based on the transcribed audio input.

https://github.com/StanGirard/quivr-whisper/assets/19614572/9cc270c9-07e4-4ce1-bcff-380f195c9313

## Features

- Audio input for asking questions
- Speech transcription using OpenAI's Whisper model
- Integration with the Quivr API for intelligent responses
- Speech synthesis of the response for audio playback

## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.

### Prerequisites

What you need before installing:

- Python 3.11+ (the project pins 3.11.9 in `.python-version`)
- pip for Python 3
- Flask
- OpenAI Python package
- Requests package
- python-dotenv package

### Installing

A step-by-step series of commands to get a development environment running:

1. Clone the repository to your local machine.
   ```bash
   git clone https://github.com/stangirard/quivr-whisper.git
   cd quivr-whisper
   ```

2. Install the required packages.
   ```bash
   pip install flask openai requests python-dotenv
   ```

3. Create a `.env` file in the root directory of the project and add your API keys and other configuration variables.
   ```env
   OPENAI_API_KEY='your_openai_api_key'
   QUIVR_API_KEY='your_quivr_api_key'
   QUIVR_CHAT_ID='your_quivr_chat_id'
   QUIVR_BRAIN_ID='your_quivr_brain_id'
   QUIVR_URL='https://api.quivr.app' # Optional, only if different from the default
   ```

4. Run the Flask application.
   ```bash
   flask run
   ```

Your app should now be running on `http://localhost:5000`.

## Usage

To use Quivr-Whisper, navigate to `http://localhost:5000` in your web browser, click "Ask a question to Quivr", and record your question. Wait for the transcription and the synthesized response, and you will hear the answer played back to you.
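For reference, the request the Flask backend makes to Quivr for each transcript looks roughly like the sketch below. It is a minimal, hedged example: the endpoint shape, Bearer auth, and the `assistant` response field follow `app.py` in this commit, and the question string is only a placeholder.

```python
import os
import requests

base_url = os.getenv("QUIVR_URL", "https://api.quivr.app")
chat_id = os.getenv("QUIVR_CHAT_ID")
brain_id = os.getenv("QUIVR_BRAIN_ID")

# Ask a single question against a given chat and brain, as app.py does.
response = requests.post(
    f"{base_url}/chat/{chat_id}/question?brain_id={brain_id}",
    headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('QUIVR_API_KEY')}",
    },
    json={"question": "What is Quivr?"},  # placeholder question
)
print(response.json().get("assistant"))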
78 examples/quivr-whisper/app.py Normal file
@@ -0,0 +1,78 @@
from flask import Flask, render_template, request, jsonify
import openai
import base64
import os
import requests
from dotenv import load_dotenv
from tempfile import NamedTemporaryFile

app = Flask(__name__)
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Quivr configuration: the question endpoint is scoped to a chat and a brain.
quivr_token = os.getenv("QUIVR_API_KEY", "")
quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "")
quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "")
quivr_url = (
    os.getenv("QUIVR_URL", "https://api.quivr.app")
    + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}"
)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {quivr_token}",
}


@app.route("/")
def index():
    return render_template("index.html")


@app.route("/transcribe", methods=["POST"])
def transcribe_audio():
    # Full pipeline: audio upload -> Whisper transcript -> Quivr answer -> TTS audio.
    audio_file = request.files["audio_data"]
    transcript = transcribe_audio_file(audio_file)
    quivr_response = ask_quivr_question(transcript)
    audio_base64 = synthesize_speech(quivr_response)
    return jsonify({"audio_base64": audio_base64})


def transcribe_audio_file(audio_file):
    # Whisper needs a real file on disk, so buffer the upload in a temp file.
    with NamedTemporaryFile(suffix=".webm", delete=False) as temp_audio_file:
        audio_file.save(temp_audio_file)
        temp_audio_file_path = temp_audio_file.name

    try:
        with open(temp_audio_file_path, "rb") as f:
            transcript_response = openai.audio.transcriptions.create(
                model="whisper-1", file=f
            )
        transcript = transcript_response.text
    finally:
        os.unlink(temp_audio_file_path)

    return transcript


def ask_quivr_question(transcript):
    response = requests.post(quivr_url, headers=headers, json={"question": transcript})
    if response.status_code == 200:
        quivr_response = response.json().get("assistant")
        return quivr_response
    else:
        print(f"Error from Quivr API: {response.status_code}, {response.text}")
        return "Sorry, I couldn't understand that."


def synthesize_speech(text):
    # Turn Quivr's text answer back into audio and return it base64-encoded.
    speech_response = openai.audio.speech.create(
        model="tts-1", voice="nova", input=text
    )
    audio_content = speech_response.content
    audio_base64 = base64.b64encode(audio_content).decode("utf-8")
    return audio_base64


if __name__ == "__main__":
    app.run(debug=True)
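A quick way to exercise this endpoint without the browser UI is to post a recording directly. A minimal sketch, assuming the Flask app is running locally and a short `question.webm` exists on disk (both the filename and the output name are only examples):

```python
import base64
import requests

# Post a recorded question to the local /transcribe endpoint defined above.
with open("question.webm", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/transcribe",
        files={"audio_data": ("question.webm", f, "audio/webm")},
    )
resp.raise_for_status()

# The server returns the synthesized answer as base64-encoded audio bytes.
audio_bytes = base64.b64decode(resp.json()["audio_base64"])
with open("answer_audio", "wb") as out:  # format is whatever the TTS call returned
    out.write(audio_bytes)
```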
28 examples/quivr-whisper/pyproject.toml Normal file
@@ -0,0 +1,28 @@
[project]
name = "quivr-whisper"
version = "0.1.0"
description = "Add your description here"
authors = [
    { name = "Stan Girard", email = "stan@quivr.app" }
]
dependencies = [
    "flask>=3.1.0",
    "openai>=1.54.5",
    "quivr-core>=0.0.24",
]
readme = "README.md"
requires-python = ">= 3.11"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.rye]
managed = true
dev-dependencies = []

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/quivr_whisper"]
713 examples/quivr-whisper/requirements-dev.lock Normal file
@@ -0,0 +1,713 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
#   pre: false
#   features: []
#   all-features: false
#   with-sources: false
#   generate-hashes: false
#   universal: false

-e file:.
aiofiles==24.1.0
|
||||
# via quivr-core
|
||||
aiohappyeyeballs==2.4.3
|
||||
# via aiohttp
|
||||
aiohttp==3.11.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.39.0
|
||||
# via langchain-anthropic
|
||||
antlr4-python3-runtime==4.9.3
|
||||
# via omegaconf
|
||||
anyio==4.6.2.post1
|
||||
# via anthropic
|
||||
# via httpx
|
||||
# via openai
|
||||
# via starlette
|
||||
attrs==24.2.0
|
||||
# via aiohttp
|
||||
backoff==2.2.1
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
beautifulsoup4==4.12.3
|
||||
# via llama-index-readers-file
|
||||
# via unstructured
|
||||
blinker==1.9.0
|
||||
# via flask
|
||||
cachetools==5.5.0
|
||||
# via google-auth
|
||||
certifi==2024.8.30
|
||||
# via httpcore
|
||||
# via httpx
|
||||
# via requests
|
||||
cffi==1.17.1
|
||||
# via cryptography
|
||||
chardet==5.2.0
|
||||
# via unstructured
|
||||
charset-normalizer==3.4.0
|
||||
# via pdfminer-six
|
||||
# via requests
|
||||
click==8.1.7
|
||||
# via flask
|
||||
# via llama-parse
|
||||
# via nltk
|
||||
# via python-oxmsg
|
||||
# via uvicorn
|
||||
cohere==5.11.4
|
||||
# via langchain-cohere
|
||||
coloredlogs==15.0.1
|
||||
# via onnxruntime
|
||||
contourpy==1.3.1
|
||||
# via matplotlib
|
||||
cryptography==43.0.3
|
||||
# via pdfminer-six
|
||||
# via unstructured-client
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dataclasses-json==0.6.7
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured
|
||||
defusedxml==0.7.1
|
||||
# via langchain-anthropic
|
||||
deprecated==1.2.15
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via pikepdf
|
||||
dirtyjson==1.0.8
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
distro==1.9.0
|
||||
# via anthropic
|
||||
# via openai
|
||||
effdet==0.4.1
|
||||
# via unstructured
|
||||
emoji==2.14.0
|
||||
# via unstructured
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
eval-type-backport==0.2.0
|
||||
# via unstructured-client
|
||||
faiss-cpu==1.9.0.post1
|
||||
# via quivr-core
|
||||
fastapi==0.115.5
|
||||
# via megaparse
|
||||
fastavro==1.9.7
|
||||
# via cohere
|
||||
filelock==3.16.1
|
||||
# via huggingface-hub
|
||||
# via torch
|
||||
# via transformers
|
||||
# via triton
|
||||
filetype==1.2.0
|
||||
# via llama-index-core
|
||||
# via unstructured
|
||||
flask==3.1.0
|
||||
# via quivr-whisper
|
||||
flatbuffers==24.3.25
|
||||
# via onnxruntime
|
||||
fonttools==4.55.0
|
||||
# via matplotlib
|
||||
frozenlist==1.5.0
|
||||
# via aiohttp
|
||||
# via aiosignal
|
||||
fsspec==2024.10.0
|
||||
# via huggingface-hub
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via torch
|
||||
google-api-core==2.23.0
|
||||
# via google-cloud-vision
|
||||
google-auth==2.36.0
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
google-cloud-vision==3.8.1
|
||||
# via unstructured
|
||||
googleapis-common-protos==1.66.0
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
greenlet==3.1.1
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
grpcio==1.68.0
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
grpcio-status==1.68.0
|
||||
# via google-api-core
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
# via uvicorn
|
||||
httpcore==1.0.7
|
||||
# via httpx
|
||||
httpx==0.27.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via langgraph-sdk
|
||||
# via langsmith
|
||||
# via llama-cloud
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via openai
|
||||
# via quivr-core
|
||||
# via unstructured-client
|
||||
httpx-sse==0.4.0
|
||||
# via cohere
|
||||
# via langgraph-sdk
|
||||
huggingface-hub==0.26.2
|
||||
# via timm
|
||||
# via tokenizers
|
||||
# via transformers
|
||||
# via unstructured-inference
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
idna==3.10
|
||||
# via anyio
|
||||
# via httpx
|
||||
# via requests
|
||||
# via yarl
|
||||
iopath==0.1.10
|
||||
# via layoutparser
|
||||
itsdangerous==2.2.0
|
||||
# via flask
|
||||
jinja2==3.1.4
|
||||
# via flask
|
||||
# via torch
|
||||
jiter==0.7.1
|
||||
# via anthropic
|
||||
# via openai
|
||||
joblib==1.4.2
|
||||
# via nltk
|
||||
jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpath-python==1.0.6
|
||||
# via unstructured-client
|
||||
jsonpointer==3.0.0
|
||||
# via jsonpatch
|
||||
kiwisolver==1.4.7
|
||||
# via matplotlib
|
||||
langchain==0.2.17
|
||||
# via langchain-community
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-anthropic==0.1.23
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-cohere==0.2.4
|
||||
# via quivr-core
|
||||
langchain-community==0.2.19
|
||||
# via langchain-experimental
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-core==0.2.43
|
||||
# via langchain
|
||||
# via langchain-anthropic
|
||||
# via langchain-cohere
|
||||
# via langchain-community
|
||||
# via langchain-experimental
|
||||
# via langchain-openai
|
||||
# via langchain-text-splitters
|
||||
# via langgraph
|
||||
# via langgraph-checkpoint
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-experimental==0.0.65
|
||||
# via langchain-cohere
|
||||
langchain-openai==0.1.25
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-text-splitters==0.2.4
|
||||
# via langchain
|
||||
langdetect==1.0.9
|
||||
# via unstructured
|
||||
langgraph==0.2.52
|
||||
# via quivr-core
|
||||
langgraph-checkpoint==2.0.5
|
||||
# via langgraph
|
||||
langgraph-sdk==0.1.36
|
||||
# via langgraph
|
||||
langsmith==0.1.143
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
layoutparser==0.3.4
|
||||
# via unstructured-inference
|
||||
llama-cloud==0.1.5
|
||||
# via llama-index-indices-managed-llama-cloud
|
||||
llama-index==0.12.0
|
||||
# via megaparse
|
||||
llama-index-agent-openai==0.4.0
|
||||
# via llama-index
|
||||
# via llama-index-program-openai
|
||||
llama-index-cli==0.4.0
|
||||
# via llama-index
|
||||
llama-index-core==0.12.0
|
||||
# via llama-index
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-cli
|
||||
# via llama-index-embeddings-openai
|
||||
# via llama-index-indices-managed-llama-cloud
|
||||
# via llama-index-llms-openai
|
||||
# via llama-index-multi-modal-llms-openai
|
||||
# via llama-index-program-openai
|
||||
# via llama-index-question-gen-openai
|
||||
# via llama-index-readers-file
|
||||
# via llama-index-readers-llama-parse
|
||||
# via llama-parse
|
||||
llama-index-embeddings-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-cli
|
||||
llama-index-indices-managed-llama-cloud==0.6.2
|
||||
# via llama-index
|
||||
llama-index-legacy==0.9.48.post4
|
||||
# via llama-index
|
||||
llama-index-llms-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-cli
|
||||
# via llama-index-multi-modal-llms-openai
|
||||
# via llama-index-program-openai
|
||||
# via llama-index-question-gen-openai
|
||||
llama-index-multi-modal-llms-openai==0.3.0
|
||||
# via llama-index
|
||||
llama-index-program-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-question-gen-openai
|
||||
llama-index-question-gen-openai==0.3.0
|
||||
# via llama-index
|
||||
llama-index-readers-file==0.4.0
|
||||
# via llama-index
|
||||
llama-index-readers-llama-parse==0.4.0
|
||||
# via llama-index
|
||||
llama-parse==0.5.14
|
||||
# via llama-index-readers-llama-parse
|
||||
# via megaparse
|
||||
lxml==5.3.0
|
||||
# via pikepdf
|
||||
# via python-docx
|
||||
# via python-pptx
|
||||
# via unstructured
|
||||
markdown==3.7
|
||||
# via unstructured
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
# via quivr-core
|
||||
# via werkzeug
|
||||
marshmallow==3.23.1
|
||||
# via dataclasses-json
|
||||
matplotlib==3.9.2
|
||||
# via pycocotools
|
||||
# via unstructured-inference
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
megaparse==0.0.43
|
||||
# via quivr-core
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
msgpack==1.1.0
|
||||
# via langgraph-checkpoint
|
||||
multidict==6.1.0
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
mypy-extensions==1.0.0
|
||||
# via typing-inspect
|
||||
nest-asyncio==1.6.0
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured-client
|
||||
networkx==3.4.2
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via torch
|
||||
# via unstructured
|
||||
nltk==3.9.1
|
||||
# via llama-index
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured
|
||||
numpy==1.26.4
|
||||
# via contourpy
|
||||
# via faiss-cpu
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via matplotlib
|
||||
# via megaparse
|
||||
# via onnx
|
||||
# via onnxruntime
|
||||
# via opencv-python
|
||||
# via pandas
|
||||
# via pycocotools
|
||||
# via scipy
|
||||
# via torchvision
|
||||
# via transformers
|
||||
# via unstructured
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
# via nvidia-cudnn-cu12
|
||||
# via nvidia-cusolver-cu12
|
||||
# via torch
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cuda-runtime-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cudnn-cu12==9.1.0.70
|
||||
# via torch
|
||||
nvidia-cufft-cu12==11.2.1.3
|
||||
# via torch
|
||||
nvidia-curand-cu12==10.3.5.147
|
||||
# via torch
|
||||
nvidia-cusolver-cu12==11.6.1.9
|
||||
# via torch
|
||||
nvidia-cusparse-cu12==12.3.1.170
|
||||
# via nvidia-cusolver-cu12
|
||||
# via torch
|
||||
nvidia-nccl-cu12==2.21.5
|
||||
# via torch
|
||||
nvidia-nvjitlink-cu12==12.4.127
|
||||
# via nvidia-cusolver-cu12
|
||||
# via nvidia-cusparse-cu12
|
||||
# via torch
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
# via torch
|
||||
olefile==0.47
|
||||
# via python-oxmsg
|
||||
omegaconf==2.3.0
|
||||
# via effdet
|
||||
onnx==1.17.0
|
||||
# via unstructured
|
||||
# via unstructured-inference
|
||||
onnxruntime==1.20.0
|
||||
# via unstructured-inference
|
||||
openai==1.54.5
|
||||
# via langchain-openai
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-embeddings-openai
|
||||
# via llama-index-legacy
|
||||
# via llama-index-llms-openai
|
||||
# via quivr-whisper
|
||||
opencv-python==4.10.0.84
|
||||
# via layoutparser
|
||||
# via unstructured-inference
|
||||
openpyxl==3.1.5
|
||||
# via unstructured
|
||||
orjson==3.10.11
|
||||
# via langgraph-sdk
|
||||
# via langsmith
|
||||
packaging==24.2
|
||||
# via faiss-cpu
|
||||
# via huggingface-hub
|
||||
# via langchain-core
|
||||
# via marshmallow
|
||||
# via matplotlib
|
||||
# via onnxruntime
|
||||
# via pikepdf
|
||||
# via pytesseract
|
||||
# via transformers
|
||||
# via unstructured-pytesseract
|
||||
pandas==2.2.3
|
||||
# via langchain-cohere
|
||||
# via layoutparser
|
||||
# via llama-index-legacy
|
||||
# via llama-index-readers-file
|
||||
# via unstructured
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
pdf2image==1.17.0
|
||||
# via layoutparser
|
||||
# via unstructured
|
||||
pdfminer-six==20231228
|
||||
# via pdfplumber
|
||||
# via unstructured
|
||||
pdfplumber==0.11.4
|
||||
# via layoutparser
|
||||
# via megaparse
|
||||
pikepdf==9.4.2
|
||||
# via unstructured
|
||||
pillow==11.0.0
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via matplotlib
|
||||
# via pdf2image
|
||||
# via pdfplumber
|
||||
# via pikepdf
|
||||
# via pillow-heif
|
||||
# via pytesseract
|
||||
# via python-pptx
|
||||
# via torchvision
|
||||
# via unstructured-pytesseract
|
||||
pillow-heif==0.20.0
|
||||
# via unstructured
|
||||
playwright==1.48.0
|
||||
# via megaparse
|
||||
portalocker==3.0.0
|
||||
# via iopath
|
||||
propcache==0.2.0
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
proto-plus==1.25.0
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
protobuf==5.28.3
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
# via googleapis-common-protos
|
||||
# via grpcio-status
|
||||
# via onnx
|
||||
# via onnxruntime
|
||||
# via proto-plus
|
||||
# via transformers
|
||||
psutil==6.1.0
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
pyasn1==0.6.1
|
||||
# via pyasn1-modules
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.1
|
||||
# via google-auth
|
||||
pycocotools==2.0.8
|
||||
# via effdet
|
||||
pycparser==2.22
|
||||
# via cffi
|
||||
pycryptodome==3.21.0
|
||||
# via megaparse
|
||||
pydantic==2.9.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via fastapi
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
# via langsmith
|
||||
# via llama-cloud
|
||||
# via llama-index-core
|
||||
# via openai
|
||||
# via pydantic-settings
|
||||
# via quivr-core
|
||||
# via unstructured-client
|
||||
pydantic-core==2.23.4
|
||||
# via cohere
|
||||
# via pydantic
|
||||
pydantic-settings==2.6.1
|
||||
# via megaparse
|
||||
pyee==12.0.0
|
||||
# via playwright
|
||||
pygments==2.18.0
|
||||
# via rich
|
||||
pypandoc==1.14
|
||||
# via unstructured
|
||||
pyparsing==3.2.0
|
||||
# via matplotlib
|
||||
pypdf==5.1.0
|
||||
# via llama-index-readers-file
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
# via unstructured-client
|
||||
pypdfium2==4.30.0
|
||||
# via pdfplumber
|
||||
pytesseract==0.3.13
|
||||
# via unstructured
|
||||
python-dateutil==2.8.2
|
||||
# via matplotlib
|
||||
# via pandas
|
||||
# via unstructured-client
|
||||
python-docx==1.1.2
|
||||
# via unstructured
|
||||
python-dotenv==1.0.1
|
||||
# via megaparse
|
||||
# via pydantic-settings
|
||||
python-iso639==2024.10.22
|
||||
# via unstructured
|
||||
python-magic==0.4.27
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
python-multipart==0.0.17
|
||||
# via unstructured-inference
|
||||
python-oxmsg==0.0.1
|
||||
# via unstructured
|
||||
python-pptx==0.6.23
|
||||
# via unstructured
|
||||
pytz==2024.2
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via omegaconf
|
||||
# via timm
|
||||
# via transformers
|
||||
quivr-core==0.0.24
|
||||
# via quivr-whisper
|
||||
rapidfuzz==3.10.1
|
||||
# via quivr-core
|
||||
# via unstructured
|
||||
# via unstructured-inference
|
||||
ratelimit==2.2.1
|
||||
# via megaparse
|
||||
regex==2024.11.6
|
||||
# via nltk
|
||||
# via tiktoken
|
||||
# via transformers
|
||||
requests==2.32.3
|
||||
# via cohere
|
||||
# via google-api-core
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langsmith
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via megaparse
|
||||
# via requests-toolbelt
|
||||
# via tiktoken
|
||||
# via transformers
|
||||
# via unstructured
|
||||
requests-toolbelt==1.0.0
|
||||
# via langsmith
|
||||
# via unstructured-client
|
||||
rich==13.9.4
|
||||
# via quivr-core
|
||||
rsa==4.9
|
||||
# via google-auth
|
||||
safetensors==0.4.5
|
||||
# via timm
|
||||
# via transformers
|
||||
scipy==1.14.1
|
||||
# via layoutparser
|
||||
sentencepiece==0.2.0
|
||||
# via transformers
|
||||
six==1.16.0
|
||||
# via langdetect
|
||||
# via python-dateutil
|
||||
sniffio==1.3.1
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via httpx
|
||||
# via openai
|
||||
soupsieve==2.6
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.36
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
starlette==0.41.3
|
||||
# via fastapi
|
||||
striprtf==0.0.26
|
||||
# via llama-index-readers-file
|
||||
sympy==1.13.1
|
||||
# via onnxruntime
|
||||
# via torch
|
||||
tabulate==0.9.0
|
||||
# via langchain-cohere
|
||||
# via unstructured
|
||||
tenacity==8.5.0
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
tiktoken==0.8.0
|
||||
# via langchain-openai
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via quivr-core
|
||||
timm==1.0.11
|
||||
# via effdet
|
||||
# via unstructured-inference
|
||||
tokenizers==0.20.3
|
||||
# via cohere
|
||||
# via transformers
|
||||
torch==2.5.1
|
||||
# via effdet
|
||||
# via timm
|
||||
# via torchvision
|
||||
# via unstructured-inference
|
||||
torchvision==0.20.1
|
||||
# via effdet
|
||||
# via timm
|
||||
tqdm==4.67.0
|
||||
# via huggingface-hub
|
||||
# via iopath
|
||||
# via llama-index-core
|
||||
# via nltk
|
||||
# via openai
|
||||
# via transformers
|
||||
# via unstructured
|
||||
transformers==4.46.3
|
||||
# via quivr-core
|
||||
# via unstructured-inference
|
||||
triton==3.1.0
|
||||
# via torch
|
||||
types-pyyaml==6.0.12.20240917
|
||||
# via quivr-core
|
||||
types-requests==2.32.0.20241016
|
||||
# via cohere
|
||||
typing-extensions==4.12.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via fastapi
|
||||
# via huggingface-hub
|
||||
# via iopath
|
||||
# via langchain-core
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via openai
|
||||
# via pydantic
|
||||
# via pydantic-core
|
||||
# via pyee
|
||||
# via python-docx
|
||||
# via python-oxmsg
|
||||
# via sqlalchemy
|
||||
# via torch
|
||||
# via typing-inspect
|
||||
# via unstructured
|
||||
typing-inspect==0.9.0
|
||||
# via dataclasses-json
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured-client
|
||||
tzdata==2024.2
|
||||
# via pandas
|
||||
unstructured==0.15.0
|
||||
# via megaparse
|
||||
unstructured-client==0.27.0
|
||||
# via unstructured
|
||||
unstructured-inference==0.7.36
|
||||
# via unstructured
|
||||
unstructured-pytesseract==0.3.13
|
||||
# via unstructured
|
||||
urllib3==2.2.3
|
||||
# via requests
|
||||
# via types-requests
|
||||
uvicorn==0.32.0
|
||||
# via megaparse
|
||||
uvloop==0.21.0
|
||||
# via megaparse
|
||||
werkzeug==3.1.3
|
||||
# via flask
|
||||
wrapt==1.16.0
|
||||
# via deprecated
|
||||
# via llama-index-core
|
||||
# via unstructured
|
||||
xlrd==2.0.1
|
||||
# via unstructured
|
||||
xlsxwriter==3.2.0
|
||||
# via python-pptx
|
||||
yarl==1.17.2
|
||||
# via aiohttp
|
713 examples/quivr-whisper/requirements.lock Normal file
@@ -0,0 +1,713 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
#   pre: false
#   features: []
#   all-features: false
#   with-sources: false
#   generate-hashes: false
#   universal: false

-e file:.
aiofiles==24.1.0
|
||||
# via quivr-core
|
||||
aiohappyeyeballs==2.4.3
|
||||
# via aiohttp
|
||||
aiohttp==3.11.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.39.0
|
||||
# via langchain-anthropic
|
||||
antlr4-python3-runtime==4.9.3
|
||||
# via omegaconf
|
||||
anyio==4.6.2.post1
|
||||
# via anthropic
|
||||
# via httpx
|
||||
# via openai
|
||||
# via starlette
|
||||
attrs==24.2.0
|
||||
# via aiohttp
|
||||
backoff==2.2.1
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
beautifulsoup4==4.12.3
|
||||
# via llama-index-readers-file
|
||||
# via unstructured
|
||||
blinker==1.9.0
|
||||
# via flask
|
||||
cachetools==5.5.0
|
||||
# via google-auth
|
||||
certifi==2024.8.30
|
||||
# via httpcore
|
||||
# via httpx
|
||||
# via requests
|
||||
cffi==1.17.1
|
||||
# via cryptography
|
||||
chardet==5.2.0
|
||||
# via unstructured
|
||||
charset-normalizer==3.4.0
|
||||
# via pdfminer-six
|
||||
# via requests
|
||||
click==8.1.7
|
||||
# via flask
|
||||
# via llama-parse
|
||||
# via nltk
|
||||
# via python-oxmsg
|
||||
# via uvicorn
|
||||
cohere==5.11.4
|
||||
# via langchain-cohere
|
||||
coloredlogs==15.0.1
|
||||
# via onnxruntime
|
||||
contourpy==1.3.1
|
||||
# via matplotlib
|
||||
cryptography==43.0.3
|
||||
# via pdfminer-six
|
||||
# via unstructured-client
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dataclasses-json==0.6.7
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured
|
||||
defusedxml==0.7.1
|
||||
# via langchain-anthropic
|
||||
deprecated==1.2.15
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via pikepdf
|
||||
dirtyjson==1.0.8
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
distro==1.9.0
|
||||
# via anthropic
|
||||
# via openai
|
||||
effdet==0.4.1
|
||||
# via unstructured
|
||||
emoji==2.14.0
|
||||
# via unstructured
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
eval-type-backport==0.2.0
|
||||
# via unstructured-client
|
||||
faiss-cpu==1.9.0.post1
|
||||
# via quivr-core
|
||||
fastapi==0.115.5
|
||||
# via megaparse
|
||||
fastavro==1.9.7
|
||||
# via cohere
|
||||
filelock==3.16.1
|
||||
# via huggingface-hub
|
||||
# via torch
|
||||
# via transformers
|
||||
# via triton
|
||||
filetype==1.2.0
|
||||
# via llama-index-core
|
||||
# via unstructured
|
||||
flask==3.1.0
|
||||
# via quivr-whisper
|
||||
flatbuffers==24.3.25
|
||||
# via onnxruntime
|
||||
fonttools==4.55.0
|
||||
# via matplotlib
|
||||
frozenlist==1.5.0
|
||||
# via aiohttp
|
||||
# via aiosignal
|
||||
fsspec==2024.10.0
|
||||
# via huggingface-hub
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via torch
|
||||
google-api-core==2.23.0
|
||||
# via google-cloud-vision
|
||||
google-auth==2.36.0
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
google-cloud-vision==3.8.1
|
||||
# via unstructured
|
||||
googleapis-common-protos==1.66.0
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
greenlet==3.1.1
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
grpcio==1.68.0
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
grpcio-status==1.68.0
|
||||
# via google-api-core
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
# via uvicorn
|
||||
httpcore==1.0.7
|
||||
# via httpx
|
||||
httpx==0.27.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via langgraph-sdk
|
||||
# via langsmith
|
||||
# via llama-cloud
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via openai
|
||||
# via quivr-core
|
||||
# via unstructured-client
|
||||
httpx-sse==0.4.0
|
||||
# via cohere
|
||||
# via langgraph-sdk
|
||||
huggingface-hub==0.26.2
|
||||
# via timm
|
||||
# via tokenizers
|
||||
# via transformers
|
||||
# via unstructured-inference
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
idna==3.10
|
||||
# via anyio
|
||||
# via httpx
|
||||
# via requests
|
||||
# via yarl
|
||||
iopath==0.1.10
|
||||
# via layoutparser
|
||||
itsdangerous==2.2.0
|
||||
# via flask
|
||||
jinja2==3.1.4
|
||||
# via flask
|
||||
# via torch
|
||||
jiter==0.7.1
|
||||
# via anthropic
|
||||
# via openai
|
||||
joblib==1.4.2
|
||||
# via nltk
|
||||
jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpath-python==1.0.6
|
||||
# via unstructured-client
|
||||
jsonpointer==3.0.0
|
||||
# via jsonpatch
|
||||
kiwisolver==1.4.7
|
||||
# via matplotlib
|
||||
langchain==0.2.17
|
||||
# via langchain-community
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-anthropic==0.1.23
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-cohere==0.2.4
|
||||
# via quivr-core
|
||||
langchain-community==0.2.19
|
||||
# via langchain-experimental
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-core==0.2.43
|
||||
# via langchain
|
||||
# via langchain-anthropic
|
||||
# via langchain-cohere
|
||||
# via langchain-community
|
||||
# via langchain-experimental
|
||||
# via langchain-openai
|
||||
# via langchain-text-splitters
|
||||
# via langgraph
|
||||
# via langgraph-checkpoint
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-experimental==0.0.65
|
||||
# via langchain-cohere
|
||||
langchain-openai==0.1.25
|
||||
# via megaparse
|
||||
# via quivr-core
|
||||
langchain-text-splitters==0.2.4
|
||||
# via langchain
|
||||
langdetect==1.0.9
|
||||
# via unstructured
|
||||
langgraph==0.2.52
|
||||
# via quivr-core
|
||||
langgraph-checkpoint==2.0.5
|
||||
# via langgraph
|
||||
langgraph-sdk==0.1.36
|
||||
# via langgraph
|
||||
langsmith==0.1.143
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
layoutparser==0.3.4
|
||||
# via unstructured-inference
|
||||
llama-cloud==0.1.5
|
||||
# via llama-index-indices-managed-llama-cloud
|
||||
llama-index==0.12.0
|
||||
# via megaparse
|
||||
llama-index-agent-openai==0.4.0
|
||||
# via llama-index
|
||||
# via llama-index-program-openai
|
||||
llama-index-cli==0.4.0
|
||||
# via llama-index
|
||||
llama-index-core==0.12.0
|
||||
# via llama-index
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-cli
|
||||
# via llama-index-embeddings-openai
|
||||
# via llama-index-indices-managed-llama-cloud
|
||||
# via llama-index-llms-openai
|
||||
# via llama-index-multi-modal-llms-openai
|
||||
# via llama-index-program-openai
|
||||
# via llama-index-question-gen-openai
|
||||
# via llama-index-readers-file
|
||||
# via llama-index-readers-llama-parse
|
||||
# via llama-parse
|
||||
llama-index-embeddings-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-cli
|
||||
llama-index-indices-managed-llama-cloud==0.6.2
|
||||
# via llama-index
|
||||
llama-index-legacy==0.9.48.post4
|
||||
# via llama-index
|
||||
llama-index-llms-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-cli
|
||||
# via llama-index-multi-modal-llms-openai
|
||||
# via llama-index-program-openai
|
||||
# via llama-index-question-gen-openai
|
||||
llama-index-multi-modal-llms-openai==0.3.0
|
||||
# via llama-index
|
||||
llama-index-program-openai==0.3.0
|
||||
# via llama-index
|
||||
# via llama-index-question-gen-openai
|
||||
llama-index-question-gen-openai==0.3.0
|
||||
# via llama-index
|
||||
llama-index-readers-file==0.4.0
|
||||
# via llama-index
|
||||
llama-index-readers-llama-parse==0.4.0
|
||||
# via llama-index
|
||||
llama-parse==0.5.14
|
||||
# via llama-index-readers-llama-parse
|
||||
# via megaparse
|
||||
lxml==5.3.0
|
||||
# via pikepdf
|
||||
# via python-docx
|
||||
# via python-pptx
|
||||
# via unstructured
|
||||
markdown==3.7
|
||||
# via unstructured
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
# via quivr-core
|
||||
# via werkzeug
|
||||
marshmallow==3.23.1
|
||||
# via dataclasses-json
|
||||
matplotlib==3.9.2
|
||||
# via pycocotools
|
||||
# via unstructured-inference
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
megaparse==0.0.43
|
||||
# via quivr-core
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
msgpack==1.1.0
|
||||
# via langgraph-checkpoint
|
||||
multidict==6.1.0
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
mypy-extensions==1.0.0
|
||||
# via typing-inspect
|
||||
nest-asyncio==1.6.0
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured-client
|
||||
networkx==3.4.2
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via torch
|
||||
# via unstructured
|
||||
nltk==3.9.1
|
||||
# via llama-index
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured
|
||||
numpy==1.26.4
|
||||
# via contourpy
|
||||
# via faiss-cpu
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via matplotlib
|
||||
# via megaparse
|
||||
# via onnx
|
||||
# via onnxruntime
|
||||
# via opencv-python
|
||||
# via pandas
|
||||
# via pycocotools
|
||||
# via scipy
|
||||
# via torchvision
|
||||
# via transformers
|
||||
# via unstructured
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
# via nvidia-cudnn-cu12
|
||||
# via nvidia-cusolver-cu12
|
||||
# via torch
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cuda-runtime-cu12==12.4.127
|
||||
# via torch
|
||||
nvidia-cudnn-cu12==9.1.0.70
|
||||
# via torch
|
||||
nvidia-cufft-cu12==11.2.1.3
|
||||
# via torch
|
||||
nvidia-curand-cu12==10.3.5.147
|
||||
# via torch
|
||||
nvidia-cusolver-cu12==11.6.1.9
|
||||
# via torch
|
||||
nvidia-cusparse-cu12==12.3.1.170
|
||||
# via nvidia-cusolver-cu12
|
||||
# via torch
|
||||
nvidia-nccl-cu12==2.21.5
|
||||
# via torch
|
||||
nvidia-nvjitlink-cu12==12.4.127
|
||||
# via nvidia-cusolver-cu12
|
||||
# via nvidia-cusparse-cu12
|
||||
# via torch
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
# via torch
|
||||
olefile==0.47
|
||||
# via python-oxmsg
|
||||
omegaconf==2.3.0
|
||||
# via effdet
|
||||
onnx==1.17.0
|
||||
# via unstructured
|
||||
# via unstructured-inference
|
||||
onnxruntime==1.20.0
|
||||
# via unstructured-inference
|
||||
openai==1.54.5
|
||||
# via langchain-openai
|
||||
# via llama-index-agent-openai
|
||||
# via llama-index-embeddings-openai
|
||||
# via llama-index-legacy
|
||||
# via llama-index-llms-openai
|
||||
# via quivr-whisper
|
||||
opencv-python==4.10.0.84
|
||||
# via layoutparser
|
||||
# via unstructured-inference
|
||||
openpyxl==3.1.5
|
||||
# via unstructured
|
||||
orjson==3.10.11
|
||||
# via langgraph-sdk
|
||||
# via langsmith
|
||||
packaging==24.2
|
||||
# via faiss-cpu
|
||||
# via huggingface-hub
|
||||
# via langchain-core
|
||||
# via marshmallow
|
||||
# via matplotlib
|
||||
# via onnxruntime
|
||||
# via pikepdf
|
||||
# via pytesseract
|
||||
# via transformers
|
||||
# via unstructured-pytesseract
|
||||
pandas==2.2.3
|
||||
# via langchain-cohere
|
||||
# via layoutparser
|
||||
# via llama-index-legacy
|
||||
# via llama-index-readers-file
|
||||
# via unstructured
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
pdf2image==1.17.0
|
||||
# via layoutparser
|
||||
# via unstructured
|
||||
pdfminer-six==20231228
|
||||
# via pdfplumber
|
||||
# via unstructured
|
||||
pdfplumber==0.11.4
|
||||
# via layoutparser
|
||||
# via megaparse
|
||||
pikepdf==9.4.2
|
||||
# via unstructured
|
||||
pillow==11.0.0
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via matplotlib
|
||||
# via pdf2image
|
||||
# via pdfplumber
|
||||
# via pikepdf
|
||||
# via pillow-heif
|
||||
# via pytesseract
|
||||
# via python-pptx
|
||||
# via torchvision
|
||||
# via unstructured-pytesseract
|
||||
pillow-heif==0.20.0
|
||||
# via unstructured
|
||||
playwright==1.48.0
|
||||
# via megaparse
|
||||
portalocker==3.0.0
|
||||
# via iopath
|
||||
propcache==0.2.0
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
proto-plus==1.25.0
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
protobuf==5.28.3
|
||||
# via google-api-core
|
||||
# via google-cloud-vision
|
||||
# via googleapis-common-protos
|
||||
# via grpcio-status
|
||||
# via onnx
|
||||
# via onnxruntime
|
||||
# via proto-plus
|
||||
# via transformers
|
||||
psutil==6.1.0
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
pyasn1==0.6.1
|
||||
# via pyasn1-modules
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.1
|
||||
# via google-auth
|
||||
pycocotools==2.0.8
|
||||
# via effdet
|
||||
pycparser==2.22
|
||||
# via cffi
|
||||
pycryptodome==3.21.0
|
||||
# via megaparse
|
||||
pydantic==2.9.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via fastapi
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
# via langsmith
|
||||
# via llama-cloud
|
||||
# via llama-index-core
|
||||
# via openai
|
||||
# via pydantic-settings
|
||||
# via quivr-core
|
||||
# via unstructured-client
|
||||
pydantic-core==2.23.4
|
||||
# via cohere
|
||||
# via pydantic
|
||||
pydantic-settings==2.6.1
|
||||
# via megaparse
|
||||
pyee==12.0.0
|
||||
# via playwright
|
||||
pygments==2.18.0
|
||||
# via rich
|
||||
pypandoc==1.14
|
||||
# via unstructured
|
||||
pyparsing==3.2.0
|
||||
# via matplotlib
|
||||
pypdf==5.1.0
|
||||
# via llama-index-readers-file
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
# via unstructured-client
|
||||
pypdfium2==4.30.0
|
||||
# via pdfplumber
|
||||
pytesseract==0.3.13
|
||||
# via unstructured
|
||||
python-dateutil==2.8.2
|
||||
# via matplotlib
|
||||
# via pandas
|
||||
# via unstructured-client
|
||||
python-docx==1.1.2
|
||||
# via unstructured
|
||||
python-dotenv==1.0.1
|
||||
# via megaparse
|
||||
# via pydantic-settings
|
||||
python-iso639==2024.10.22
|
||||
# via unstructured
|
||||
python-magic==0.4.27
|
||||
# via megaparse
|
||||
# via unstructured
|
||||
python-multipart==0.0.17
|
||||
# via unstructured-inference
|
||||
python-oxmsg==0.0.1
|
||||
# via unstructured
|
||||
python-pptx==0.6.23
|
||||
# via unstructured
|
||||
pytz==2024.2
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via layoutparser
|
||||
# via llama-index-core
|
||||
# via omegaconf
|
||||
# via timm
|
||||
# via transformers
|
||||
quivr-core==0.0.24
|
||||
# via quivr-whisper
|
||||
rapidfuzz==3.10.1
|
||||
# via quivr-core
|
||||
# via unstructured
|
||||
# via unstructured-inference
|
||||
ratelimit==2.2.1
|
||||
# via megaparse
|
||||
regex==2024.11.6
|
||||
# via nltk
|
||||
# via tiktoken
|
||||
# via transformers
|
||||
requests==2.32.3
|
||||
# via cohere
|
||||
# via google-api-core
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langsmith
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via megaparse
|
||||
# via requests-toolbelt
|
||||
# via tiktoken
|
||||
# via transformers
|
||||
# via unstructured
|
||||
requests-toolbelt==1.0.0
|
||||
# via langsmith
|
||||
# via unstructured-client
|
||||
rich==13.9.4
|
||||
# via quivr-core
|
||||
rsa==4.9
|
||||
# via google-auth
|
||||
safetensors==0.4.5
|
||||
# via timm
|
||||
# via transformers
|
||||
scipy==1.14.1
|
||||
# via layoutparser
|
||||
sentencepiece==0.2.0
|
||||
# via transformers
|
||||
six==1.16.0
|
||||
# via langdetect
|
||||
# via python-dateutil
|
||||
sniffio==1.3.1
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via httpx
|
||||
# via openai
|
||||
soupsieve==2.6
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.36
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
starlette==0.41.3
|
||||
# via fastapi
|
||||
striprtf==0.0.26
|
||||
# via llama-index-readers-file
|
||||
sympy==1.13.1
|
||||
# via onnxruntime
|
||||
# via torch
|
||||
tabulate==0.9.0
|
||||
# via langchain-cohere
|
||||
# via unstructured
|
||||
tenacity==8.5.0
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
tiktoken==0.8.0
|
||||
# via langchain-openai
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via quivr-core
|
||||
timm==1.0.11
|
||||
# via effdet
|
||||
# via unstructured-inference
|
||||
tokenizers==0.20.3
|
||||
# via cohere
|
||||
# via transformers
|
||||
torch==2.5.1
|
||||
# via effdet
|
||||
# via timm
|
||||
# via torchvision
|
||||
# via unstructured-inference
|
||||
torchvision==0.20.1
|
||||
# via effdet
|
||||
# via timm
|
||||
tqdm==4.67.0
|
||||
# via huggingface-hub
|
||||
# via iopath
|
||||
# via llama-index-core
|
||||
# via nltk
|
||||
# via openai
|
||||
# via transformers
|
||||
# via unstructured
|
||||
transformers==4.46.3
|
||||
# via quivr-core
|
||||
# via unstructured-inference
|
||||
triton==3.1.0
|
||||
# via torch
|
||||
types-pyyaml==6.0.12.20240917
|
||||
# via quivr-core
|
||||
types-requests==2.32.0.20241016
|
||||
# via cohere
|
||||
typing-extensions==4.12.2
|
||||
# via anthropic
|
||||
# via cohere
|
||||
# via fastapi
|
||||
# via huggingface-hub
|
||||
# via iopath
|
||||
# via langchain-core
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via openai
|
||||
# via pydantic
|
||||
# via pydantic-core
|
||||
# via pyee
|
||||
# via python-docx
|
||||
# via python-oxmsg
|
||||
# via sqlalchemy
|
||||
# via torch
|
||||
# via typing-inspect
|
||||
# via unstructured
|
||||
typing-inspect==0.9.0
|
||||
# via dataclasses-json
|
||||
# via llama-index-core
|
||||
# via llama-index-legacy
|
||||
# via unstructured-client
|
||||
tzdata==2024.2
|
||||
# via pandas
|
||||
unstructured==0.15.0
|
||||
# via megaparse
|
||||
unstructured-client==0.27.0
|
||||
# via unstructured
|
||||
unstructured-inference==0.7.36
|
||||
# via unstructured
|
||||
unstructured-pytesseract==0.3.13
|
||||
# via unstructured
|
||||
urllib3==2.2.3
|
||||
# via requests
|
||||
# via types-requests
|
||||
uvicorn==0.32.0
|
||||
# via megaparse
|
||||
uvloop==0.21.0
|
||||
# via megaparse
|
||||
werkzeug==3.1.3
|
||||
# via flask
|
||||
wrapt==1.16.0
|
||||
# via deprecated
|
||||
# via llama-index-core
|
||||
# via unstructured
|
||||
xlrd==2.0.1
|
||||
# via unstructured
|
||||
xlsxwriter==3.2.0
|
||||
# via python-pptx
|
||||
yarl==1.17.2
|
||||
# via aiohttp
|
209 examples/quivr-whisper/static/app.js Normal file
@@ -0,0 +1,209 @@
const recordBtn = document.getElementById('record-btn');
const audioVisualizer = document.getElementById('audio-visualizer');
const audioPlayback = document.getElementById('audio-playback');
const canvasCtx = audioVisualizer.getContext('2d');

let isRecording = false;
let mediaRecorder;
let audioChunks = [];
let audioContext;
let analyser;
let dataArray;
let bufferLength;
let lastAudioLevel = 0;
let silenceTimer;

recordBtn.addEventListener('click', toggleRecording);

function toggleRecording() {
    if (!isRecording) {
        recordBtn.classList.add('hidden');
        audioVisualizer.classList.remove('hidden');
        startRecording();
    } else {
        audioVisualizer.classList.add('hidden');
        stopRecording();
    }
}

function drawWaveform() {
    if (!analyser) return;

    requestAnimationFrame(drawWaveform);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(255, 255, 255)';
    canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
    let x = 0;

    let sum = 0;

    for (let i = 0; i < bufferLength; i++) {
        let v = dataArray[i] / 128.0;
        let y = v * audioVisualizer.height / 2;

        sum += v;

        if (i === 0) {
            canvasCtx.moveTo(x, y);
        } else {
            canvasCtx.lineTo(x, y);
        }

        x += sliceWidth;
    }

    canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
    canvasCtx.stroke();

    let currentAudioLevel = sum / bufferLength;

    if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) {
        if (!silenceTimer) {
            silenceTimer = setTimeout(stopRecording, 1000);
        }
    } else {
        clearTimeout(silenceTimer);
        silenceTimer = null;
    }

    lastAudioLevel = currentAudioLevel;
}

async function startRecording() {
    audioChunks = [];
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.ondataavailable = event => {
        audioChunks.push(event.data);
    };
    mediaRecorder.start();
    isRecording = true;

    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    analyser = audioContext.createAnalyser();
    const source = audioContext.createMediaStreamSource(stream);

    source.connect(analyser);
    analyser.fftSize = 2048;
    bufferLength = analyser.frequencyBinCount;
    dataArray = new Uint8Array(bufferLength);

    drawWaveform();
}

function stopRecording() {
    mediaRecorder.stop();
    mediaRecorder.onstop = async () => {
        // The mediaRecorder has stopped; now we can process the chunks
        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
        const formData = new FormData();
        formData.append('audio_data', audioBlob);

        // Now we're sending the audio to the server and waiting for a response
        try {
            const response = await fetch('/transcribe', {
                method: 'POST',
                body: formData
            });
            const data = await response.json();

            // Once we have the response, we can source the playback element and play it
            audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64;
            audioPlayback.classList.remove('hidden');
            audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response
            setupAIResponseVisualization();
            audioPlayback.onloadedmetadata = () => {
                // When metadata is loaded, start playback
                audioPlayback.play();
                visualizeAIResponse();
            };

            // We only reset the UI after the audio has finished playing
            // audioPlayback.onended = () => {
            //     resetUI();
            // };
        } catch (error) {
            console.error('Error during fetch/transcription:', error);
            resetUI();
        } finally {
            if (analyser) {
                analyser.disconnect();
                analyser = null;
            }
            isRecording = false;
        }
    };
}

function resetUI() {
    document.getElementById('record-btn').classList.remove('hidden');
    document.getElementById('audio-visualizer').classList.add('hidden');
    document.getElementById('audio-playback').classList.add('hidden');
    // Reset any other UI elements as necessary
}

function setupAIResponseVisualization() {
    try {
        // Create a new audio context for playback if it doesn't exist
        if (!audioContext) {
            audioContext = new (window.AudioContext || window.webkitAudioContext)();
        }
        // Resume the audio context in case it's in a suspended state
        audioContext.resume().then(() => {
            analyser = audioContext.createAnalyser();
            const source = audioContext.createMediaElementSource(audioPlayback);
            source.connect(analyser);
            analyser.connect(audioContext.destination);
            analyser.fftSize = 2048;
            bufferLength = analyser.frequencyBinCount;
            dataArray = new Uint8Array(bufferLength);
        });
    } catch (error) {
        console.error('Error setting up AI response visualization:', error);
    }
}

function visualizeAIResponse() {
    const draw = () => {
        requestAnimationFrame(draw);

        analyser.getByteTimeDomainData(dataArray);

        canvasCtx.fillStyle = 'rgb(255, 255, 255)';
        canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);

        canvasCtx.lineWidth = 2;
        canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

        canvasCtx.beginPath();

        let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
        let x = 0;

        for (let i = 0; i < bufferLength; i++) {
            let v = dataArray[i] / 128.0;
            let y = v * audioVisualizer.height / 2;

            if (i === 0) {
                canvasCtx.moveTo(x, y);
            } else {
                canvasCtx.lineTo(x, y);
            }

            x += sliceWidth;
        }

        canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
        canvasCtx.stroke();
    };

    draw();
}
36 examples/quivr-whisper/static/styles.css Normal file
@@ -0,0 +1,36 @@
.loader {
    border: 4px solid #f3f3f3;
    border-radius: 50%;
    border-top: 4px solid #3498db;
    width: 50px;
    height: 50px;
    -webkit-animation: spin 2s linear infinite;
    animation: spin 2s linear infinite;
    position: absolute;
    /* Center the loader in the viewport */
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    display: none;
    /* Hide it by default */
}

@-webkit-keyframes spin {
    0% {
        -webkit-transform: rotate(0deg);
    }

    100% {
        -webkit-transform: rotate(360deg);
    }
}

@keyframes spin {
    0% {
        transform: rotate(0deg);
    }

    100% {
        transform: rotate(360deg);
    }
}
26 examples/quivr-whisper/templates/index.html Normal file
@@ -0,0 +1,26 @@
<!doctype html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Interaction WebApp</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>

<body class="bg-gray-100 flex flex-col items-center justify-center h-screen">
    <h1 class="text-6xl font-bold mb-8">Quivr.app</h1>
    <div id="app" class="text-center">
        <button id="record-btn"
            class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-full cursor-pointer">
            Ask a question to Quivr
        </button>
        <canvas id="audio-visualizer" width="640" height="100"
            class="hidden bg-white rounded-lg cursor-pointer"></canvas>
        <audio id="audio-playback" controls class="hidden mt-4"></audio>
    </div>
    <script src="{{ url_for('static', filename='app.js') }}"></script>
</body>

</html>