Mirror of https://github.com/StanGirard/quivr.git (synced 2025-01-03 08:45:26 +03:00)
feat(example): Quivr whisper (#3495)
# Description

Talk with quivr hands-free via TTS (text-to-speech) and STT (speech-to-text).

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

![image](https://github.com/user-attachments/assets/1c169e80-45ce-4541-b244-5f3b85b866f2)
Parent: e68b4f4569
Commit: d20f58c147
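For context, the round trip this example wires up (record → STT → quivr-core Brain → TTS → playback) can also be exercised without the browser UI. Below is a minimal sketch against the `/upload` and `/ask` endpoints added in `app.py` in this diff; the host/port, the sample file names, and the use of a `requests` session (so the cookie set by `/upload` is sent back to `/ask`) are assumptions for illustration, not part of this PR:

```python
# Hypothetical smoke test for the /upload and /ask endpoints added below.
# Assumes the Flask dev server is running locally and the sample files exist.
import base64
import requests

BASE_URL = "http://localhost:5000"  # assumed Flask dev address
s = requests.Session()  # carries the session cookie that links /upload to /ask

# 1. Upload a .txt file; the server builds a quivr-core Brain from it.
with open("sample.txt", "rb") as f:
    s.post(f"{BASE_URL}/upload", files={"file": f}).raise_for_status()

# 2. Send recorded speech; the server transcribes it with Whisper, asks the
#    Brain, synthesizes the answer with OpenAI TTS, and returns base64 WAV.
with open("question.wav", "rb") as f:
    resp = s.post(f"{BASE_URL}/ask", files={"audio_data": f})
    resp.raise_for_status()

with open("answer.wav", "wb") as out:
    out.write(base64.b64decode(resp.json()["audio_base64"]))
```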
3 .vscode/settings.json vendored
@@ -44,5 +44,6 @@
     "reportUnusedImport": "warning",
     "reportGeneralTypeIssues": "warning"
   },
-  "makefile.configureOnOpen": false
+  "makefile.configureOnOpen": false,
+  "djlint.showInstallError": false
 }
1 examples/quivr-whisper/.gitignore vendored
@@ -1 +1,2 @@
 .env
+uploads
examples/quivr-whisper/app.py

@@ -1,27 +1,32 @@
-from flask import Flask, render_template, request, jsonify
+from flask import Flask, render_template, request, jsonify, session
 import openai
 import base64
 import os
 import requests
 from dotenv import load_dotenv
+from quivr_core import Brain
+from quivr_core.rag.entities.config import RetrievalConfig
 from tempfile import NamedTemporaryFile
+from werkzeug.utils import secure_filename
+from asyncio import to_thread
+import asyncio
+
+
+UPLOAD_FOLDER = "uploads"
+ALLOWED_EXTENSIONS = {"txt"}
+
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
 app = Flask(__name__)
+app.secret_key = "secret"
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
+app.config["CACHE_TYPE"] = "SimpleCache"  # In-memory cache for development
+app.config["CACHE_DEFAULT_TIMEOUT"] = 60 * 60  # 1 hour cache timeout
 load_dotenv()
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-quivr_token = os.getenv("QUIVR_API_KEY", "")
-quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "")
-quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "")
-quivr_url = (
-    os.getenv("QUIVR_URL", "https://api.quivr.app")
-    + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}"
-)
-
-headers = {
-    "Content-Type": "application/json",
-    "Authorization": f"Bearer {quivr_token}",
-}
+brains = {}
 
 
 @app.route("/")
@@ -29,12 +34,83 @@ def index():
     return render_template("index.html")
 
 
-@app.route("/transcribe", methods=["POST"])
-def transcribe_audio():
+def run_in_event_loop(func, *args, **kwargs):
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    if asyncio.iscoroutinefunction(func):
+        result = loop.run_until_complete(func(*args, **kwargs))
+    else:
+        result = func(*args, **kwargs)
+    loop.close()
+    return result
+
+
+def allowed_file(filename):
+    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
+
+
+@app.route("/upload", methods=["POST"])
+async def upload_file():
+    if "file" not in request.files:
+        return "No file part", 400
+
+    file = request.files["file"]
+
+    if file.filename == "":
+        return "No selected file", 400
+    if not (file and file.filename and allowed_file(file.filename)):
+        return "Invalid file type", 400
+
+    filename = secure_filename(file.filename)
+    filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+    file.save(filepath)
+
+    print(f"File uploaded and saved at: {filepath}")
+
+    print("Creating brain instance...")
+
+    brain: Brain = await to_thread(
+        run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]
+    )
+
+    # Store brain instance in cache
+    session_id = session.sid if hasattr(session, "sid") else os.urandom(16).hex()
+    session["session_id"] = session_id
+    # cache.set(session_id, brain)  # Store the brain instance in the cache
+    brains[session_id] = brain
+    print(f"Brain instance created and stored in cache for session ID: {session_id}")
+
+    return jsonify({"message": "Brain created successfully"})
+
+
+@app.route("/ask", methods=["POST"])
+async def ask():
+    if "audio_data" not in request.files:
+        return "Missing audio data", 400
+
+    # Retrieve the brain instance from the cache using the session ID
+    session_id = session.get("session_id")
+    if not session_id:
+        return "Session ID not found. Upload a file first.", 400
+
+    brain = brains.get(session_id)
+    if not brain:
+        return "Brain instance not found in dict. Upload a file first.", 400
+
+    print("Brain instance loaded from cache.")
+
+    print("Speech to text...")
     audio_file = request.files["audio_data"]
     transcript = transcribe_audio_file(audio_file)
-    quivr_response = ask_quivr_question(transcript)
-    audio_base64 = synthesize_speech(quivr_response)
+    print("Transcript result: ", transcript)
+
+    print("Getting response...")
+    quivr_response = await to_thread(run_in_event_loop, brain.ask, transcript)
+
+    print("Text to speech...")
+    audio_base64 = synthesize_speech(quivr_response.answer)
+
+    print("Done")
     return jsonify({"audio_base64": audio_base64})
 
 
@@ -55,16 +131,6 @@ def transcribe_audio_file(audio_file):
     return transcript
 
 
-def ask_quivr_question(transcript):
-    response = requests.post(quivr_url, headers=headers, json={"question": transcript})
-    if response.status_code == 200:
-        quivr_response = response.json().get("assistant")
-        return quivr_response
-    else:
-        print(f"Error from Quivr API: {response.status_code}, {response.text}")
-        return "Sorry, I couldn't understand that."
-
-
 def synthesize_speech(text):
     speech_response = openai.audio.speech.create(
         model="tts-1", voice="nova", input=text
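A note on the `to_thread` + `run_in_event_loop` combination used in `/upload` and `/ask` above: the views are `async`, so a request-handling event loop is already running, and quivr-core's `Brain.from_files` / `brain.ask` apparently drive asyncio work of their own; running them on a worker thread with a private event loop avoids a nested-loop clash. A stripped-down, self-contained sketch of the same pattern (with `slow_call` standing in for the quivr-core call; the `try/finally` around `loop.close()` is a hardening added here, not in the diff):

```python
# Sketch: call a loop-owning function from an async view via a worker thread.
import asyncio


def run_in_event_loop(func, *args, **kwargs):
    loop = asyncio.new_event_loop()  # private loop for this worker thread
    asyncio.set_event_loop(loop)
    try:
        if asyncio.iscoroutinefunction(func):
            return loop.run_until_complete(func(*args, **kwargs))
        return func(*args, **kwargs)
    finally:
        loop.close()


async def view():
    def slow_call():  # stand-in for Brain.from_files / brain.ask
        return "answer"

    # to_thread keeps the request loop responsive while the call runs elsewhere
    return await asyncio.to_thread(run_in_event_loop, slow_call)


print(asyncio.run(view()))  # -> answer
```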
examples/quivr-whisper/pyproject.toml

@@ -6,9 +6,10 @@ authors = [
     { name = "Stan Girard", email = "stan@quivr.app" }
 ]
 dependencies = [
-    "flask>=3.1.0",
+    "flask[async]>=3.1.0",
     "openai>=1.54.5",
     "quivr-core>=0.0.24",
+    "flask-caching>=2.3.0",
 ]
 readme = "README.md"
 requires-python = ">= 3.11"
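Aside: the switch to `flask[async]` is what makes the new `async def` views in `app.py` legal; without the `asgiref` extra (note `asgiref==3.8.1` appearing in the lock files below), Flask raises a `RuntimeError` at request time when an async view is dispatched. A minimal reproduction, assuming only a local Flask install:

```python
# Minimal async view; running this without the flask[async] extra installed
# fails at request time with "Install Flask with the 'async' extra".
from flask import Flask

app = Flask(__name__)


@app.route("/ping")
async def ping():
    return {"ok": True}


if __name__ == "__main__":
    app.run(debug=True)
```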
examples/quivr-whisper/requirements-dev.lock

@@ -32,6 +32,8 @@ anyio==4.6.2.post1
     # via httpx
     # via openai
     # via starlette
+asgiref==3.8.1
+    # via flask
 attrs==24.2.0
     # via aiohttp
 backoff==2.2.1
@@ -42,6 +44,8 @@ beautifulsoup4==4.12.3
     # via unstructured
 blinker==1.9.0
     # via flask
+cachelib==0.9.0
+    # via flask-caching
 cachetools==5.5.0
     # via google-auth
 certifi==2024.8.30
@@ -112,6 +116,9 @@ filetype==1.2.0
     # via llama-index-core
     # via unstructured
 flask==3.1.0
+    # via flask-caching
+    # via quivr-whisper
+flask-caching==2.3.0
     # via quivr-whisper
 flatbuffers==24.3.25
     # via onnxruntime
examples/quivr-whisper/requirements.lock

@@ -32,6 +32,8 @@ anyio==4.6.2.post1
     # via httpx
     # via openai
     # via starlette
+asgiref==3.8.1
+    # via flask
 attrs==24.2.0
     # via aiohttp
 backoff==2.2.1
@@ -42,6 +44,8 @@ beautifulsoup4==4.12.3
     # via unstructured
 blinker==1.9.0
     # via flask
+cachelib==0.9.0
+    # via flask-caching
 cachetools==5.5.0
     # via google-auth
 certifi==2024.8.30
@@ -112,6 +116,9 @@ filetype==1.2.0
     # via llama-index-core
     # via unstructured
 flask==3.1.0
+    # via flask-caching
+    # via quivr-whisper
+flask-caching==2.3.0
     # via quivr-whisper
 flatbuffers==24.3.25
     # via onnxruntime
examples/quivr-whisper/static/app.js

@@ -1,209 +1,359 @@
-const recordBtn = document.getElementById('record-btn');
-const audioVisualizer = document.getElementById('audio-visualizer');
-const audioPlayback = document.getElementById('audio-playback');
-const canvasCtx = audioVisualizer.getContext('2d');
-
-let isRecording = false;
-let mediaRecorder;
-let audioChunks = [];
-let audioContext;
-let analyser;
-let dataArray;
-let bufferLength;
-let lastAudioLevel = 0;
-let silenceTimer;
-
-recordBtn.addEventListener('click', toggleRecording);
-
-function toggleRecording() {
-    if (!isRecording) {
-        recordBtn.classList.add('hidden');
-        audioVisualizer.classList.remove('hidden');
-        startRecording();
-    } else {
-        audioVisualizer.classList.add('hidden');
-        stopRecording();
-    }
-}
-
-function drawWaveform() {
-    if (!analyser) return;
-
-    requestAnimationFrame(drawWaveform);
-
-    analyser.getByteTimeDomainData(dataArray);
-
-    canvasCtx.fillStyle = 'rgb(255, 255, 255)';
-    canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
-
-    canvasCtx.lineWidth = 2;
-    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
-
-    canvasCtx.beginPath();
-
-    let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
-    let x = 0;
-
-    let sum = 0;
-
-    for (let i = 0; i < bufferLength; i++) {
-        let v = dataArray[i] / 128.0;
-        let y = v * audioVisualizer.height / 2;
-
-        sum += v;
-
-        if (i === 0) {
-            canvasCtx.moveTo(x, y);
-        } else {
-            canvasCtx.lineTo(x, y);
-        }
-
-        x += sliceWidth;
-    }
-
-    canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
-    canvasCtx.stroke();
-
-    let currentAudioLevel = sum / bufferLength;
-
-    if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) {
-        if (!silenceTimer) {
-            silenceTimer = setTimeout(stopRecording, 1000);
-        }
-    } else {
-        clearTimeout(silenceTimer);
-        silenceTimer = null;
-    }
-
-    lastAudioLevel = currentAudioLevel;
-}
-
-async function startRecording() {
-    audioChunks = [];
-    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-    mediaRecorder = new MediaRecorder(stream);
-    mediaRecorder.ondataavailable = event => {
-        audioChunks.push(event.data);
-    };
-    mediaRecorder.start();
-    isRecording = true;
-
-    audioContext = new (window.AudioContext || window.webkitAudioContext)();
-    analyser = audioContext.createAnalyser();
-    const source = audioContext.createMediaStreamSource(stream);
-
-    source.connect(analyser);
-    analyser.fftSize = 2048;
-    bufferLength = analyser.frequencyBinCount;
-    dataArray = new Uint8Array(bufferLength);
-
-    drawWaveform();
-}
-
-function stopRecording() {
-    mediaRecorder.stop();
-    mediaRecorder.onstop = async () => {
-        // The mediaRecorder has stopped; now we can process the chunks
-        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
-        const formData = new FormData();
-        formData.append('audio_data', audioBlob);
-
-        // Now we're sending the audio to the server and waiting for a response
-        try {
-            const response = await fetch('/transcribe', {
-                method: 'POST',
-                body: formData
-            });
-            const data = await response.json();
-
-            // Once we have the response, we can source the playback element and play it
-            audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64;
-            audioPlayback.classList.remove('hidden');
-            audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response
-            setupAIResponseVisualization();
-            audioPlayback.onloadedmetadata = () => {
-                // When metadata is loaded, start playback
-                audioPlayback.play();
-                visualizeAIResponse();
-            };
-
-            // We only reset the UI after the audio has finished playing
-            // audioPlayback.onended = () => {
-            //     resetUI();
-            // };
-        } catch (error) {
-            console.error('Error during fetch/transcription:', error);
-            resetUI();
-        } finally {
-            if (analyser) {
-                analyser.disconnect();
-                analyser = null;
-            }
-            isRecording = false;
-        }
-    };
-}
-function resetUI() {
-    document.getElementById('record-btn').classList.remove('hidden');
-    document.getElementById('audio-visualizer').classList.add('hidden');
-    document.getElementById('audio-playback').classList.add('hidden');
-    // Reset any other UI elements as necessary
-}
-
-function setupAIResponseVisualization() {
-    try {
-        // Create a new audio context for playback if it doesn't exist
-        if (!audioContext) {
-            audioContext = new (window.AudioContext || window.webkitAudioContext)();
-        }
-        // Resume the audio context in case it's in a suspended state
-        audioContext.resume().then(() => {
-            analyser = audioContext.createAnalyser();
-            const source = audioContext.createMediaElementSource(audioPlayback);
-            source.connect(analyser);
-            analyser.connect(audioContext.destination);
-            analyser.fftSize = 2048;
-            bufferLength = analyser.frequencyBinCount;
-            dataArray = new Uint8Array(bufferLength);
-        });
-    } catch (error) {
-        console.error('Error setting up AI response visualization:', error);
-    }
-}
-
-function visualizeAIResponse() {
-    const draw = () => {
-        requestAnimationFrame(draw);
-
-        analyser.getByteTimeDomainData(dataArray);
-
-        canvasCtx.fillStyle = 'rgb(255, 255, 255)';
-        canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height);
-
-        canvasCtx.lineWidth = 2;
-        canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
-
-        canvasCtx.beginPath();
-
-        let sliceWidth = audioVisualizer.width * 1.0 / bufferLength;
-        let x = 0;
-
-        for (let i = 0; i < bufferLength; i++) {
-            let v = dataArray[i] / 128.0;
-            let y = v * audioVisualizer.height / 2;
-
-            if (i === 0) {
-                canvasCtx.moveTo(x, y);
-            } else {
-                canvasCtx.lineTo(x, y);
-            }
-
-            x += sliceWidth;
-        }
-
-        canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2);
-        canvasCtx.stroke();
-    };
-
-    draw();
-}
+// DOM Elements
+const recordBtn = document.getElementById("record-btn");
+const fileInput = document.getElementById("fileInput");
+const fileInputContainer = document.querySelector(".custom-file-input");
+const fileName = document.getElementById("fileName");
+
+const audioVisualizer = document.getElementById("audio-visualizer");
+const audioPlayback = document.getElementById("audio-playback");
+const canvasCtx = audioVisualizer.getContext("2d");
+
+window.addEventListener("load", () => {
+  audioVisualizer.width = window.innerWidth;
+  audioVisualizer.height = window.innerHeight;
+});
+
+window.addEventListener("resize", (e) => {
+  audioVisualizer.width = window.innerWidth;
+  audioVisualizer.height = window.innerHeight;
+});
+
+fileInput.addEventListener("change", () => {
+  fileName.textContent =
+    fileInput.files.length > 0 ? fileInput.files[0].name : "No file chosen";
+  fileName.classList.toggle("file-selected", fileInput.files.length > 0);
+});
+
+// Configuration
+const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle)
+const SILENCE_DURATION = 1500;
+const FFT_SIZE = 2048;
+
+// State
+const state = {
+  isRecording: false,
+  isVisualizing: false,
+  chunks: [],
+  silenceTimer: null,
+  lastAudioLevel: 0,
+};
+
+// Audio Analysis
+class AudioAnalyzer {
+  constructor() {
+    this.reset();
+  }
+
+  reset() {
+    this.analyser = null;
+    this.dataArray = null;
+    this.bufferLength = null;
+    this.source = null;
+    this.cleanup();
+  }
+
+  setup(source, audioContext) {
+    this.cleanup();
+
+    this.analyser = this._createAnalyser(audioContext);
+    source.connect(this.analyser);
+
+    this._initializeBuffer();
+    return this.analyser;
+  }
+
+  setupForPlayback(audioElement, audioContext, connectToDestination = true) {
+    // Reuse existing MediaElementSourceNode if it already exists for this audio element
+    if (!this.source || this.source.mediaElement !== audioElement) {
+      this.cleanup(); // Ensure any previous connections are cleaned up
+      this.source = audioContext.createMediaElementSource(audioElement);
+    }
+
+    this.analyser = this._createAnalyser(audioContext);
+
+    this.source.connect(this.analyser);
+
+    if (connectToDestination) {
+      this.analyser.connect(audioContext.destination);
+    }
+
+    this._initializeBuffer();
+    return this.analyser;
+  }
+
+  cleanup() {
+    if (this.source) {
+      this._safeDisconnect(this.source);
+    }
+    if (this.analyser) {
+      this._safeDisconnect(this.analyser);
+    }
+  }
+
+  _createAnalyser(audioContext) {
+    const analyser = audioContext.createAnalyser();
+    analyser.fftSize = FFT_SIZE;
+    return analyser;
+  }
+
+  _initializeBuffer() {
+    this.bufferLength = this.analyser.frequencyBinCount;
+    this.dataArray = new Uint8Array(this.bufferLength);
+  }
+
+  _safeDisconnect(node) {
+    if (node) {
+      try {
+        node.disconnect();
+      } catch {
+        // Ignore disconnect errors
+      }
+    }
+  }
+}
+
+// Visualization
+class Visualizer {
+  constructor(canvas, analyzer) {
+    this.canvas = canvas;
+    this.ctx = canvas.getContext("2d");
+    this.analyzer = analyzer;
+  }
+
+  draw(currentAnalyser, onSilence) {
+    if (!currentAnalyser || this.analyzer.dataArray === null) return;
+
+    requestAnimationFrame(() => this.draw(currentAnalyser, onSilence));
+
+    // Use getByteTimeDomainData instead of getFloatTimeDomainData
+    currentAnalyser.getByteTimeDomainData(this.analyzer.dataArray);
+
+    // Clear canvas
+    this.ctx.fillStyle = "#252525";
+    this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height);
+    if (!state.isVisualizing) return;
+
+    this.ctx.lineWidth = 2;
+    this.ctx.strokeStyle = "#6142d4";
+    this.ctx.beginPath();
+
+    const sliceWidth = (this.canvas.width * 1) / this.analyzer.bufferLength;
+    let x = 0;
+
+    let sum = 0;
+
+    // Draw waveform
+    for (let i = 0; i < this.analyzer.bufferLength; i++) {
+      // Scale byte data (0-255) to canvas height
+      const v = this.analyzer.dataArray[i] / 128.0; // normalize to 0-2
+      const y = (v - 1) * (this.canvas.height / 2) + this.canvas.height / 2;
+
+      sum += Math.abs(v - 1); // Calculate distance from center (128)
+
+      if (i === 0) {
+        this.ctx.moveTo(x, y);
+      } else {
+        this.ctx.lineTo(x, y);
+      }
+
+      x += sliceWidth;
+    }
+
+    this.ctx.lineTo(this.canvas.width, this.canvas.height / 2);
+    this.ctx.stroke();
+
+    // Check for silence during recording with adjusted thresholds for byte data
+    if (state.isRecording) {
+      const averageAmplitude = sum / this.analyzer.bufferLength;
+      if (averageAmplitude < 0.1) {
+        // Adjusted threshold for normalized data
+        // Reset silence timer if we detect sound
+        if (averageAmplitude > 0.05) {
+          clearTimeout(state.silenceTimer);
+          state.silenceTimer = null;
+        } else {
+          onSilence();
+        }
+      }
+    }
+  }
+}
+
+// Recording Handler
+class RecordingHandler {
+  constructor() {
+    this.mediaRecorder = null;
+    this.audioAnalyzer = new AudioAnalyzer();
+    this.visualizer = new Visualizer(audioVisualizer, this.audioAnalyzer);
+    this.audioContext = null;
+  }
+
+  async initialize() {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      this.mediaRecorder = new MediaRecorder(stream);
+      this.setupRecordingEvents();
+      if (!this.audioContext)
+        this.audioContext = new (window.AudioContext ||
+          window.webkitAudioContext)();
+    } catch (err) {
+      console.error(`Media device error: ${err}`);
+    }
+  }
+
+  setupRecordingEvents() {
+    this.mediaRecorder.ondataavailable = (e) => {
+      state.chunks.push(e.data);
+    };
+
+    this.mediaRecorder.onstop = async () => {
+      await this.handleRecordingStop();
+    };
+  }
+
+  startRecording() {
+    state.isVisualizing = true;
+    state.chunks = [];
+    state.isRecording = true;
+    this.mediaRecorder.start();
+
+    const source = this.audioContext.createMediaStreamSource(
+      this.mediaRecorder.stream
+    );
+
+    const analyser = this.audioAnalyzer.setup(source, this.audioContext);
+    audioVisualizer.classList.remove("hidden");
+
+    this.visualizer.draw(analyser, () => {
+      if (!state.silenceTimer) {
+        state.silenceTimer = setTimeout(
+          () => this.stopRecording(),
+          SILENCE_DURATION
+        );
+      }
+    });
+
+    recordBtn.dataset.recording = true;
+    recordBtn.classList.add("processing");
+  }
+
+  stopRecording() {
+    if (state.isRecording) {
+      state.isVisualizing = false;
+      state.isRecording = false;
+      this.mediaRecorder.stop();
+      clearTimeout(state.silenceTimer);
+      state.silenceTimer = null;
+      recordBtn.dataset.recording = false;
+    }
+  }
+
+  async handleRecordingStop() {
+    console.log("Processing recording...");
+    recordBtn.dataset.pending = true;
+    recordBtn.disabled = true;
+
+    const audioBlob = new Blob(state.chunks, { type: "audio/wav" });
+    if (!fileInput.files.length) {
+      recordBtn.dataset.pending = false;
+      recordBtn.disabled = false;
+      alert("Please select a file.");
+      return;
+    }
+
+    const formData = new FormData();
+    formData.append("audio_data", audioBlob);
+    formData.append("file", fileInput.files[0]);
+
+    try {
+      await this.processRecording(formData);
+    } catch (error) {
+      console.error("Processing error:", error);
+    } finally {
+      this.audioAnalyzer.cleanup();
+    }
+  }
+
+  async processRecording(formData) {
+    const response = await fetch("/ask", {
+      method: "POST",
+      body: formData,
+    });
+    const data = await response.json();
+
+    await this.handleResponse(data);
+  }
+
+  async handleResponse(data) {
+    audioPlayback.src = "data:audio/wav;base64," + data.audio_base64;
+
+    audioPlayback.onloadedmetadata = () => {
+      const analyser = this.audioAnalyzer.setupForPlayback(
+        audioPlayback,
+        this.audioContext
+      );
+      audioVisualizer.classList.remove("hidden");
+
+      this.visualizer.draw(analyser, () => {});
+      audioPlayback.play();
+      state.isVisualizing = true;
+    };
+
+    audioPlayback.onended = () => {
+      this.audioAnalyzer.cleanup();
+      recordBtn.dataset.pending = false;
+      recordBtn.disabled = false;
+      state.isVisualizing = false;
+    };
+  }
+}
+
+const uploadFile = async (e) => {
+  uploadBtn.innerText = "Uploading File...";
+  e.preventDefault();
+  const file = fileInput.files[0];
+
+  if (!file) {
+    alert("Please select a file.");
+    return;
+  }
+  const formData = new FormData();
+  formData.append("file", file);
+  try {
+    await fetch("/upload", {
+      method: "POST",
+      body: formData,
+    });
+    recordBtn.classList.remove("hidden");
+    fileInputContainer.classList.add("hidden");
+  } catch (error) {
+    recordBtn.classList.add("hidden");
+    fileInputContainer.classList.remove("hidden");
+    console.error("Error uploading file:", error);
+    uploadBtn.innerText = "Upload Failed. Try again";
+  }
+};
+
+const uploadBtn = document.getElementById("upload-btn");
+uploadBtn.addEventListener("click", uploadFile);
+
+// Main initialization
+async function initializeApp() {
+  if (!navigator.mediaDevices) {
+    console.error("Media devices not supported");
+    return;
+  }
+
+  const recorder = new RecordingHandler();
+  await recorder.initialize();
+
+  recordBtn.onclick = () => {
+    if (recorder.mediaRecorder.state === "inactive") {
+      recorder.startRecording();
+    } else if (recorder.mediaRecorder.state === "recording") {
+      recorder.stopRecording();
+    }
+  };
+}
+
+// Start the application
+initializeApp();
1 examples/quivr-whisper/static/loader.svg Normal file (406 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-loader-pinwheel"><path d="M22 12a1 1 0 0 1-10 0 1 1 0 0 0-10 0"/><path d="M7 20.7a1 1 0 1 1 5-8.7 1 1 0 1 0 5-8.6"/><path d="M7 3.3a1 1 0 1 1 5 8.6 1 1 0 1 0 5 8.6"/><circle cx="12" cy="12" r="10"/></svg>
1 examples/quivr-whisper/static/mic-off.svg Normal file (456 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-mic-off"><line x1="2" x2="22" y1="2" y2="22"/><path d="M18.89 13.23A7.12 7.12 0 0 0 19 12v-2"/><path d="M5 10v2a7 7 0 0 0 12 5"/><path d="M15 9.34V5a3 3 0 0 0-5.68-1.33"/><path d="M9 9v3a3 3 0 0 0 5.12 2.12"/><line x1="12" x2="12" y1="19" y2="22"/></svg>
1 examples/quivr-whisper/static/mic.svg Normal file (354 B)

@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-mic"><path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" x2="12" y1="19" y2="22"/></svg>
examples/quivr-whisper/static/styles.css

@@ -1,3 +1,151 @@
+* {
+  box-sizing: border-box;
+  margin: 0;
+  padding: 0;
+}
+
+body {
+  color: #f4f4f4;
+  background-color: #252525;
+  display: flex;
+  gap: 1rem;
+  align-items: center;
+  flex-direction: column;
+  justify-content: center;
+  min-height: 100vh;
+}
+
+.primary {
+  background-color: #6142d4;
+}
+
+button {
+  background-color: #6142d4;
+  border: none;
+  padding: .75rem 2rem;
+  border-radius: 0.5rem;
+  color: #f4f4f4;
+  cursor: pointer;
+}
+
+canvas {
+  position: absolute;
+  width: 100%;
+  height: 100%;
+  top: 0;
+  left: 0;
+  background-color: #252525;
+  z-index: -1;
+}
+
+.record-btn {
+  background-color: #f5f5f5;
+  border: none;
+  outline: none;
+  width: 256px;
+  height: 256px;
+  background-repeat: no-repeat;
+  background-position: center;
+  border-radius: 50%;
+  background-size: 50%;
+  transition: background-color 200ms ease-in, transform 200ms ease-out;
+}
+
+.record-btn:hover {
+  background-color: #fff;
+  transform: scale(1.025);
+}
+
+.record-btn:active {
+  background-color: #e2e2e2;
+  transform: scale(0.975);
+}
+
+.record-btn[data-recording="true"] {
+  background-image: url("./mic.svg");
+}
+
+.record-btn[data-recording="false"] {
+  background-image: url("./mic-off.svg");
+}
+
+.record-btn[data-pending="true"] {
+  background-image: url("./loader.svg") !important;
+  animation: spin 1s linear infinite;
+}
+
+.hidden {
+  display: none !important;
+  visibility: hidden;
+}
+
+.custom-file-input {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 10px;
+}
+
+.custom-file-input input[type="file"] {
+  display: none;
+}
+
+.custom-file-input label {
+  border: solid 2px #6142d4;
+  color: white;
+  padding: 8px 16px;
+  border-radius: 4px;
+  cursor: pointer;
+  font-size: 14px;
+  font-weight: bold;
+  transition: background-color 0.3s;
+}
+
+.custom-file-input label:hover {
+  background-color: #6142d4;
+}
+
+.custom-file-input span {
+  font-size: 14px;
+  color: #f4f4f4;
+}
+
+/* Adjust appearance when a file is selected */
+.custom-file-input span.file-selected {
+  color: #ffffff;
+  font-weight: bold;
+}
+
+/*
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+background = "#fcfcfc"
+paper = "#f8f8f8"
+
+[UI.theme.light.primary]
+main = "#6142d4"
+dark = "#6e53cf"
+light = "#6e53cf30"
+[UI.theme.light.text]
+primary = "#1f1f1f"
+secondary = "#818080"
+
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+background = "#252525"
+paper = "#1f1f1f"
+
+[UI.theme.dark.primary]
+main = "#6142d4"
+dark = "#6e53cf"
+light = "#6e53cf30"
+[UI.theme.dark.text]
+primary = "#f4f4f4"
+secondary = "#c8c8c8"
+
+*/
+
 .loader {
   border: 4px solid #f3f3f3;
   border-radius: 50%;
examples/quivr-whisper/templates/index.html

@@ -1,26 +1,37 @@
-<!doctype html>
+<!DOCTYPE html>
 <html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Audio Interaction WebApp</title>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
-</head>
-<body class="bg-gray-100 flex flex-col items-center justify-center h-screen">
-    <h1 class="text-6xl font-bold mb-8">Quivr.app</h1>
-    <div id="app" class="text-center">
-        <button id="record-btn"
-            class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-full cursor-pointer">
-            Ask a question to Quivr
-        </button>
-        <canvas id="audio-visualizer" width="640" height="100"
-            class="hidden bg-white rounded-lg cursor-pointer"></canvas>
-        <audio id="audio-playback" controls class="hidden mt-4"></audio>
-    </div>
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+
+    <title>Audio Interaction WebApp</title>
+    <link
+      rel="stylesheet"
+      href="{{ url_for('static', filename='styles.css') }}"
+    />
+  </head>
+
+  <body>
+    <button
+      type="button"
+      id="record-btn"
+      class="record-btn hidden"
+      data-recording="false"
+      data-pending="false"
+    ></button>
+    <div class="custom-file-input">
+      <label for="fileInput">Choose a file</label>
+      <input
+        type="file"
+        accept="text/plain"
+        name="fileInput"
+        required
+        id="fileInput"
+      />
+      <span id="fileName">No file chosen</span>
+      <button id="upload-btn" class="upload-btn">Upload</button>
+    </div>
+    <canvas id="audio-visualizer" class=""></canvas>
+    <audio id="audio-playback" controls class="hidden"></audio>
     <script src="{{ url_for('static', filename='app.js') }}"></script>
-</body>
+  </body>
 </html>