feat: 🎸 telemetry (#2169)

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
Stan Girard 2024-02-07 20:44:37 -08:00 committed by GitHub
parent a1dbce559c
commit 597fb4711a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 129 additions and 8 deletions

View File

@ -34,6 +34,7 @@ PG_DATABASE_URL=notimplementedyet
ANTHROPIC_API_KEY=null
JWT_SECRET_KEY=super-secret-jwt-token-with-at-least-32-characters-long
AUTHENTICATE=true
TELEMETRY_ENABLED=true
CELERY_BROKER_URL=redis://redis:6379/0
CELEBRY_BROKER_QUEUE_NAME=quivr-preview.fifo

View File

@ -23,6 +23,7 @@ from modules.prompt.controller import prompt_router
from modules.upload.controller import upload_router
from modules.user.controller import user_router
from packages.utils import handle_request_validation_error
from packages.utils.telemetry import send_telemetry
from routes.crawl_routes import crawl_router
from routes.subscription_routes import subscription_router
from sentry_sdk.integrations.fastapi import FastApiIntegration
@ -79,6 +80,14 @@ async def http_exception_handler(_, exc):
handle_request_validation_error(app)
if os.getenv("TELEMETRY_ENABLED") == "true":
logger.info("Telemetry enabled, we use telemetry to collect anonymous usage data.")
logger.info(
"To disable telemetry, set the TELEMETRY_ENABLED environment variable to false."
)
send_telemetry("booting", {"status": "ok"})
if __name__ == "__main__":
# run main.py to debug backend
import uvicorn

View File

@ -20,6 +20,7 @@ from modules.brain.service.integration_brain_service import (
)
from modules.prompt.service.prompt_service import PromptService
from modules.user.entity.user_identity import UserIdentity
from packages.utils.telemetry import send_telemetry
from repository.brain import get_question_context_from_brain
logger = get_logger(__name__)
@ -105,7 +106,7 @@ async def create_new_brain(
status_code=429,
detail=f"Maximum number of brains reached ({user_settings.get('max_brains', 5)}).",
)
send_telemetry("create_brain", {"brain_name": brain.name})
new_brain = brain_service.create_brain(
brain=brain,
user_id=current_user.id,

View File

@ -26,6 +26,7 @@ from modules.chat.entity.chat import Chat
from modules.chat.service.chat_service import ChatService
from modules.notification.service.notification_service import NotificationService
from modules.user.entity.user_identity import UserIdentity
from packages.utils.telemetry import send_telemetry
from vectorstore.supabase import CustomSupabaseVectorStore
logger = get_logger(__name__)
@ -103,6 +104,8 @@ def get_answer_generator(
model_name=model_to_use.name,
)
send_telemetry("question_asked", {"model_name": model_to_use.name})
gpt_answer_generator = chat_instance.get_answer_generator(
chat_id=str(chat_id),
model=model_to_use.name,
@ -205,9 +208,9 @@ async def create_question_handler(
request: Request,
chat_question: ChatQuestion,
chat_id: UUID,
brain_id: NullableUUID
| UUID
| None = Query(..., description="The ID of the brain"),
brain_id: NullableUUID | UUID | None = Query(
..., description="The ID of the brain"
),
current_user: UserIdentity = Depends(get_current_user),
):
try:
@ -238,9 +241,9 @@ async def create_stream_question_handler(
request: Request,
chat_question: ChatQuestion,
chat_id: UUID,
brain_id: NullableUUID
| UUID
| None = Query(..., description="The ID of the brain"),
brain_id: NullableUUID | UUID | None = Query(
..., description="The ID of the brain"
),
current_user: UserIdentity = Depends(get_current_user),
) -> StreamingResponse:
chat_instance = BrainfulChat()

View File

@ -21,6 +21,7 @@ from modules.notification.entity.notification import NotificationsStatusEnum
from modules.notification.service.notification_service import NotificationService
from modules.user.entity.user_identity import UserIdentity
from packages.files.file import convert_bytes, get_file_size
from packages.utils.telemetry import send_telemetry
from repository.files.upload_file import upload_file_storage
logger = get_logger(__name__)
@ -55,7 +56,7 @@ async def upload_file(
user_settings = user_daily_usage.get_user_settings()
remaining_free_space = user_settings.get("max_brain_size", 1000000000)
send_telemetry("upload_file", {"file_name": uploadFile.filename})
file_size = get_file_size(uploadFile)
if remaining_free_space - file_size < 0:
message = f"Brain will exceed maximum capacity. Maximum file allowed is : {convert_bytes(remaining_free_space)}"

View File

@ -0,0 +1,60 @@
import hashlib
import json
import os
import threading
import httpx
# Assume these are your Supabase Function endpoint and any necessary headers
TELEMETRY_URL = "https://ovbvcnwemowuuuaebizd.supabase.co/functions/v1/telemetry"
HEADERS = {
"Content-Type": "application/json",
}
def generate_machine_key():
# Get the OpenAI API key from the environment variables
seed = os.getenv("OPENAI_API_KEY")
# Use SHA-256 hash to generate a unique key from the seed
unique_key = hashlib.sha256(seed.encode()).hexdigest()
return unique_key
def send_telemetry(event_name: str, event_data: dict):
# Generate a unique machine key
machine_key = generate_machine_key()
# Prepare the payload
payload = json.dumps(
{
"anonymous_identifier": machine_key,
"event_name": event_name,
"event_data": event_data,
}
)
# Send the telemetry data
with httpx.Client() as client:
_ = client.post(TELEMETRY_URL, headers=HEADERS, data=payload)
def maybe_send_telemetry(event_name: str, event_data: dict):
enable_telemetry = os.getenv("TELEMETRY_ENABLED", "false")
if enable_telemetry.lower() != "true":
return
threading.Thread(target=send_telemetry, args=(event_name, event_data)).start()
async def main():
await send_telemetry("user_login", {"login_success": True})
# Run the example
if __name__ == "__main__":
import asyncio
asyncio.run(main())

View File

@ -0,0 +1,46 @@
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
Deno.serve(async (req: Request) => {
console.log("Received request");
const authHeader = req.headers.get("Authorization")!;
const supabaseClient = createClient(
Deno.env.get("SUPABASE_URL") ?? "",
Deno.env.get("SUPABASE_ANON_KEY") ?? "",
{ global: { headers: { Authorization: authHeader } } }
);
// Parse the request body to get event data and the anonymous identifier
const { anonymous_identifier, event_name, event_data } = await req.json();
console.log(
`Parsed request body: ${JSON.stringify({
anonymous_identifier,
event_name,
event_data,
})}`
);
// Insert the telemetry data along with the anonymous identifier into the Supabase table
const { data, error } = await supabaseClient
.from("telemetry")
.insert([{ anonymous_identifier, event_name, event_data }]);
if (error) {
console.error(`Error inserting data into Supabase: ${error.message}`);
return new Response(JSON.stringify({ error: error.message }), {
status: 400,
headers: {
"Content-Type": "application/json",
},
});
}
console.log("Successfully inserted data into Supabase");
return new Response(JSON.stringify({ message: "Telemetry logged", data }), {
status: 200,
headers: {
"Content-Type": "application/json",
},
});
});