feat: 🎸 telemetry (#2169)

# Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate):
2024-12-14 17:03:29 +03:00 · 2024-02-07 20:44:37 -08:00 · 2024-02-07 20:44:37 -08:00 · 597fb4711a
commit 597fb4711a
parent a1dbce559c
7 changed files with 129 additions and 8 deletions
--- a/.env.example
+++ b/.env.example
@ -34,6 +34,7 @@ PG_DATABASE_URL=notimplementedyet
 ANTHROPIC_API_KEY=null
 JWT_SECRET_KEY=super-secret-jwt-token-with-at-least-32-characters-long
 AUTHENTICATE=true
+TELEMETRY_ENABLED=true
 CELERY_BROKER_URL=redis://redis:6379/0
 CELEBRY_BROKER_QUEUE_NAME=quivr-preview.fifo

--- a/backend/main.py
+++ b/backend/main.py
@ -23,6 +23,7 @@ from modules.prompt.controller import prompt_router
 from modules.upload.controller import upload_router
 from modules.user.controller import user_router
 from packages.utils import handle_request_validation_error
+from packages.utils.telemetry import send_telemetry
 from routes.crawl_routes import crawl_router
 from routes.subscription_routes import subscription_router
 from sentry_sdk.integrations.fastapi import FastApiIntegration
@ -79,6 +80,14 @@ async def http_exception_handler(_, exc):

 handle_request_validation_error(app)

+if os.getenv("TELEMETRY_ENABLED") == "true":
+    logger.info("Telemetry enabled, we use telemetry to collect anonymous usage data.")
+    logger.info(
+        "To disable telemetry, set the TELEMETRY_ENABLED environment variable to false."
+    )
+    send_telemetry("booting", {"status": "ok"})
+
+
 if __name__ == "__main__":
    # run main.py to debug backend
    import uvicorn
--- a/backend/modules/brain/controller/brain_routes.py
+++ b/backend/modules/brain/controller/brain_routes.py
@ -20,6 +20,7 @@ from modules.brain.service.integration_brain_service import (
 )
 from modules.prompt.service.prompt_service import PromptService
 from modules.user.entity.user_identity import UserIdentity
+from packages.utils.telemetry import send_telemetry
 from repository.brain import get_question_context_from_brain

 logger = get_logger(__name__)
@ -105,7 +106,7 @@ async def create_new_brain(
            status_code=429,
            detail=f"Maximum number of brains reached ({user_settings.get('max_brains', 5)}).",
        )
-
+    send_telemetry("create_brain", {"brain_name": brain.name})
    new_brain = brain_service.create_brain(
        brain=brain,
        user_id=current_user.id,
--- a/backend/modules/chat/controller/chat_routes.py
+++ b/backend/modules/chat/controller/chat_routes.py
@ -26,6 +26,7 @@ from modules.chat.entity.chat import Chat
 from modules.chat.service.chat_service import ChatService
 from modules.notification.service.notification_service import NotificationService
 from modules.user.entity.user_identity import UserIdentity
+from packages.utils.telemetry import send_telemetry
 from vectorstore.supabase import CustomSupabaseVectorStore

 logger = get_logger(__name__)
@ -103,6 +104,8 @@ def get_answer_generator(
        model_name=model_to_use.name,
    )

+    send_telemetry("question_asked", {"model_name": model_to_use.name})
+
    gpt_answer_generator = chat_instance.get_answer_generator(
        chat_id=str(chat_id),
        model=model_to_use.name,
@ -205,9 +208,9 @@ async def create_question_handler(
    request: Request,
    chat_question: ChatQuestion,
    chat_id: UUID,
-    brain_id: NullableUUID
-    | UUID
-    | None = Query(..., description="The ID of the brain"),
+    brain_id: NullableUUID | UUID | None = Query(
+        ..., description="The ID of the brain"
+    ),
    current_user: UserIdentity = Depends(get_current_user),
 ):
    try:
@ -238,9 +241,9 @@ async def create_stream_question_handler(
    request: Request,
    chat_question: ChatQuestion,
    chat_id: UUID,
-    brain_id: NullableUUID
-    | UUID
-    | None = Query(..., description="The ID of the brain"),
+    brain_id: NullableUUID | UUID | None = Query(
+        ..., description="The ID of the brain"
+    ),
    current_user: UserIdentity = Depends(get_current_user),
 ) -> StreamingResponse:
    chat_instance = BrainfulChat()
--- a/backend/modules/upload/controller/upload_routes.py
+++ b/backend/modules/upload/controller/upload_routes.py
@ -21,6 +21,7 @@ from modules.notification.entity.notification import NotificationsStatusEnum
 from modules.notification.service.notification_service import NotificationService
 from modules.user.entity.user_identity import UserIdentity
 from packages.files.file import convert_bytes, get_file_size
+from packages.utils.telemetry import send_telemetry
 from repository.files.upload_file import upload_file_storage

 logger = get_logger(__name__)
@ -55,7 +56,7 @@ async def upload_file(
    user_settings = user_daily_usage.get_user_settings()

    remaining_free_space = user_settings.get("max_brain_size", 1000000000)
-
+    send_telemetry("upload_file", {"file_name": uploadFile.filename})
    file_size = get_file_size(uploadFile)
    if remaining_free_space - file_size < 0:
        message = f"Brain will exceed maximum capacity. Maximum file allowed is : {convert_bytes(remaining_free_space)}"
--- a/backend/packages/utils/telemetry.py
+++ b/backend/packages/utils/telemetry.py
@ -0,0 +1,60 @@
+import hashlib
+import json
+import os
+import threading
+
+import httpx
+
+# Assume these are your Supabase Function endpoint and any necessary headers
+TELEMETRY_URL = "https://ovbvcnwemowuuuaebizd.supabase.co/functions/v1/telemetry"
+HEADERS = {
+    "Content-Type": "application/json",
+}
+
+
+def generate_machine_key():
+    # Get the OpenAI API key from the environment variables
+    seed = os.getenv("OPENAI_API_KEY")
+
+    # Use SHA-256 hash to generate a unique key from the seed
+    unique_key = hashlib.sha256(seed.encode()).hexdigest()
+
+    return unique_key
+
+
+def send_telemetry(event_name: str, event_data: dict):
+    # Generate a unique machine key
+    machine_key = generate_machine_key()
+
+    # Prepare the payload
+    payload = json.dumps(
+        {
+            "anonymous_identifier": machine_key,
+            "event_name": event_name,
+            "event_data": event_data,
+        }
+    )
+
+    # Send the telemetry data
+    with httpx.Client() as client:
+        _ = client.post(TELEMETRY_URL, headers=HEADERS, data=payload)
+
+
+def maybe_send_telemetry(event_name: str, event_data: dict):
+    enable_telemetry = os.getenv("TELEMETRY_ENABLED", "false")
+
+    if enable_telemetry.lower() != "true":
+        return
+
+    threading.Thread(target=send_telemetry, args=(event_name, event_data)).start()
+
+
+async def main():
+    await send_telemetry("user_login", {"login_success": True})
+
+
+# Run the example
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/supabase/functions/telemetry/index.ts
+++ b/supabase/functions/telemetry/index.ts
@ -0,0 +1,46 @@
+import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
+
+Deno.serve(async (req: Request) => {
+  console.log("Received request");
+
+  const authHeader = req.headers.get("Authorization")!;
+  const supabaseClient = createClient(
+    Deno.env.get("SUPABASE_URL") ?? "",
+    Deno.env.get("SUPABASE_ANON_KEY") ?? "",
+    { global: { headers: { Authorization: authHeader } } }
+  );
+
+  // Parse the request body to get event data and the anonymous identifier
+  const { anonymous_identifier, event_name, event_data } = await req.json();
+
+  console.log(
+    `Parsed request body: ${JSON.stringify({
+      anonymous_identifier,
+      event_name,
+      event_data,
+    })}`
+  );
+
+  // Insert the telemetry data along with the anonymous identifier into the Supabase table
+  const { data, error } = await supabaseClient
+    .from("telemetry")
+    .insert([{ anonymous_identifier, event_name, event_data }]);
+
+  if (error) {
+    console.error(`Error inserting data into Supabase: ${error.message}`);
+    return new Response(JSON.stringify({ error: error.message }), {
+      status: 400,
+      headers: {
+        "Content-Type": "application/json",
+      },
+    });
+  }
+
+  console.log("Successfully inserted data into Supabase");
+  return new Response(JSON.stringify({ message: "Telemetry logged", data }), {
+    status: 200,
+    headers: {
+      "Content-Type": "application/json",
+    },
+  });
+});