fix(crawler): route

Stan Girard 2023-06-29 08:00:34 +02:00
parent 8978c8ab37
commit 3946dbc668
3 changed files with 46 additions and 26 deletions

View File

@@ -7,8 +7,9 @@ from uuid import UUID
 from auth.auth_bearer import AuthBearer, get_current_user
 from fastapi import APIRouter, Depends, HTTPException, Query, Request
 from llm.brainpicking import BrainPicking
-from llm.BrainPickingOpenAIFunctions.BrainPickingOpenAIFunctions import \
-    BrainPickingOpenAIFunctions
+from llm.BrainPickingOpenAIFunctions.BrainPickingOpenAIFunctions import (
+    BrainPickingOpenAIFunctions,
+)
 from llm.PrivateBrainPicking import PrivateBrainPicking
 from models.chat import Chat, ChatHistory
 from models.chats import ChatQuestion
@@ -52,7 +53,7 @@ async def get_chats(current_user: User = Depends(get_current_user)):
     containing the chat ID and chat name for each chat.
     """
     commons = common_dependencies()
-    chats = get_user_chats( current_user.id)
+    chats = get_user_chats(current_user.id)
     return {"chats": chats}
@@ -83,7 +84,6 @@ async def update_chat_metadata_handler(
     """
     commons = common_dependencies()
     chat = get_chat_by_id(chat_id)
     if current_user.id != chat.user_id:
         raise HTTPException(
@@ -94,14 +94,13 @@ async def update_chat_metadata_handler(
 # helper method for update and create chat
 def check_user_limit(
-    user : User,
+    user: User,
 ):
     if user.user_openai_api_key is None:
         date = time.strftime("%Y%m%d")
         max_requests_number = os.getenv("MAX_REQUESTS_NUMBER")
-        user.increment_user_request_count( date )
+        user.increment_user_request_count(date)
         if user.requests_count >= float(max_requests_number):
             raise HTTPException(
                 status_code=429,
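
A note on the quota check above: float(max_requests_number) raises a TypeError when MAX_REQUESTS_NUMBER is unset, because os.getenv returns None. A minimal defensive sketch of the same logic; the "100" fallback and the detail string are illustrative assumptions, not part of this commit (the real detail string is truncated in the diff):

import os
import time

from fastapi import HTTPException


def check_user_limit(user):
    # Mirrors the commit's logic, but guards the env lookup with a default
    # ("100" is an assumed fallback, not from the repository).
    if user.user_openai_api_key is None:
        date = time.strftime("%Y%m%d")
        max_requests_number = float(os.getenv("MAX_REQUESTS_NUMBER", "100"))
        user.increment_user_request_count(date)
        if user.requests_count >= max_requests_number:
            # Placeholder message; the committed one is not visible in the diff.
            raise HTTPException(status_code=429, detail="Daily request limit reached.")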
@@ -121,7 +120,7 @@ async def create_chat_handler(
     Create a new chat with initial chat messages.
     """
-    return create_chat(user_id=current_user.id,chat_data=chat_data)
+    return create_chat(user_id=current_user.id, chat_data=chat_data)
 
 
 # add new question to chat
@@ -150,7 +149,7 @@ async def create_question_handler(
             chat_id=str(chat_id),
             temperature=chat_question.temperature,
             max_tokens=chat_question.max_tokens,
-            brain_id = brain_id,
+            brain_id=brain_id,
             user_openai_api_key=current_user.user_openai_api_key,
         )
         answer = gpt_answer_generator.generate_answer(chat_question.question)
@@ -162,7 +161,7 @@ async def create_question_handler(
             temperature=chat_question.temperature,
             max_tokens=chat_question.max_tokens,
             # TODO: use user_id in vectors table instead of email
-            brain_id = brain_id,
+            brain_id=brain_id,
             user_openai_api_key=current_user.user_openai_api_key,
         )
         answer = gpt_answer_generator.generate_answer(chat_question.question)
@@ -172,7 +171,7 @@ async def create_question_handler(
             model=chat_question.model,
             max_tokens=chat_question.max_tokens,
             temperature=chat_question.temperature,
-            brain_id = brain_id,
+            brain_id=brain_id,
             user_openai_api_key=current_user.user_openai_api_key,
         )
         answer = brainPicking.generate_answer(chat_question.question)

View File

@@ -16,40 +16,61 @@ from utils.processors import filter_file
 crawl_router = APIRouter()
 
-@crawl_router.post("/crawl/", dependencies=[Depends(AuthBearer())], tags=["Crawl"])
-async def crawl_endpoint(request: Request, crawl_website: CrawlWebsite, brain_id: UUID = Query(..., description="The ID of the brain"),enable_summarization: bool = False, current_user: User = Depends(get_current_user)):
+@crawl_router.post("/crawl", dependencies=[Depends(AuthBearer())], tags=["Crawl"])
+async def crawl_endpoint(
+    request: Request,
+    crawl_website: CrawlWebsite,
+    brain_id: UUID = Query(..., description="The ID of the brain"),
+    enable_summarization: bool = False,
+    current_user: User = Depends(get_current_user),
+):
     """
     Crawl a website and process the crawled data.
     """
-    # [TODO] check if the user is the owner/editor of the brain
-    brain = Brain(id= brain_id)
+    # [TODO] check if the user is the owner/editor of the brain
+    brain = Brain(id=brain_id)
     commons = common_dependencies()
 
-    if request.headers.get('Openai-Api-Key'):
-        brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY",209715200)
+    if request.headers.get("Openai-Api-Key"):
+        brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
 
     file_size = 1000000
-    remaining_free_space = brain.remaining_brain_size
+    remaining_free_space = brain.remaining_brain_size
 
     if remaining_free_space - file_size < 0:
-        message = {"message": f"❌ User's brain will exceed maximum capacity with this upload. Maximum file allowed is : {convert_bytes(remaining_free_space)}", "type": "error"}
-    else:
+        message = {
+            "message": f"❌ User's brain will exceed maximum capacity with this upload. Maximum file allowed is : {convert_bytes(remaining_free_space)}",
+            "type": "error",
+        }
+    else:
         if not crawl_website.checkGithub():
             file_path, file_name = crawl_website.process()
             # Create a SpooledTemporaryFile from the file_path
             spooled_file = SpooledTemporaryFile()
-            with open(file_path, 'rb') as f:
+            with open(file_path, "rb") as f:
                 shutil.copyfileobj(f, spooled_file)
             # Pass the SpooledTemporaryFile to UploadFile
             uploadFile = UploadFile(file=spooled_file, filename=file_name)
-            file = File(file = uploadFile)
+            file = File(file=uploadFile)
             # check remaining free space here !!
-            message = await filter_file(commons, file, enable_summarization, brain.id, openai_api_key=request.headers.get('Openai-Api-Key', None))
+            message = await filter_file(
+                commons,
+                file,
+                enable_summarization,
+                brain.id,
+                openai_api_key=request.headers.get("Openai-Api-Key", None),
+            )
             return message
         else:
             # check remaining free space here !!
-            message = await process_github(commons,crawl_website.url, "false", brain_id, user_openai_api_key=request.headers.get('Openai-Api-Key', None))
+            message = await process_github(
+                commons,
+                crawl_website.url,
+                "false",
+                brain_id,
+                user_openai_api_key=request.headers.get("Openai-Api-Key", None),
+            )
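
The endpoint above copies the crawled file into a SpooledTemporaryFile so it can be wrapped in FastAPI's UploadFile and flow through the regular upload pipeline. A standalone sketch of that pattern; the helper name is hypothetical, and the seek(0) rewind is an addition the diff does not show but downstream readers usually need:

import shutil
from tempfile import SpooledTemporaryFile

from fastapi import UploadFile


def upload_file_from_path(file_path: str, file_name: str) -> UploadFile:
    # Copy the on-disk file into a buffer that spills to disk when large.
    spooled_file = SpooledTemporaryFile()
    with open(file_path, "rb") as f:
        shutil.copyfileobj(f, spooled_file)
    spooled_file.seek(0)  # rewind so the next reader starts at byte 0
    return UploadFile(file=spooled_file, filename=file_name)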

View File

@@ -53,7 +53,7 @@ export const useCrawler = () => {
     console.log("Crawling website...", brainId);
     if (brainId !== undefined) {
       const response = await axiosInstance.post(
-        `/crawl/?brain_id=${brainId}`,
+        `/crawl?brain_id=${brainId}`,
         config
       );
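
The route fix itself is the trailing slash: the router now serves POST /crawl, and the frontend calls /crawl?brain_id=... to match. With Starlette's default redirect_slashes behavior, a mismatched path is answered with a redirect (307 on current versions) instead of the handler, costing an extra round-trip and tripping up some clients and CORS setups on POST. A minimal sketch, not from the repository, that shows the behavior:

from fastapi import FastAPI
from fastapi.testclient import TestClient

app = FastAPI()


@app.post("/crawl")
async def crawl(brain_id: str):  # brain_id arrives as a query parameter
    return {"brain_id": brain_id}


client = TestClient(app)

# Exact path: the handler runs directly.
print(client.post("/crawl?brain_id=123", follow_redirects=False).status_code)  # 200

# Trailing slash: Starlette redirects instead of invoking the handler.
print(client.post("/crawl/?brain_id=123", follow_redirects=False).status_code)  # 307

Keeping the path identical on both sides, as this commit does, avoids the redirect entirely.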