From 7281fd905a24b8e4dad7214d7809b8856685fca8 Mon Sep 17 00:00:00 2001
From: Stan Girard
Date: Fri, 18 Aug 2023 10:18:29 +0200
Subject: [PATCH] feat(microservices): split into 4 quivr to better handle
 long services (#972)

* feat(verbose): removed

* feat(microservices): split quivr into micro services

* test(main): fixed
---
 backend/core/chat_service.py       | 68 +++++++++++++++++++++++++
 backend/core/crawl_service.py      | 68 +++++++++++++++++++++++++
 backend/core/llm/qa_base.py        |  6 +--
 backend/core/routes/chat_routes.py |  4 +-
 backend/core/upload_service.py     | 74 +++++++++++++++++++++++++++
 docker-compose.dev.yml             | 80 +++++++++++++++++++++++++++---
 docker-compose.yml                 | 76 ++++++++++++++++++++++++++--
 7 files changed, 359 insertions(+), 17 deletions(-)
 create mode 100644 backend/core/chat_service.py
 create mode 100644 backend/core/crawl_service.py
 create mode 100644 backend/core/upload_service.py

diff --git a/backend/core/chat_service.py b/backend/core/chat_service.py
new file mode 100644
index 000000000..575f8a7d3
--- /dev/null
+++ b/backend/core/chat_service.py
@@ -0,0 +1,68 @@
+import os
+
+if __name__ == "__main__":
+    # import needed here when running main.py to debug backend
+    # you will need to run pip install python-dotenv
+    from dotenv import load_dotenv
+
+    load_dotenv()
+import sentry_sdk
+from fastapi import FastAPI, HTTPException, Request, status
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from logger import get_logger
+from middlewares.cors import add_cors_middleware
+from routes.misc_routes import misc_router
+from routes.chat_routes import chat_router
+
+logger = get_logger(__name__)
+
+sentry_dsn = os.getenv("SENTRY_DSN")
+if sentry_dsn:
+    sentry_sdk.init(
+        dsn=sentry_dsn,
+        traces_sample_rate=1.0,
+    )
+
+app = FastAPI()
+
+add_cors_middleware(app)
+
+
+app.include_router(chat_router)
+app.include_router(misc_router)
+
+
+@app.exception_handler(HTTPException)
+async def http_exception_handler(_, exc):
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"detail": exc.detail},
+    )
+
+
+# log more details about validation errors (422)
+def handle_request_validation_error(app: FastAPI):
+    @app.exception_handler(RequestValidationError)
+    async def validation_exception_handler(
+        request: Request, exc: RequestValidationError
+    ):
+        exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
+        logger.error(request, exc_str)
+        content = {
+            "status_code": status.HTTP_422_UNPROCESSABLE_ENTITY,
+            "message": exc_str,
+            "data": None,
+        }
+        return JSONResponse(
+            content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
+        )
+
+
+handle_request_validation_error(app)
+
+if __name__ == "__main__":
+    # run main.py to debug backend
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=5050)
diff --git a/backend/core/crawl_service.py b/backend/core/crawl_service.py
new file mode 100644
index 000000000..ae3fec2d0
--- /dev/null
+++ b/backend/core/crawl_service.py
@@ -0,0 +1,68 @@
+import os
+
+if __name__ == "__main__":
+    # import needed here when running main.py to debug backend
+    # you will need to run pip install python-dotenv
+    from dotenv import load_dotenv
+
+    load_dotenv()
+import sentry_sdk
+from fastapi import FastAPI, HTTPException, Request, status
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from logger import get_logger
+from middlewares.cors import add_cors_middleware
+from routes.misc_routes import misc_router
+from routes.crawl_routes import crawl_router
+
+logger = get_logger(__name__)
+
+sentry_dsn = os.getenv("SENTRY_DSN")
+if sentry_dsn:
+    sentry_sdk.init(
+        dsn=sentry_dsn,
+        traces_sample_rate=1.0,
+    )
+
+app = FastAPI()
+
+add_cors_middleware(app)
+
+
+app.include_router(crawl_router)
+app.include_router(misc_router)
+
+
+@app.exception_handler(HTTPException)
+async def http_exception_handler(_, exc):
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"detail": exc.detail},
+    )
+
+
+# log more details about validation errors (422)
+def handle_request_validation_error(app: FastAPI):
+    @app.exception_handler(RequestValidationError)
+    async def validation_exception_handler(
+        request: Request, exc: RequestValidationError
+    ):
+        exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
+        logger.error(request, exc_str)
+        content = {
+            "status_code": status.HTTP_422_UNPROCESSABLE_ENTITY,
+            "message": exc_str,
+            "data": None,
+        }
+        return JSONResponse(
+            content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
+        )
+
+
+handle_request_validation_error(app)
+
+if __name__ == "__main__":
+    # run main.py to debug backend
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=5050)
diff --git a/backend/core/llm/qa_base.py b/backend/core/llm/qa_base.py
index dcd8ffa77..f3f54cea0 100644
--- a/backend/core/llm/qa_base.py
+++ b/backend/core/llm/qa_base.py
@@ -91,7 +91,7 @@ class QABaseBrainPicking(BaseBrainPicking):
             temperature=temperature,
             model=model,
             streaming=streaming,
-            verbose=True,
+            verbose=False,
             callbacks=callbacks,
             openai_api_key=self.openai_api_key,
         )  # pyright: ignore reportPrivateUsage=none
@@ -135,7 +135,7 @@ class QABaseBrainPicking(BaseBrainPicking):
             question_generator=LLMChain(
                 llm=self._create_llm(model=self.model), prompt=CONDENSE_QUESTION_PROMPT
             ),
-            verbose=True,
+            verbose=False,
         )
 
         model_response = qa(
@@ -208,7 +208,7 @@ class QABaseBrainPicking(BaseBrainPicking):
             question_generator=LLMChain(
                 llm=self._create_llm(model=self.model), prompt=CONDENSE_QUESTION_PROMPT
             ),
-            verbose=True,
+            verbose=False,
         )
 
         transformed_history = format_chat_history(history)
diff --git a/backend/core/routes/chat_routes.py b/backend/core/routes/chat_routes.py
index 85fb40737..1da1fbcbf 100644
--- a/backend/core/routes/chat_routes.py
+++ b/backend/core/routes/chat_routes.py
@@ -10,7 +10,7 @@ from fastapi.responses import StreamingResponse
 from llm.openai import OpenAIBrainPicking
 from models.brains import Brain
 from models.brain_entity import BrainEntity
-from models.chat import Chat, ChatHistory
+from models.chat import Chat
 from models.chats import ChatQuestion
 from models.databases.supabase.supabase import SupabaseDB
 from models.settings import LLMSettings, get_supabase_db
@@ -62,7 +62,7 @@ def check_user_limit(
 ):
     if user.user_openai_api_key is None:
         date = time.strftime("%Y%m%d")
-        max_requests_number = int(os.getenv("MAX_REQUESTS_NUMBER", 1000))
+        max_requests_number = int(os.getenv("MAX_REQUESTS_NUMBER", 1))
 
         user.increment_user_request_count(date)
         if int(user.requests_count) >= int(max_requests_number):
diff --git a/backend/core/upload_service.py b/backend/core/upload_service.py
new file mode 100644
index 000000000..8c01f2c88
--- /dev/null
+++ b/backend/core/upload_service.py
@@ -0,0 +1,74 @@
+import os
+
+if __name__ == "__main__":
+    # import needed here when running main.py to debug backend
+    # you will need to run pip install python-dotenv
+    from dotenv import load_dotenv
+
+    load_dotenv()
+import sentry_sdk
+import pypandoc
+from fastapi import FastAPI, HTTPException, Request, status
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from logger import get_logger
+from middlewares.cors import add_cors_middleware
+from routes.misc_routes import misc_router
+from routes.upload_routes import upload_router
+
+logger = get_logger(__name__)
+
+sentry_dsn = os.getenv("SENTRY_DSN")
+if sentry_dsn:
+    sentry_sdk.init(
+        dsn=sentry_dsn,
+        traces_sample_rate=1.0,
+    )
+
+app = FastAPI()
+
+
+@app.on_event("startup")
+async def startup_event():
+    if not os.path.exists(pypandoc.get_pandoc_path()):
+        pypandoc.download_pandoc()
+
+
+add_cors_middleware(app)
+
+
+app.include_router(upload_router)
+app.include_router(misc_router)
+
+
+@app.exception_handler(HTTPException)
+async def http_exception_handler(_, exc):
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"detail": exc.detail},
+    )
+
+
+# log more details about validation errors (422)
+def handle_request_validation_error(app: FastAPI):
+    @app.exception_handler(RequestValidationError)
+    async def validation_exception_handler(
+        request: Request, exc: RequestValidationError
+    ):
+        exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
+        logger.error(request, exc_str)
+        content = {
+            "status_code": status.HTTP_422_UNPROCESSABLE_ENTITY,
+            "message": exc_str,
+            "data": None,
+        }
+        return JSONResponse(
+            content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
+        )
+
+
+handle_request_validation_error(app)
+
+if __name__ == "__main__":
+    # run main.py to debug backend
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=5050)
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 66309b70d..5fe3891d2 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -1,6 +1,19 @@
-version: "3"
+version: '3'
 
 services:
+  traefik:
+    image: traefik:v2.10
+    command:
+      - "--api.insecure=true"
+      - "--providers.docker=true"
+      - "--providers.docker.exposedbydefault=false"
+      - "--entrypoints.web.address=:5050"
+    ports:
+      - "5050:5050"
+      - "8080:8080" # For the Traefik dashboard (optional)
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+
   frontend:
     env_file:
       - ./frontend/.env
@@ -9,12 +22,9 @@ services:
       dockerfile: Dockerfile.dev
     container_name: web
     restart: always
-    volumes:
-      - ./frontend/:/app
-      - /app/node_modules
-      - /app/.next
     ports:
       - 3000:3000
+
   backend-core:
     env_file:
       - ./backend/core/.env
@@ -22,9 +32,63 @@ services:
       context: backend/core
       dockerfile: Dockerfile
     container_name: backend-core
+    command: uvicorn main:app --host 0.0.0.0 --port 5050
     restart: always
     volumes:
       - ./backend/core/:/code/
-      - ~/.config/gcloud:/root/.config/gcloud
-    ports:
-      - 5050:5050
\ No newline at end of file
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-core.rule=PathPrefix(`/`)"
+      - "traefik.http.routers.backend-core.entrypoints=web"
+      - "traefik.http.services.backend-core.loadbalancer.server.port=5050"
+
+  backend-chat:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-chat
+    command: uvicorn chat_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-chat.rule=PathPrefix(`/chat`)"
+      - "traefik.http.routers.backend-chat.entrypoints=web"
+      - "traefik.http.services.backend-chat.loadbalancer.server.port=5050"
+
+  backend-crawl:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-crawl
+    command: uvicorn crawl_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-crawl.rule=PathPrefix(`/crawl`)"
+      - "traefik.http.routers.backend-crawl.entrypoints=web"
+      - "traefik.http.services.backend-crawl.loadbalancer.server.port=5050"
+
+  backend-upload:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-upload
+    command: uvicorn upload_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-upload.rule=PathPrefix(`/upload`)"
+      - "traefik.http.routers.backend-upload.entrypoints=web"
+      - "traefik.http.services.backend-upload.loadbalancer.server.port=5050"
diff --git a/docker-compose.yml b/docker-compose.yml
index 2140c7c6d..609fa753b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,19 @@
-version: "3"
+version: '3'
 
 services:
+  traefik:
+    image: traefik:v2.10
+    command:
+      - "--api.insecure=true"
+      - "--providers.docker=true"
+      - "--providers.docker.exposedbydefault=false"
+      - "--entrypoints.web.address=:5050"
+    ports:
+      - "5050:5050"
+      - "8080:8080" # For the Traefik dashboard (optional)
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+
   frontend:
     env_file:
       - ./frontend/.env
@@ -11,6 +24,7 @@ services:
     restart: always
     ports:
       - 3000:3000
+
   backend-core:
     env_file:
       - ./backend/core/.env
@@ -18,9 +32,63 @@ services:
      context: backend/core
       dockerfile: Dockerfile
     container_name: backend-core
+    command: uvicorn main:app --host 0.0.0.0 --port 5050
     restart: always
     volumes:
       - ./backend/core/:/code/
-      - ~/.config/gcloud:/root/.config/gcloud
-    ports:
-      - 5050:5050
\ No newline at end of file
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-core.rule=PathPrefix(`/`)"
+      - "traefik.http.routers.backend-core.entrypoints=web"
+      - "traefik.http.services.backend-core.loadbalancer.server.port=5050"
+
+  backend-chat:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-chat
+    command: uvicorn chat_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-chat.rule=PathPrefix(`/chat`)"
+      - "traefik.http.routers.backend-chat.entrypoints=web"
+      - "traefik.http.services.backend-chat.loadbalancer.server.port=5050"
+
+  backend-crawl:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-crawl
+    command: uvicorn crawl_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-crawl.rule=PathPrefix(`/crawl`)"
+      - "traefik.http.routers.backend-crawl.entrypoints=web"
+      - "traefik.http.services.backend-crawl.loadbalancer.server.port=5050"
+
+  backend-upload:
+    env_file:
+      - ./backend/core/.env
+    build:
+      context: backend/core
+      dockerfile: Dockerfile
+    container_name: backend-upload
+    command: uvicorn upload_service:app --host 0.0.0.0 --port 5050
+    restart: always
+    volumes:
+      - ./backend/core/:/code/
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backend-upload.rule=PathPrefix(`/upload`)"
+      - "traefik.http.routers.backend-upload.entrypoints=web"
+      - "traefik.http.services.backend-upload.loadbalancer.server.port=5050"
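
A quick way to sanity-check the Traefik routing this patch introduces is to send one request per path prefix through the single published entrypoint on port 5050. The sketch below is illustrative only and not part of the patch: it assumes the compose stack is running locally (docker compose up) and relies solely on the PathPrefix rules declared in the labels above; the exact endpoints behind each prefix, and what status codes they return, are assumptions.

# smoke_test_routing.py - hypothetical routing check, not part of this patch.
import requests  # third-party client: pip install requests

BASE = "http://localhost:5050"

# Path prefixes taken from the Traefik labels above; a request matching no
# longer prefix falls through to backend-core via PathPrefix(`/`).
for prefix in ["/chat", "/crawl", "/upload", "/"]:
    try:
        # Any HTTP response (even 404 or 422) shows Traefik matched the
        # prefix and reached a live uvicorn worker; a connection error
        # means the request never made it past the entrypoint.
        resp = requests.get(BASE + prefix, timeout=5)
        print(f"{prefix:8} -> HTTP {resp.status_code}")
    except requests.exceptions.ConnectionError as err:
        print(f"{prefix:8} -> unreachable ({err})")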