mirror of
https://github.com/QuivrHQ/quivr.git
synced 2025-01-05 23:03:53 +03:00
feat: get files from storage (#1205)
* 🌱 list files in storage & generate signed URL * ✨ add knowledge router * 🗃️ add knowledge tables * ✨ add knowledge during upload * 🚧 add knowledge a brain_knowledge models and repo * 🔥 remove brain_knowledge * ✨ add upload to knowledge table * ✨ add crawl to knowledge table * ✏️ fixes
This commit is contained in:
parent
37935c59ca
commit
be7acf052b
@ -18,6 +18,7 @@ from routes.brain_routes import brain_router
|
|||||||
from routes.chat_routes import chat_router
|
from routes.chat_routes import chat_router
|
||||||
from routes.crawl_routes import crawl_router
|
from routes.crawl_routes import crawl_router
|
||||||
from routes.explore_routes import explore_router
|
from routes.explore_routes import explore_router
|
||||||
|
from routes.knowledge_routes import knowledge_router
|
||||||
from routes.misc_routes import misc_router
|
from routes.misc_routes import misc_router
|
||||||
from routes.notification_routes import notification_router
|
from routes.notification_routes import notification_router
|
||||||
from routes.prompt_routes import prompt_router
|
from routes.prompt_routes import prompt_router
|
||||||
@ -56,6 +57,7 @@ app.include_router(api_key_router)
|
|||||||
app.include_router(subscription_router)
|
app.include_router(subscription_router)
|
||||||
app.include_router(prompt_router)
|
app.include_router(prompt_router)
|
||||||
app.include_router(notification_router)
|
app.include_router(notification_router)
|
||||||
|
app.include_router(knowledge_router)
|
||||||
|
|
||||||
|
|
||||||
@app.exception_handler(HTTPException)
|
@app.exception_handler(HTTPException)
|
||||||
|
@ -16,7 +16,7 @@ class Brain(BaseModel):
|
|||||||
name: Optional[str] = "Default brain"
|
name: Optional[str] = "Default brain"
|
||||||
description: Optional[str] = "This is a description"
|
description: Optional[str] = "This is a description"
|
||||||
status: Optional[str] = "private"
|
status: Optional[str] = "private"
|
||||||
model: Optional[str]
|
model: Optional[str] = None
|
||||||
temperature: Optional[float] = 0.0
|
temperature: Optional[float] = 0.0
|
||||||
max_tokens: Optional[int] = 256
|
max_tokens: Optional[int] = 256
|
||||||
openai_api_key: Optional[str] = None
|
openai_api_key: Optional[str] = None
|
||||||
|
@ -239,3 +239,15 @@ class Repository(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_notifications_by_chat_id(self, chat_id: UUID):
|
def get_notifications_by_chat_id(self, chat_id: UUID):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
def insert_knowledge(self, brain_id: UUID):
    """Abstract hook for inserting a knowledge record.

    NOTE(review): the Supabase implementation shown in this change takes a
    ``CreateKnowledgeProperties`` object rather than a bare ``brain_id`` --
    confirm which signature is the intended contract.
    """
|
||||||
|
|
||||||
|
@abstractmethod
def remove_knowledge_by_id(self, knowledge_id: UUID):
    """Abstract hook for removing the knowledge record identified by ``knowledge_id``."""
|
||||||
|
|
||||||
|
@abstractmethod
def get_knowledge_by_id(self, knowledge_id: UUID):
    """Abstract hook for fetching the knowledge record identified by ``knowledge_id``."""
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
from models.databases.supabase.api_key_handler import ApiKeyHandler
|
from models.databases.supabase.api_key_handler import ApiKeyHandler
|
||||||
from models.databases.supabase.brains import Brain
|
from models.databases.supabase.brains import Brain
|
||||||
from models.databases.supabase.brains_subscription_invitations import BrainSubscription
|
from models.databases.supabase.brains_subscription_invitations import \
|
||||||
|
BrainSubscription
|
||||||
from models.databases.supabase.chats import Chats
|
from models.databases.supabase.chats import Chats
|
||||||
from models.databases.supabase.files import File
|
from models.databases.supabase.files import File
|
||||||
|
from models.databases.supabase.knowledge import Knowledges
|
||||||
|
from models.databases.supabase.notifications import Notifications
|
||||||
from models.databases.supabase.prompts import Prompts
|
from models.databases.supabase.prompts import Prompts
|
||||||
from models.databases.supabase.user_usage import UserUsage
|
from models.databases.supabase.user_usage import UserUsage
|
||||||
from models.databases.supabase.vectors import Vector
|
from models.databases.supabase.vectors import Vector
|
||||||
from models.databases.supabase.notifications import Notifications
|
|
||||||
|
80
backend/models/databases/supabase/knowledge.py
Normal file
80
backend/models/databases/supabase/knowledge.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from models.databases.repository import Repository
|
||||||
|
from models.knowledge import Knowledge
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class CreateKnowledgeProperties(BaseModel):
    """Input model describing a knowledge row to be created.

    ``file_name`` and ``url`` are both optional here; ``extension`` falls back
    to ``"txt"`` when not supplied.
    """

    brain_id: UUID
    file_name: Optional[str] = None
    url: Optional[str] = None
    extension: str = "txt"

    def dict(self, *args, **kwargs):
        """Return the model as a dict whose ``brain_id`` entry is a string."""
        serialized = super().dict(*args, **kwargs)
        # Swap the UUID object for its string form before the dict is handed on.
        serialized["brain_id"] = str(serialized.get("brain_id"))
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class DeleteKnowledgeResponse(BaseModel):
    """Response model returned once a knowledge row has been removed."""

    # Status label; the default is "delete", while remove_knowledge_by_id
    # passes "deleted" explicitly.
    status: str = "delete"
    # Identifier of the knowledge row the response refers to.
    knowledge_id: UUID
|
||||||
|
|
||||||
|
|
||||||
|
class Knowledges(Repository):
    """Repository for the ``knowledge`` table, backed by a Supabase client."""

    def __init__(self, supabase_client):
        # Client used to issue every query of this repository.
        self.db = supabase_client

    def insert_knowledge(self, knowledge: CreateKnowledgeProperties) -> Knowledge:
        """
        Add a knowledge

        Args:
            knowledge (CreateKnowledgeProperties): The properties of the row to insert

        Returns:
            Knowledge: Model built from the first row returned by the insert
        """
        response = (self.db.from_("knowledge").insert(knowledge.dict()).execute()).data
        return Knowledge(**response[0])

    def remove_knowledge_by_id(
        self,
        knowledge_id: UUID,
    ) -> DeleteKnowledgeResponse:
        """
        Delete a knowledge by its id

        Args:
            knowledge_id (UUID): The id of the knowledge

        Returns:
            DeleteKnowledgeResponse: Status "deleted" together with the removed id

        Raises:
            HTTPException: 404 when the delete returned no row
        """
        # TODO: remove brain
        response = (
            self.db.from_("knowledge")
            .delete()
            # Stringified for consistency with get_knowledge_by_id.
            .filter("id", "eq", str(knowledge_id))
            .execute()
            .data
        )

        if not response:
            raise HTTPException(404, "Knowledge not found")

        return DeleteKnowledgeResponse(
            # TODO: change to response[0].brain_id and knowledge_id[0].brain_id
            status="deleted",
            knowledge_id=knowledge_id,
        )

    def get_knowledge_by_id(self, knowledge_id: UUID) -> Knowledge:
        """
        Get a knowledge by its id

        Args:
            knowledge_id (UUID): The id of the knowledge

        Returns:
            Knowledge: Model built from the first matching row

        Raises:
            HTTPException: 404 when no row matches the id
        """
        knowledge = (
            self.db.from_("knowledge")
            .select("*")
            # The key column of the knowledge table is "id"; the table has no
            # "knowledge_id" column (that name only exists on knowledge_vectors).
            .filter("id", "eq", str(knowledge_id))
            .execute()
        ).data

        # Mirror remove_knowledge_by_id instead of failing with an IndexError.
        if not knowledge:
            raise HTTPException(404, "Knowledge not found")

        return Knowledge(**knowledge[0])
|
@ -5,6 +5,7 @@ from models.databases.supabase import (
|
|||||||
BrainSubscription,
|
BrainSubscription,
|
||||||
Chats,
|
Chats,
|
||||||
File,
|
File,
|
||||||
|
Knowledges,
|
||||||
Notifications,
|
Notifications,
|
||||||
Prompts,
|
Prompts,
|
||||||
UserUsage,
|
UserUsage,
|
||||||
@ -24,6 +25,7 @@ class SupabaseDB(
|
|||||||
Vector,
|
Vector,
|
||||||
Prompts,
|
Prompts,
|
||||||
Notifications,
|
Notifications,
|
||||||
|
Knowledges,
|
||||||
):
|
):
|
||||||
def __init__(self, supabase_client):
|
def __init__(self, supabase_client):
|
||||||
self.db = supabase_client
|
self.db = supabase_client
|
||||||
@ -36,3 +38,4 @@ class SupabaseDB(
|
|||||||
Vector.__init__(self, supabase_client)
|
Vector.__init__(self, supabase_client)
|
||||||
Prompts.__init__(self, supabase_client)
|
Prompts.__init__(self, supabase_client)
|
||||||
Notifications.__init__(self, supabase_client)
|
Notifications.__init__(self, supabase_client)
|
||||||
|
Knowledges.__init__(self, supabase_client)
|
||||||
|
16
backend/models/files_in_storage.py
Normal file
16
backend/models/files_in_storage.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class FileInStorage(BaseModel):
    """Model of a stored file, carrying capitalised ``Id`` and ``Key`` fields."""

    Id: UUID
    Key: str

    @property
    def id(self) -> UUID:
        """Lower-case accessor returning the ``Id`` field."""
        return self.Id

    @property
    def key(self) -> str:
        """Lower-case accessor returning the ``Key`` field."""
        return self.Key
|
12
backend/models/knowledge.py
Normal file
12
backend/models/knowledge.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class Knowledge(BaseModel):
    """Model of a single knowledge row."""

    # Identifier of the knowledge row itself.
    id: UUID
    # Identifier of the brain the knowledge is attached to.
    brain_id: UUID
    # Optional; both default to None.
    file_name: Optional[str] = None
    url: Optional[str] = None
    # Falls back to "txt" when not provided.
    extension: str = "txt"
|
21
backend/repository/files/generate_file_signed_url.py
Normal file
21
backend/repository/files/generate_file_signed_url.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
from multiprocessing import get_logger
|
||||||
|
|
||||||
|
from models import get_supabase_client
|
||||||
|
from supabase.client import Client
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
SIGNED_URL_EXPIRATION_PERIOD_IN_SECONDS = 600
|
||||||
|
|
||||||
|
|
||||||
|
def generate_file_signed_url(path):
    """Create a signed URL for ``path`` in the ``quivr`` storage bucket.

    The URL is requested with an expiry of
    ``SIGNED_URL_EXPIRATION_PERIOD_IN_SECONDS`` seconds.

    Args:
        path: Path of the object inside the bucket.

    Returns:
        Whatever ``create_signed_url`` returns, or ``None`` when the call
        raised (the failure is logged, not propagated).
    """
    supabase_client: Client = get_supabase_client()

    try:
        response = supabase_client.storage.from_("quivr").create_signed_url(
            path, SIGNED_URL_EXPIRATION_PERIOD_IN_SECONDS
        )
    except Exception:
        # Best-effort: keep returning None on failure, but record the traceback.
        logger.exception("Failed to generate signed URL for %s", path)
        return None

    # A "%s" placeholder is required: extra positional arguments without one
    # make the logging module fail while formatting the record.
    logger.info("RESPONSE SIGNED URL %s", response)
    return response
|
17
backend/repository/files/list_files.py
Normal file
17
backend/repository/files/list_files.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from multiprocessing import get_logger
|
||||||
|
|
||||||
|
from models import get_supabase_client
|
||||||
|
from supabase.client import Client
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def list_files_from_storage(path):
    """List the objects stored under ``path`` in the ``quivr`` storage bucket.

    Args:
        path: Path inside the bucket to list.

    Returns:
        Whatever the storage ``list`` call returns, or ``None`` when the call
        raised (the failure is logged, not propagated).
    """
    supabase_client: Client = get_supabase_client()

    try:
        response = supabase_client.storage.from_("quivr").list(path)
    except Exception:
        # Best-effort: keep returning None on failure, but record the traceback.
        logger.exception("Failed to list files in storage for %s", path)
        return None

    # A "%s" placeholder is required: extra positional arguments without one
    # make the logging module fail while formatting the record.
    logger.info("RESPONSE %s", response)
    return response
|
@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from multiprocessing import get_logger
|
from multiprocessing import get_logger
|
||||||
|
|
||||||
from httpx import Response
|
|
||||||
from langchain.pydantic_v1 import Field
|
from langchain.pydantic_v1 import Field
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
from models import get_supabase_client
|
from models import get_supabase_client
|
||||||
@ -10,7 +9,7 @@ from supabase.client import Client
|
|||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
def upload_file_storage(file, file_identifier: str) -> Response:
|
def upload_file_storage(file, file_identifier: str):
|
||||||
supabase_client: Client = get_supabase_client()
|
supabase_client: Client = get_supabase_client()
|
||||||
# res = supabase_client.storage.create_bucket("quivr")
|
# res = supabase_client.storage.create_bucket("quivr")
|
||||||
response = None
|
response = None
|
||||||
@ -20,8 +19,7 @@ def upload_file_storage(file, file_identifier: str) -> Response:
|
|||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
print(e)
|
raise e
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentSerializable(Document):
|
class DocumentSerializable(Document):
|
||||||
|
0
backend/repository/knowledge/__init__.py
Normal file
0
backend/repository/knowledge/__init__.py
Normal file
14
backend/repository/knowledge/add_knowledge.py
Normal file
14
backend/repository/knowledge/add_knowledge.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
from logger import get_logger
|
||||||
|
from models.databases.supabase.knowledge import CreateKnowledgeProperties
|
||||||
|
from models.settings import get_supabase_db
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def add_knowledge(knowledge_to_add: CreateKnowledgeProperties):
    """Insert ``knowledge_to_add`` through the Supabase repository.

    Returns the object produced by ``insert_knowledge`` after logging its id.
    """
    knowledge = get_supabase_db().insert_knowledge(knowledge_to_add)
    logger.info(f"Knowledge { knowledge.id} added successfully")
    return knowledge
|
@ -5,12 +5,16 @@ from auth import AuthBearer, get_current_user
|
|||||||
from celery_worker import process_crawl_and_notify
|
from celery_worker import process_crawl_and_notify
|
||||||
from crawl.crawler import CrawlWebsite
|
from crawl.crawler import CrawlWebsite
|
||||||
from fastapi import APIRouter, Depends, Query, Request
|
from fastapi import APIRouter, Depends, Query, Request
|
||||||
|
from logger import get_logger
|
||||||
from models import Brain, UserIdentity, UserUsage
|
from models import Brain, UserIdentity, UserUsage
|
||||||
|
from models.databases.supabase.knowledge import CreateKnowledgeProperties
|
||||||
from models.databases.supabase.notifications import CreateNotificationProperties
|
from models.databases.supabase.notifications import CreateNotificationProperties
|
||||||
from models.notifications import NotificationsStatusEnum
|
from models.notifications import NotificationsStatusEnum
|
||||||
|
from repository.knowledge.add_knowledge import add_knowledge
|
||||||
from repository.notification.add_notification import add_notification
|
from repository.notification.add_notification import add_notification
|
||||||
from utils.file import convert_bytes
|
from utils.file import convert_bytes
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
crawl_router = APIRouter()
|
crawl_router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
@ -64,6 +68,16 @@ async def crawl_endpoint(
|
|||||||
status=NotificationsStatusEnum.Pending,
|
status=NotificationsStatusEnum.Pending,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
knowledge_to_add = CreateKnowledgeProperties(
|
||||||
|
brain_id=brain_id,
|
||||||
|
url=crawl_website.url,
|
||||||
|
extension="html",
|
||||||
|
)
|
||||||
|
|
||||||
|
added_knowledge = add_knowledge(knowledge_to_add)
|
||||||
|
logger.info(f"Knowledge {added_knowledge} added successfully")
|
||||||
|
|
||||||
process_crawl_and_notify.delay(
|
process_crawl_and_notify.delay(
|
||||||
crawl_website_url=crawl_website.url,
|
crawl_website_url=crawl_website.url,
|
||||||
enable_summarization=enable_summarization,
|
enable_summarization=enable_summarization,
|
||||||
|
110
backend/routes/knowledge_routes.py
Normal file
110
backend/routes/knowledge_routes.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from auth import AuthBearer, get_current_user
|
||||||
|
from fastapi import APIRouter, Depends, Query
|
||||||
|
from logger import get_logger
|
||||||
|
from models import Brain, UserIdentity, get_supabase_db
|
||||||
|
from repository.files.generate_file_signed_url import generate_file_signed_url
|
||||||
|
from repository.files.list_files import list_files_from_storage
|
||||||
|
from routes.authorizations.brain_authorization import (
|
||||||
|
RoleEnum,
|
||||||
|
has_brain_authorization,
|
||||||
|
validate_brain_authorization,
|
||||||
|
)
|
||||||
|
|
||||||
|
knowledge_router = APIRouter()
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@knowledge_router.get(
    "/knowledge/", dependencies=[Depends(AuthBearer())], tags=["Knowledge"]
)
async def list_knowledge_in_brain_endpoint(
    brain_id: UUID = Query(..., description="The ID of the brain"),
    current_user: UserIdentity = Depends(get_current_user),
):
    """
    Retrieve and list all the knowledge in a brain.

    Returns a dict with a single "documents" key holding the brain's unique
    files, sorted by their "size" entry in descending order.
    """

    validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)

    brain = Brain(id=brain_id)

    # The storage listing is only logged for now; the response below is built
    # from the brain's unique files.
    files = list_files_from_storage(str(brain_id))
    # "%s" placeholder added: passing an argument without one breaks the
    # formatting of the log record.
    logger.info("List of files from storage %s", files)

    # TODO: Retrieve from Knowledge table instead of storage or vectors
    unique_data = brain.get_unique_brain_files()

    # Routed through the logger instead of a bare print.
    logger.debug("UNIQUE DATA %s", unique_data)
    unique_data.sort(key=lambda x: int(x["size"]), reverse=True)

    return {"documents": unique_data}
|
||||||
|
|
||||||
|
|
||||||
|
@knowledge_router.delete(
    "/knowledge/{file_name}/",
    dependencies=[
        Depends(AuthBearer()),
        Depends(has_brain_authorization(RoleEnum.Owner)),
    ],
    tags=["Knowledge"],
)
async def delete_endpoint(
    file_name: str,
    current_user: UserIdentity = Depends(get_current_user),
    brain_id: UUID = Query(..., description="The ID of the brain"),
):
    """
    Delete a specific user file by file name.

    The brain authorization is validated for the current user, the file is
    removed from the brain, and a confirmation message is returned.
    """
    validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)

    Brain(id=brain_id).delete_file_from_brain(file_name)

    confirmation = f"{file_name} of brain {brain_id} has been deleted by user {current_user.email}."
    return {"message": confirmation}
|
||||||
|
|
||||||
|
|
||||||
|
@knowledge_router.get(
    "/explore/{file_name}/signed_download_url",
    dependencies=[Depends(AuthBearer())],
    tags=["Knowledge"],
)
async def generate_signed_url_endpoint(
    file_name: str, current_user: UserIdentity = Depends(get_current_user)
):
    """
    Generate a signed url to download the file from storage.

    The brain owning the file is looked up through the vectors stored for
    ``file_name``. Returns ``{"documents": []}`` when no vector exists for the
    file, otherwise the value produced by ``generate_file_signed_url``.

    Raises:
        Exception: when the first matching document has no associated brain.
    """
    # TODO: check if user has the right to get the file: add brain_id to the query

    supabase_db = get_supabase_db()
    documents = supabase_db.get_vectors_by_file_name(file_name).data

    if not documents:
        return {"documents": []}

    # Only the first document / first brain link is considered.
    brains_vectors = documents[0]["brains_vectors"]
    related_brain_id = brains_vectors[0]["brain_id"] if brains_vectors else None
    if related_brain_id is None:
        raise Exception(f"File {file_name} has no brain_id associated with it")

    # Authorize BEFORE minting the URL, so that no signed URL is created (or
    # logged) for a caller who fails the check.
    # NOTE(review): presumably this raises when the user is not authorized --
    # confirm against validate_brain_authorization.
    validate_brain_authorization(brain_id=related_brain_id, user_id=current_user.id)

    file_path_in_storage = f"{related_brain_id}/{file_name}"
    logger.info("FILE PATH IN STORAGE %s", file_path_in_storage)

    # The signed URL itself is deliberately not printed here: anyone holding
    # it can download the file until it expires.
    return generate_file_signed_url(file_path_in_storage)
|
@ -1,25 +1,27 @@
|
|||||||
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from auth import AuthBearer, get_current_user
|
from auth import AuthBearer, get_current_user
|
||||||
from celery_worker import process_file_and_notify
|
from celery_worker import process_file_and_notify
|
||||||
from fastapi import APIRouter, Depends, Query, Request, UploadFile
|
from fastapi import APIRouter, Depends, HTTPException, Query, Request, UploadFile
|
||||||
|
from logger import get_logger
|
||||||
from models import Brain, UserIdentity, UserUsage
|
from models import Brain, UserIdentity, UserUsage
|
||||||
from models.databases.supabase.notifications import (
|
from models.databases.supabase.knowledge import CreateKnowledgeProperties
|
||||||
CreateNotificationProperties,
|
from models.databases.supabase.notifications import CreateNotificationProperties
|
||||||
)
|
|
||||||
from models.notifications import NotificationsStatusEnum
|
from models.notifications import NotificationsStatusEnum
|
||||||
from repository.brain import get_brain_details
|
from repository.brain import get_brain_details
|
||||||
from repository.files.upload_file import upload_file_storage
|
from repository.files.upload_file import upload_file_storage
|
||||||
|
from repository.knowledge.add_knowledge import add_knowledge
|
||||||
from repository.notification.add_notification import add_notification
|
from repository.notification.add_notification import add_notification
|
||||||
from repository.user_identity import get_user_identity
|
from repository.user_identity import get_user_identity
|
||||||
from utils.file import convert_bytes, get_file_size
|
|
||||||
|
|
||||||
from routes.authorizations.brain_authorization import (
|
from routes.authorizations.brain_authorization import (
|
||||||
RoleEnum,
|
RoleEnum,
|
||||||
validate_brain_authorization,
|
validate_brain_authorization,
|
||||||
)
|
)
|
||||||
|
from utils.file import convert_bytes, get_file_size
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
upload_router = APIRouter()
|
upload_router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
@ -78,10 +80,34 @@ async def upload_file(
|
|||||||
openai_api_key = get_user_identity(current_user.id).openai_api_key
|
openai_api_key = get_user_identity(current_user.id).openai_api_key
|
||||||
|
|
||||||
file_content = await uploadFile.read()
|
file_content = await uploadFile.read()
|
||||||
# filename_with_brain_id = str(brain_id) + "/" + str(uploadFile.filename)
|
|
||||||
filename_with_brain_id = str(brain_id) + "/" + str(uploadFile.filename)
|
filename_with_brain_id = str(brain_id) + "/" + str(uploadFile.filename)
|
||||||
|
|
||||||
upload_file_storage(file_content, filename_with_brain_id)
|
try:
|
||||||
|
fileInStorage = upload_file_storage(file_content, filename_with_brain_id)
|
||||||
|
logger.info(f"File {fileInStorage} uploaded successfully")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if "The resource already exists" in str(e):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=403,
|
||||||
|
detail=f"File {uploadFile.filename} already exists in storage.",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500, detail="Failed to upload file to storage."
|
||||||
|
)
|
||||||
|
|
||||||
|
knowledge_to_add = CreateKnowledgeProperties(
|
||||||
|
brain_id=brain_id,
|
||||||
|
file_name=uploadFile.filename,
|
||||||
|
extension=os.path.splitext(
|
||||||
|
uploadFile.filename # pyright: ignore reportPrivateUsage=none
|
||||||
|
)[-1].lower(),
|
||||||
|
)
|
||||||
|
|
||||||
|
added_knowledge = add_knowledge(knowledge_to_add)
|
||||||
|
logger.info(f"Knowledge {added_knowledge} added successfully")
|
||||||
|
|
||||||
process_file_and_notify.delay(
|
process_file_and_notify.delay(
|
||||||
file_name=filename_with_brain_id,
|
file_name=filename_with_brain_id,
|
||||||
file_original_name=uploadFile.filename,
|
file_original_name=uploadFile.filename,
|
||||||
|
29
scripts/202309151054032_add_knowledge_tables.sql
Normal file
29
scripts/202309151054032_add_knowledge_tables.sql
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
-- Migration 202309151054032_add_knowledge_tables
-- Creates the knowledge and knowledge_vectors tables and records the
-- migration name; everything runs inside a single transaction.
BEGIN;

-- knowledge table
CREATE TABLE IF NOT EXISTS knowledge (
    id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
    file_name TEXT,
    url TEXT,
    brain_id UUID NOT NULL REFERENCES brains(brain_id),
    extension TEXT NOT NULL,
    -- Exactly one of file_name / url must be set.
    CHECK ((file_name IS NOT NULL AND url IS NULL) OR (file_name IS NULL AND url IS NOT NULL))
);

-- knowledge_vectors table
CREATE TABLE IF NOT EXISTS knowledge_vectors (
    knowledge_id UUID NOT NULL REFERENCES knowledge(id),
    vector_id UUID NOT NULL REFERENCES vectors(id),
    embedding_model TEXT NOT NULL,
    PRIMARY KEY (knowledge_id, vector_id, embedding_model)
);

-- Update migrations table (inserted only when the name is not recorded yet)
INSERT INTO migrations (name)
SELECT '202309151054032_add_knowledge_tables'
WHERE NOT EXISTS (
    SELECT 1 FROM migrations WHERE name = '202309151054032_add_knowledge_tables'
);

COMMIT;
|
@ -235,6 +235,26 @@ CREATE TABLE IF NOT EXISTS user_settings (
|
|||||||
max_brain_size INT DEFAULT 1000000
|
max_brain_size INT DEFAULT 1000000
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- knowledge table
|
||||||
|
CREATE TABLE IF NOT EXISTS knowledge (
|
||||||
|
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
||||||
|
file_name TEXT,
|
||||||
|
url TEXT,
|
||||||
|
brain_id UUID NOT NULL REFERENCES brains(brain_id),
|
||||||
|
extension TEXT NOT NULL,
|
||||||
|
CHECK ((file_name IS NOT NULL AND url IS NULL) OR (file_name IS NULL AND url IS NOT NULL))
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
-- knowledge_vectors table
|
||||||
|
CREATE TABLE IF NOT EXISTS knowledge_vectors (
|
||||||
|
knowledge_id UUID NOT NULL REFERENCES knowledge(id),
|
||||||
|
vector_id UUID NOT NULL REFERENCES vectors(id),
|
||||||
|
embedding_model TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (knowledge_id, vector_id, embedding_model)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
insert into
|
insert into
|
||||||
storage.buckets (id, name)
|
storage.buckets (id, name)
|
||||||
values
|
values
|
||||||
@ -249,9 +269,9 @@ CREATE POLICY "Access Quivr Storage 1jccrwz_2" ON storage.objects FOR UPDATE TO
|
|||||||
CREATE POLICY "Access Quivr Storage 1jccrwz_3" ON storage.objects FOR DELETE TO anon USING (bucket_id = 'quivr');
|
CREATE POLICY "Access Quivr Storage 1jccrwz_3" ON storage.objects FOR DELETE TO anon USING (bucket_id = 'quivr');
|
||||||
|
|
||||||
INSERT INTO migrations (name)
|
INSERT INTO migrations (name)
|
||||||
SELECT '202309157004032_add_sha1_column'
|
SELECT '202309151054032_add_knowledge_tables'
|
||||||
WHERE NOT EXISTS (
|
WHERE NOT EXISTS (
|
||||||
SELECT 1 FROM migrations WHERE name = '202309157004032_add_sha1_column'
|
SELECT 1 FROM migrations WHERE name = '202309151054032_add_knowledge_tables'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user