mirror of
https://github.com/StanGirard/quivr.git
synced 2024-07-15 00:40:22 +03:00
fix: 🐛 crawler (#1735)
fixed

# Description

Please include a summary of the changes and the related issue. Please also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
parent
10e94e5a91
commit
7ff081cf40
@ -57,50 +57,68 @@ def process_file_and_notify(
|
||||
file_original_name: str,
|
||||
brain_id,
|
||||
notification_id=None,
|
||||
):
|
||||
supabase_client = get_supabase_client()
|
||||
tmp_file_name = "tmp-file-" + file_name
|
||||
tmp_file_name = tmp_file_name.replace("/", "_")
|
||||
):
|
||||
try:
|
||||
supabase_client = get_supabase_client()
|
||||
tmp_file_name = "tmp-file-" + file_name
|
||||
tmp_file_name = tmp_file_name.replace("/", "_")
|
||||
|
||||
with open(tmp_file_name, "wb+") as f:
|
||||
res = supabase_client.storage.from_("quivr").download(file_name)
|
||||
f.write(res)
|
||||
f.seek(0)
|
||||
file_content = f.read()
|
||||
with open(tmp_file_name, "wb+") as f:
|
||||
res = supabase_client.storage.from_("quivr").download(file_name)
|
||||
f.write(res)
|
||||
f.seek(0)
|
||||
file_content = f.read()
|
||||
|
||||
upload_file = UploadFile(
|
||||
file=f, filename=file_name.split("/")[-1], size=len(file_content)
|
||||
)
|
||||
|
||||
file_instance = File(file=upload_file)
|
||||
loop = asyncio.get_event_loop()
|
||||
message = loop.run_until_complete(
|
||||
filter_file(
|
||||
file=file_instance,
|
||||
brain_id=brain_id,
|
||||
original_file_name=file_original_name,
|
||||
upload_file = UploadFile(
|
||||
file=f, filename=file_name.split("/")[-1], size=len(file_content)
|
||||
)
|
||||
)
|
||||
|
||||
f.close()
|
||||
os.remove(tmp_file_name)
|
||||
|
||||
if notification_id:
|
||||
notification_message = {
|
||||
"status": message["type"],
|
||||
"message": message["message"],
|
||||
"name": file_instance.file.filename if file_instance.file else "",
|
||||
}
|
||||
update_notification_by_id(
|
||||
notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.Done,
|
||||
message=str(notification_message),
|
||||
),
|
||||
file_instance = File(file=upload_file)
|
||||
loop = asyncio.get_event_loop()
|
||||
message = loop.run_until_complete(
|
||||
filter_file(
|
||||
file=file_instance,
|
||||
brain_id=brain_id,
|
||||
original_file_name=file_original_name,
|
||||
)
|
||||
)
|
||||
update_brain_last_update_time(brain_id)
|
||||
|
||||
return True
|
||||
f.close()
|
||||
os.remove(tmp_file_name)
|
||||
|
||||
if notification_id:
|
||||
notification_message = {
|
||||
"status": message["type"],
|
||||
"message": message["message"],
|
||||
"name": file_instance.file.filename if file_instance.file else "",
|
||||
}
|
||||
update_notification_by_id(
|
||||
notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.Done,
|
||||
message=str(notification_message),
|
||||
),
|
||||
)
|
||||
update_brain_last_update_time(brain_id)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
notification_message = {
|
||||
"status": "error",
|
||||
"message": "There was an error uploading the file. Please check the file and try again. If the issue persist, please open an issue on Github",
|
||||
"name": file_instance.file.filename if file_instance.file else "",
|
||||
}
|
||||
update_notification_by_id(
|
||||
notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.Done,
|
||||
message=str(notification_message),
|
||||
),
|
||||
)
|
||||
raise e
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@celery.task(name="process_crawl_and_notify")
|
||||
|
@ -31,4 +31,4 @@ async def process_file(
|
||||
brain_id, doc_with_metadata.to_json(), file.file_sha1
|
||||
)
|
||||
|
||||
return "Hello World!"
|
||||
return len(file.documents)
|
||||
|
@ -81,12 +81,17 @@ async def filter_file(
|
||||
|
||||
if file.file_extension in file_processors:
|
||||
try:
|
||||
await file_processors[file.file_extension](
|
||||
result = await file_processors[file.file_extension](
|
||||
file=file,
|
||||
brain_id=brain_id,
|
||||
)
|
||||
if result is None or result == 0:
|
||||
return create_response(
|
||||
f"? {using_file_name} has been uploaded to brain. There might have been an error while reading it, please make sure the file is not illformed or just an image", # pyright: ignore reportPrivateUsage=none
|
||||
"warning",
|
||||
)
|
||||
return create_response(
|
||||
f"✅ {using_file_name} has been uploaded to brain {brain.name}.", # pyright: ignore reportPrivateUsage=none
|
||||
f"✅ {using_file_name} has been uploaded to brain {brain.name} in {result} chunks", # pyright: ignore reportPrivateUsage=none
|
||||
"success",
|
||||
)
|
||||
except Exception as e:
|
||||
|
@ -15,6 +15,10 @@ from packages.files.file import convert_bytes, get_file_size
|
||||
from repository.knowledge.add_knowledge import add_knowledge
|
||||
from repository.files.upload_file import upload_file_storage
|
||||
from repository.notification.add_notification import add_notification
|
||||
from repository.notification.update_notification import update_notification_by_id
|
||||
from models.databases.supabase.notifications import NotificationUpdatableProperties
|
||||
|
||||
|
||||
|
||||
from routes.authorizations.brain_authorization import (
|
||||
RoleEnum,
|
||||
@ -73,6 +77,19 @@ async def upload_file(
|
||||
logger.info(f"File {file_in_storage} uploaded successfully")
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
notification_message = {
|
||||
"status": "error",
|
||||
"message": "There was an error uploading the file. Please check the file and try again. If the issue persist, please open an issue on Github",
|
||||
"name": uploadFile.filename if uploadFile else "Last Upload File",
|
||||
}
|
||||
update_notification_by_id(
|
||||
upload_notification.id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.Done,
|
||||
message=str(notification_message),
|
||||
),
|
||||
)
|
||||
if "The resource already exists" in str(e):
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
|
@ -1,7 +1,7 @@
|
||||
CREATE OR REPLACE FUNCTION update_max_brains_theodo() RETURNS TRIGGER AS $$
|
||||
DECLARE
|
||||
userEmail TEXT;
|
||||
allowedDomains TEXT[] := ARRAY['%@theodo.fr', '%@theodo.com', '%@theodo.co.uk', '%@bam.tech', '%@padok.fr', '%@sicara.com', '%@hokla.com', '%@sipios.com'];
|
||||
allowedDomains TEXT[] := ARRAY['%@theodo.fr', '%@theodo.com', '%@theodo.co.uk', '%@bam.tech', '%@padok.fr', '%@aleios.com', '%@sicara.com', '%@hokla.com', '%@sipios.com'];
|
||||
BEGIN
|
||||
SELECT email INTO userEmail FROM auth.users WHERE id = NEW.user_id;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user