2023-09-14 12:56:59 +03:00
|
|
|
import asyncio
|
|
|
|
import io
|
|
|
|
import os
|
2024-04-07 04:35:57 +03:00
|
|
|
from datetime import datetime, timezone
|
2023-09-14 12:56:59 +03:00
|
|
|
|
2023-10-16 17:11:34 +03:00
|
|
|
from celery.schedules import crontab
|
2024-02-06 08:02:46 +03:00
|
|
|
from celery_config import celery
|
2023-09-14 12:56:59 +03:00
|
|
|
from fastapi import UploadFile
|
2023-12-14 01:08:15 +03:00
|
|
|
from logger import get_logger
|
2023-09-14 12:56:59 +03:00
|
|
|
from models.files import File
|
|
|
|
from models.settings import get_supabase_client
|
2024-02-06 08:02:46 +03:00
|
|
|
from modules.brain.integrations.Notion.Notion_connector import NotionConnector
|
2024-02-28 08:30:25 +03:00
|
|
|
from modules.brain.repository.integration_brains import IntegrationBrain
|
2023-12-01 00:29:28 +03:00
|
|
|
from modules.brain.service.brain_service import BrainService
|
2024-02-28 08:30:25 +03:00
|
|
|
from modules.brain.service.brain_vector_service import BrainVectorService
|
2023-11-28 16:27:39 +03:00
|
|
|
from modules.notification.dto.inputs import NotificationUpdatableProperties
|
|
|
|
from modules.notification.entity.notification import NotificationsStatusEnum
|
|
|
|
from modules.notification.service.notification_service import NotificationService
|
2023-11-24 12:25:02 +03:00
|
|
|
from modules.onboarding.service.onboarding_service import OnboardingService
|
2023-11-14 16:31:02 +03:00
|
|
|
from packages.files.crawl.crawler import CrawlWebsite
|
2023-11-14 11:52:44 +03:00
|
|
|
from packages.files.parsers.github import process_github
|
|
|
|
from packages.files.processors import filter_file
|
2024-04-25 17:22:13 +03:00
|
|
|
from packages.utils.telemetry import maybe_send_telemetry
|
2023-09-14 12:56:59 +03:00
|
|
|
|
2023-12-14 01:08:15 +03:00
|
|
|
# Logger shared by every task defined in this module.
logger = get_logger(__name__)

# Service objects are created once at import time and reused by the tasks
# below instead of being rebuilt on every task invocation.
onboardingService = OnboardingService()
notification_service = NotificationService()
brain_service = BrainService()
|
2023-10-16 17:11:34 +03:00
|
|
|
|
2023-09-14 12:56:59 +03:00
|
|
|
|
|
|
|
@celery.task(name="process_file_and_notify")
def process_file_and_notify(
    file_name: str,
    file_original_name: str,
    brain_id,
    notification_id=None,
    integration=None,
    delete_file=False,
):
    """Download a stored file, process it with ``filter_file`` and report the result.

    The object is downloaded from the ``quivr`` storage bucket into a local
    scratch file, wrapped in ``UploadFile``/``File`` and passed to
    ``filter_file``. The scratch file is removed whether or not processing
    succeeds.

    Args:
        file_name: Name of the object to download from the ``quivr`` bucket.
        file_original_name: Forwarded to ``filter_file`` as
            ``original_file_name`` and used when deleting existing vectors.
        brain_id: Identifier of the brain the file is processed for.
        notification_id: If truthy, the notification with this id is updated
            with the outcome (SUCCESS or ERROR).
        integration: Accepted for caller compatibility; not used here.
        delete_file: If truthy, ``delete_file_from_brain`` is called with
            ``only_vectors=True`` before the file is processed.

    Returns:
        ``True`` when processing completed, ``False`` when it failed
        (including on ``TimeoutError``).
    """
    try:
        supabase_client = get_supabase_client()
        # Storage names may contain "/"; flatten them so the scratch file is
        # created directly in the current working directory.
        tmp_file_name = ("tmp-file-" + file_name).replace("/", "_")

        try:
            with open(tmp_file_name, "wb+") as f:
                res = supabase_client.storage.from_("quivr").download(file_name)
                f.write(res)
                f.seek(0)
                # Read back what was written; its length is the upload size.
                file_content = f.read()

                upload_file = UploadFile(
                    file=f,
                    filename=file_name.split("/")[-1],
                    size=len(file_content),
                )
                file_instance = File(file=upload_file)

                loop = asyncio.get_event_loop()
                brain_vector_service = BrainVectorService(brain_id)
                if delete_file:  # TODO fix bug
                    brain_vector_service.delete_file_from_brain(
                        file_original_name, only_vectors=True
                    )
                loop.run_until_complete(
                    filter_file(
                        file=file_instance,
                        brain_id=brain_id,
                        original_file_name=file_original_name,
                    )
                )
        finally:
            # Always clean up the scratch file, even when download or
            # processing raised; it may not exist if open() itself failed.
            try:
                os.remove(tmp_file_name)
            except FileNotFoundError:
                pass

        if notification_id:
            notification_service.update_notification_by_id(
                notification_id,
                NotificationUpdatableProperties(
                    status=NotificationsStatusEnum.SUCCESS,
                    description="Your file has been properly uploaded!",
                ),
            )
        brain_service.update_brain_last_update_time(brain_id)

        return True
    except TimeoutError:
        logger.error("TimeoutError")
        return False
    except Exception as e:
        logger.exception(f"Failed to process file {file_name}")
        # Mirror the success path: only touch the notification when one exists.
        if notification_id:
            notification_service.update_notification_by_id(
                notification_id,
                NotificationUpdatableProperties(
                    status=NotificationsStatusEnum.ERROR,
                    description=f"An error occurred while processing the file: {e}",
                ),
            )
        return False
|
2023-09-22 12:44:09 +03:00
|
|
|
|
2023-09-14 12:56:59 +03:00
|
|
|
|
|
|
|
@celery.task(name="process_crawl_and_notify")
def process_crawl_and_notify(
    crawl_website_url,
    brain_id,
    notification_id=None,
):
    """Crawl a URL (or process a GitHub repository) for a brain and report success.

    When ``CrawlWebsite.checkGithub()`` is falsy, the URL is crawled, the
    resulting file is loaded into memory and passed to ``filter_file``.
    Otherwise the URL is handed to ``process_github``.

    Args:
        crawl_website_url: URL to crawl; also forwarded to ``filter_file`` as
            ``original_file_name``.
        brain_id: Identifier of the brain the content is processed for.
        notification_id: If truthy, the notification with this id is marked
            as SUCCESS once processing has finished.

    Returns:
        ``True`` once processing has finished. Exceptions raised while
        crawling or processing are not caught here.
    """
    crawl_website = CrawlWebsite(url=crawl_website_url)
    loop = asyncio.get_event_loop()

    if not crawl_website.checkGithub():
        file_path, file_name = crawl_website.process()

        with open(file_path, "rb") as f:
            file_content = f.read()

        # Wrap the crawled bytes in an in-memory file object for UploadFile.
        file_object = io.BytesIO(file_content)
        upload_file = UploadFile(
            file=file_object, filename=file_name, size=len(file_content)
        )
        file_instance = File(file=upload_file)

        loop.run_until_complete(
            filter_file(
                file=file_instance,
                brain_id=brain_id,
                original_file_name=crawl_website_url,
            )
        )
        success_description = "Your URL has been properly crawled!"
    else:
        loop.run_until_complete(
            process_github(
                repo=crawl_website.url,
                brain_id=brain_id,
            )
        )
        success_description = "Your file has been properly uploaded!"

    # Both branches only update a notification when one was supplied.
    if notification_id:
        notification_service.update_notification_by_id(
            notification_id,
            NotificationUpdatableProperties(
                status=NotificationsStatusEnum.SUCCESS,
                description=success_description,
            ),
        )

    brain_service.update_brain_last_update_time(brain_id)
    return True
|
2023-10-16 17:11:34 +03:00
|
|
|
|
|
|
|
|
|
|
|
@celery.task
def remove_onboarding_more_than_x_days_task():
    """Call the onboarding service's cleanup with a threshold of 7.

    The threshold is presumably a number of days, judging by the service
    method's name.
    """
    threshold_days = 7
    onboardingService.remove_onboarding_more_than_x_days(threshold_days)
|
2023-10-16 17:11:34 +03:00
|
|
|
|
|
|
|
|
2024-02-06 08:02:46 +03:00
|
|
|
@celery.task(name="NotionConnectorLoad")
def process_integration_brain_created_initial_load(brain_id, user_id):
    """Run the initial Notion load for a brain and log how many pages it returned.

    Args:
        brain_id: Passed to ``NotionConnector`` as ``brain_id``.
        user_id: Passed to ``NotionConnector`` as ``user_id``.
    """
    notion_connector = NotionConnector(brain_id=brain_id, user_id=user_id)

    pages = notion_connector.load()

    # Use the module logger rather than print so the count goes through the
    # same logging setup as the other tasks in this module.
    logger.info(f"pages: {len(pages)}")
|
2024-02-28 08:30:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
@celery.task
def process_integration_brain_sync_user_brain(brain_id, user_id):
    """Build a ``NotionConnector`` for the given brain and user and call ``poll()``.

    Args:
        brain_id: Passed to ``NotionConnector`` as ``brain_id``.
        user_id: Passed to ``NotionConnector`` as ``user_id``.
    """
    NotionConnector(brain_id=brain_id, user_id=user_id).poll()
|
|
|
|
|
|
|
|
|
2024-04-25 17:22:13 +03:00
|
|
|
@celery.task
def ping_telemetry():
    """Send a ``"ping"`` telemetry event through ``maybe_send_telemetry``."""
    event_name = "ping"
    payload = {"ping": "pong"}
    maybe_send_telemetry(event_name, payload)
|
|
|
|
|
|
|
|
|
2024-02-28 08:30:25 +03:00
|
|
|
@celery.task
def process_integration_brain_sync():
    """Look up the "notion" integration brains for the periodic sync.

    At present the task only performs the lookup and returns: the
    per-integration dispatch is disabled, see the TODO at the end of the
    function body.
    """
    notion_integrations = IntegrationBrain().get_integration_brain_by_type_integration(
        "notion"
    )

    # Timezone-aware "now"; only the disabled staleness check below reads it.
    now_utc = datetime.now(timezone.utc)

    if not notion_integrations:
        return

    # TODO fix this
    # Intended behaviour: last_synced is a timestamptz string from the
    # database; a per-brain sync should be queued only when enough time has
    # passed since the last one. The older note said "more than 1 day" while
    # the disabled code below uses 12 hours -- settle on one when re-enabling.
    # Re-enabling also requires importing timedelta.
    #
    # for integration in notion_integrations:
    #     print(f"last_synced: {integration.last_synced}")
    #     print(f"Integration Name: {integration.name}")
    #     last_synced = datetime.strptime(
    #         integration.last_synced, "%Y-%m-%dT%H:%M:%S.%f%z"
    #     )
    #     if last_synced < now_utc - timedelta(hours=12) and integration.name == "notion":
    #         process_integration_brain_sync_user_brain.delay(
    #             brain_id=integration.brain_id, user_id=integration.user_id
    #         )
|
2024-02-28 08:30:25 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Periodic tasks run by Celery beat. Each entry is keyed by the task function's
# name and points at "<this module>.<function name>".
celery.conf.beat_schedule = {
    task_name: {
        "task": f"{__name__}.{task_name}",
        "schedule": schedule,
    }
    for task_name, schedule in (
        # minute 0 of hour 0, i.e. once a day
        ("remove_onboarding_more_than_x_days_task", crontab(minute="0", hour="0")),
        # every 5 minutes
        ("process_integration_brain_sync", crontab(minute="*/5", hour="*")),
        # every 30 minutes
        ("ping_telemetry", crontab(minute="*/30", hour="*")),
    )
}
|