feat(integrations): integration with Notion in the backend (#2123)

Moved the Notion integration into the brains module (`modules/brain/integrations/Notion`).

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

---------

Co-authored-by: Antoine Dewez <44063631+Zewed@users.noreply.github.com>
Stan Girard 2024-02-05 21:02:46 -08:00 committed by GitHub
parent 7bed81513f
commit 4d91d1cadc
84 changed files with 1959 additions and 1869 deletions

.gitignore

@ -76,3 +76,6 @@ supabase/migrations/20240103191539_private.sql
supabase/20240103191539_private.sql
paulgraham.py
.env_test
supabase/seed-airwallex.sql
airwallexpayouts.py
application.log

.gitmodules

@ -1,3 +0,0 @@
[submodule "frontend/app/platform"]
path = frontend/app/platform
url = https://github.com/StanGirard/platform.git

.idea/.gitignore

@ -1,8 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml


@ -1,21 +0,0 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="bytes.__await__" />
</list>
</option>
</inspection_tool>
<inspection_tool class="TsLint" enabled="true" level="WARNING" enabled_by_default="true" />
</profile>
</component>


@ -1,6 +0,0 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>


@ -1,4 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
</project>


@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/quivr-llm.iml" filepath="$PROJECT_DIR$/.idea/quivr-llm.iml" />
</modules>
</component>
</project>


@ -1,15 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/backend" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/backend/core" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.11" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>


@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>


@ -1,5 +0,0 @@
DB_HOST=localhost
DB_NAME=postgres
DB_PORT=5432
DB_USER=postgres
DB_PASSWORD=your-super-secret-and-long-postgres-password


@ -54,5 +54,6 @@
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.envFile": "${workspaceFolder}/.env_test",
}


@ -4,8 +4,8 @@ verify_ssl = true
name = "pypi"
[packages]
langchain = "==0.0.341"
litellm = "==1.7.7"
langchain = "*"
litellm = "*"
openai = "==1.1.1"
gitpython = "==3.1.36"
pdf2image = "==1.16.3"
@ -44,6 +44,8 @@ posthog = "==3.1.0"
jq = "==1.6.0"
pytest = "*"
ddtrace = "*"
watchdog = "*"
langchain-community = "*"
[dev-packages]
black = "*"

Pipfile.lock

File diff suppressed because it is too large.


@ -23,8 +23,6 @@ RUN apt-get clean && apt-get update && apt-get install -y \
# Add Rust binaries to the PATH
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /code
# Copy just the requirements first
COPY ./requirements.txt .

backend/celery_config.py (new file)

@ -0,0 +1,38 @@
# celery_config.py
import os
from celery import Celery
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "")
CELERY_BROKER_QUEUE_NAME = os.getenv("CELERY_BROKER_QUEUE_NAME", "quivr")
celery = Celery(__name__)
if CELERY_BROKER_URL.startswith("sqs"):
broker_transport_options = {
CELERY_BROKER_QUEUE_NAME: {
"my-q": {
"url": CELERY_BROKER_URL,
}
}
}
celery = Celery(
__name__,
broker=CELERY_BROKER_URL,
task_serializer="json",
task_concurrency=4,
worker_prefetch_multiplier=1,
broker_transport_options=broker_transport_options,
)
celery.conf.task_default_queue = CELERY_BROKER_QUEUE_NAME
elif CELERY_BROKER_URL.startswith("redis"):
celery = Celery(
__name__,
broker=CELERY_BROKER_URL,
backend=CELERY_BROKER_URL,
task_concurrency=4,
worker_prefetch_multiplier=1,
task_serializer="json",
)
else:
raise ValueError(f"Unsupported broker URL: {CELERY_BROKER_URL}")
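
As a minimal sketch (not part of the diff), other backend modules are expected to import this shared Celery app rather than build their own; the broker is assumed to be configured through `CELERY_BROKER_URL`, and the IDs below are placeholders.

```python
# Illustrative sketch, not part of the commit: reusing the shared Celery app.
# Assumes CELERY_BROKER_URL points at a redis:// or sqs:// broker.
from celery_config import celery

# Enqueue the initial Notion load for a brain, as the brain service does further down.
celery.send_task(
    "NotionConnectorLoad",
    kwargs={"brain_id": "<brain-uuid>", "user_id": "<user-uuid>"},  # placeholder IDs
)
```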


@ -2,13 +2,13 @@ import asyncio
import io
import os
import sentry_sdk
from celery import Celery
from celery.schedules import crontab
from celery_config import celery
from fastapi import UploadFile
from logger import get_logger
from models.files import File
from models.settings import get_supabase_client
from modules.brain.integrations.Notion.Notion_connector import NotionConnector
from modules.brain.service.brain_service import BrainService
from modules.notification.dto.inputs import NotificationUpdatableProperties
from modules.notification.entity.notification import NotificationsStatusEnum
@ -20,50 +20,10 @@ from packages.files.processors import filter_file
logger = get_logger(__name__)
sentry_dsn = os.getenv("SENTRY_DSN")
if sentry_dsn:
sentry_sdk.init(
dsn=sentry_dsn,
sample_rate=0.1,
enable_tracing=True,
)
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "")
CELERY_BROKER_QUEUE_NAME = os.getenv("CELERY_BROKER_QUEUE_NAME", "quivr")
onboardingService = OnboardingService()
notification_service = NotificationService()
brain_service = BrainService()
if CELERY_BROKER_URL.startswith("sqs"):
broker_transport_options = {
CELERY_BROKER_QUEUE_NAME: {
"my-q": {
"url": CELERY_BROKER_URL,
}
}
}
celery = Celery(
__name__,
broker=CELERY_BROKER_URL,
task_serializer="json",
task_concurrency=4,
worker_prefetch_multiplier=1,
broker_transport_options=broker_transport_options,
)
celery.conf.task_default_queue = CELERY_BROKER_QUEUE_NAME
elif CELERY_BROKER_URL.startswith("redis"):
celery = Celery(
__name__,
broker=CELERY_BROKER_URL,
backend=CELERY_BROKER_URL,
task_concurrency=4,
worker_prefetch_multiplier=1,
task_serializer="json",
)
else:
raise ValueError(f"Unsupported broker URL: {CELERY_BROKER_URL}")
@celery.task(name="process_file_and_notify")
def process_file_and_notify(
@ -71,6 +31,7 @@ def process_file_and_notify(
file_original_name: str,
brain_id,
notification_id=None,
integration=None,
):
try:
supabase_client = get_supabase_client()
@ -201,3 +162,12 @@ celery.conf.beat_schedule = {
"schedule": crontab(minute="0", hour="0"),
},
}
@celery.task(name="NotionConnectorLoad")
def process_integration_brain_created_initial_load(brain_id, user_id):
notion_connector = NotionConnector(brain_id=brain_id, user_id=user_id)
pages = notion_connector.compile_all_pages()
print("pages: ", len(pages))


@ -1,4 +1 @@
from .knowledge_brain_qa import KnowledgeBrainQA
from .qa_headless import HeadlessQA
__all__ = ["KnowledgeBrainQA", "HeadlessQA"]


@ -1,17 +1,26 @@
import logging
from logging.handlers import RotatingFileHandler
def get_logger(logger_name, log_level=logging.INFO):
def get_logger(logger_name, log_level=logging.INFO, log_file="application.log"):
logger = logging.getLogger(logger_name)
logger.setLevel(log_level)
logger.propagate = False # Prevent log propagation to avoid double logging
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
formatter = logging.Formatter(
"%(asctime)s [%(levelname)s] %(name)s [%(lineno)d]: %(message)s"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
file_handler = RotatingFileHandler(
log_file, maxBytes=5000000, backupCount=5
) # 5MB file
file_handler.setFormatter(formatter)
if not logger.handlers:
logger.addHandler(console_handler)
logger.addHandler(file_handler)
return logger
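
A hedged usage sketch (not part of the diff): the updated helper keeps the same call site while now attaching both a console handler and a rotating file handler; the per-component file in the last line is an assumed usage, not something the commit adds.

```python
# Illustrative sketch, not part of the commit.
from logger import get_logger

logger = get_logger(__name__)  # logs to the console and application.log (5 MB x 5 backups)
logger.info("Notion sync started")

# log_file is optional; routing a component to its own file is an assumed usage.
sync_logger = get_logger("notion_sync", log_file="notion_sync.log")
```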


@ -7,7 +7,7 @@ class LLMModels(BaseModel):
BaseModel (BaseModel): Pydantic BaseModel
"""
name: str = "gpt-3.5-turbo-1106"
name: str = "gpt-3.5-turbo-0125"
price: int = 1
max_input: int = 512
max_output: int = 512


@ -6,13 +6,13 @@ import jq
import requests
from fastapi import HTTPException
from litellm import completion
from llm.knowledge_brain_qa import KnowledgeBrainQA
from llm.qa_interface import QAInterface
from llm.utils.call_brain_api import call_brain_api
from llm.utils.get_api_brain_definition_as_json_schema import (
get_api_brain_definition_as_json_schema,
)
from logger import get_logger
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.brain.qa_interface import QAInterface
from modules.brain.service.brain_service import BrainService
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.inputs import CreateChatHistory
@ -27,7 +27,7 @@ logger = get_logger(__name__)
class UUIDEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, uuid.UUID):
if isinstance(obj, UUID):
# if the object is uuid, we simply return the value of uuid
return str(obj)
return super().default(obj)


@ -4,11 +4,11 @@ from uuid import UUID
from fastapi import HTTPException
from litellm import completion
from llm.api_brain_qa import APIBrainQA
from llm.knowledge_brain_qa import KnowledgeBrainQA
from llm.qa_headless import HeadlessQA
from logger import get_logger
from modules.brain.api_brain_qa import APIBrainQA
from modules.brain.entity.brain_entity import BrainEntity, BrainType
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.brain.qa_headless import HeadlessQA
from modules.brain.service.brain_service import BrainService
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.inputs import CreateChatHistory
@ -135,9 +135,9 @@ class CompositeBrainQA(
"user_message": question.question,
"assistant": response.assistant,
"message_time": new_chat.message_time,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name,
"message_id": new_chat.message_id,
"brain_id": str(brain.id),
@ -149,9 +149,9 @@ class CompositeBrainQA(
"user_message": question.question,
"assistant": response.assistant,
"message_time": None,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name,
"message_id": None,
"brain_id": str(brain.id),
@ -169,9 +169,9 @@ class CompositeBrainQA(
tools.append(format_brain_to_tool(connected_brain))
available_functions[
connected_brain_id
] = self.get_answer_generator_from_brain_type(connected_brain)
available_functions[connected_brain_id] = (
self.get_answer_generator_from_brain_type(connected_brain)
)
connected_brains_details[str(connected_brain.id)] = connected_brain
@ -194,7 +194,7 @@ class CompositeBrainQA(
messages.append({"role": "user", "content": question.question})
response = completion(
model="gpt-3.5-turbo-1106",
model="gpt-3.5-turbo-0125",
messages=messages,
tools=tools,
tool_choice="auto",
@ -231,9 +231,9 @@ class CompositeBrainQA(
"user_message": question.question,
"assistant": brain_completion_output.response.message.content,
"message_time": new_chat.message_time if new_chat else None,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"message_id": new_chat.message_id if new_chat else None,
"brain_id": str(brain.id) if brain else None,
@ -313,7 +313,7 @@ class CompositeBrainQA(
)
response_after_tools_answers = completion(
model="gpt-3.5-turbo-1106",
model="gpt-3.5-turbo-0125",
messages=messages,
tools=tools,
tool_choice="auto",
@ -356,9 +356,9 @@ class CompositeBrainQA(
"message_time": streamed_chat_history.message_time,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"brain_id": str(brain.id) if brain else None,
}
@ -371,9 +371,9 @@ class CompositeBrainQA(
"message_time": None,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"brain_id": str(brain.id) if brain else None,
}
@ -442,7 +442,7 @@ class CompositeBrainQA(
messages.append({"role": "user", "content": question.question})
initial_response = completion(
model="gpt-3.5-turbo-1106",
model="gpt-3.5-turbo-0125",
stream=True,
messages=messages,
tools=tools,
@ -555,7 +555,7 @@ class CompositeBrainQA(
messages.append({"role": "system", "content": PROMPT_2})
response_after_tools_answers = completion(
model="gpt-3.5-turbo-1106",
model="gpt-3.5-turbo-0125",
messages=messages,
tools=tools,
tool_choice="auto",


@ -11,9 +11,13 @@ from modules.brain.dto.inputs import (
CreateBrainProperties,
)
from modules.brain.entity.brain_entity import PublicBrain, RoleEnum
from modules.brain.entity.integration_brain import IntegrationDescriptionEntity
from modules.brain.service.brain_authorization_service import has_brain_authorization
from modules.brain.service.brain_service import BrainService
from modules.brain.service.brain_user_service import BrainUserService
from modules.brain.service.integration_brain_service import (
IntegrationBrainDescriptionService,
)
from modules.prompt.service.prompt_service import PromptService
from modules.user.entity.user_identity import UserIdentity
from repository.brain import get_question_context_from_brain
@ -24,6 +28,16 @@ brain_router = APIRouter()
prompt_service = PromptService()
brain_service = BrainService()
brain_user_service = BrainUserService()
integration_brain_description_service = IntegrationBrainDescriptionService()
@brain_router.get(
"/brains/integrations/",
dependencies=[Depends(AuthBearer())],
)
async def get_integration_brain_description() -> list[IntegrationDescriptionEntity]:
"""Retrieve the integration brain description."""
return integration_brain_description_service.get_all_integration_descriptions()
@brain_router.get("/brains/", dependencies=[Depends(AuthBearer())], tags=["Brain"])
@ -96,22 +110,13 @@ async def create_new_brain(
brain=brain,
user_id=current_user.id,
)
if brain_user_service.get_user_default_brain(current_user.id):
logger.info(f"Default brain already exists for user {current_user.id}")
brain_user_service.create_brain_user(
user_id=current_user.id,
brain_id=new_brain.brain_id,
rights=RoleEnum.Owner,
is_default_brain=False,
)
else:
logger.info(f"Creating default brain for user {current_user.id}.")
brain_user_service.create_brain_user(
user_id=current_user.id,
brain_id=new_brain.brain_id,
rights=RoleEnum.Owner,
is_default_brain=True,
)
logger.info(f"Creating default brain for user {current_user.id}.")
brain_user_service.create_brain_user(
user_id=current_user.id,
brain_id=new_brain.brain_id,
rights=RoleEnum.Owner,
is_default_brain=True,
)
return {"id": new_brain.brain_id, "name": brain.name, "rights": "Owner"}


@ -24,6 +24,21 @@ class CreateApiBrainDefinition(BaseModel, extra=Extra.forbid):
jq_instructions: Optional[str] = None
class CreateIntegrationBrain(BaseModel, extra=Extra.forbid):
integration_name: str
integration_logo_url: str
connection_settings: dict
class BrainIntegrationSettings(BaseModel, extra=Extra.forbid):
integration_id: str
settings: dict
class BrainIntegrationUpdateSettings(BaseModel, extra=Extra.forbid):
settings: dict
class CreateBrainProperties(BaseModel, extra=Extra.forbid):
name: Optional[str] = "Default brain"
description: str = "This is a description"
@ -34,8 +49,9 @@ class CreateBrainProperties(BaseModel, extra=Extra.forbid):
prompt_id: Optional[UUID] = None
brain_type: Optional[BrainType] = BrainType.DOC
brain_definition: Optional[CreateApiBrainDefinition]
brain_secrets_values: dict = {}
brain_secrets_values: Optional[dict] = {}
connected_brains_ids: Optional[list[UUID]] = []
integration: Optional[BrainIntegrationSettings] = None
def dict(self, *args, **kwargs):
brain_dict = super().dict(*args, **kwargs)
@ -54,6 +70,7 @@ class BrainUpdatableProperties(BaseModel):
prompt_id: Optional[UUID]
brain_definition: Optional[ApiBrainDefinitionEntity]
connected_brains_ids: Optional[list[UUID]] = []
integration: Optional[BrainIntegrationUpdateSettings] = None
def dict(self, *args, **kwargs):
brain_dict = super().dict(*args, **kwargs)
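
To make the new DTO fields concrete, here is a hedged sketch of a request body for the existing `POST /brains/` route that would create a Notion integration brain; the UUID and token values are placeholders, not real identifiers.

```python
# Illustrative sketch, not part of the commit: all values are placeholders.
payload = {
    "name": "My Notion brain",
    "brain_type": "integration",
    "integration": {
        "integration_id": "<uuid of the Notion row in the integrations table>",
        "settings": {"notion_integration_token": "secret_xxx"},
    },
}
```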


@ -10,6 +10,7 @@ class BrainType(str, Enum):
DOC = "doc"
API = "api"
COMPOSITE = "composite"
INTEGRATION = "integration"
class BrainEntity(BaseModel):


@ -0,0 +1,20 @@
from typing import Optional
from uuid import UUID
from pydantic import BaseModel
class IntegrationDescriptionEntity(BaseModel):
id: UUID
integration_name: str
integration_logo_url: Optional[str]
connection_settings: Optional[dict]
class IntegrationEntity(BaseModel):
id: str
user_id: str
brain_id: str
integration_id: str
settings: Optional[dict]
credentials: Optional[dict]


@ -0,0 +1,18 @@
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
class NotionBrain(KnowledgeBrainQA):
"""This is the Notion brain class. it is a KnowledgeBrainQA has the data is stored locally.
It is going to call the Data Store internally to get the data.
Args:
KnowledgeBrainQA (_type_): A brain that store the knowledge internaly
"""
def __init__(
self,
**kwargs,
):
super().__init__(
**kwargs,
)


@ -0,0 +1,333 @@
import os
import tempfile
from io import BytesIO
from typing import Any, List, Optional
import requests
from celery_config import celery
from fastapi import UploadFile
from logger import get_logger
from modules.brain.entity.integration_brain import IntegrationEntity
from modules.brain.repository.integration_brains import IntegrationBrain
from modules.knowledge.dto.inputs import CreateKnowledgeProperties
from modules.knowledge.repository.knowledge_interface import KnowledgeInterface
from modules.knowledge.service.knowledge_service import KnowledgeService
from pydantic import BaseModel
from repository.files.upload_file import upload_file_storage
logger = get_logger(__name__)
class NotionPage(BaseModel):
"""Represents a Notion Page object"""
id: str
created_time: str
last_edited_time: str
archived: bool
properties: dict[str, Any]
url: str
class NotionSearchResponse(BaseModel):
"""Represents the response from the Notion Search API"""
results: list[dict[str, Any]]
next_cursor: Optional[str]
has_more: bool = False
class NotionConnector(IntegrationBrain):
"""A class to interact with the Notion API"""
credentials: dict[str, str] = None
integration_details: IntegrationEntity = None
brain_id: str = None
user_id: str = None
knowledge_service: KnowledgeInterface
recursive_index_enabled: bool = False
max_pages: int = 100
def __init__(self, brain_id: str, user_id: str):
super().__init__()
self.brain_id = brain_id
self.user_id = user_id
self._load_credentials()
self.knowledge_service = KnowledgeService()
def _load_credentials(self) -> dict[str, str]:
"""Load the Notion credentials"""
self.integration_details = self.get_integration_brain(
self.brain_id, self.user_id
)
if self.credentials is None:
logger.info("Loading Notion credentials")
self.integration_details.credentials = {
"notion_integration_token": self.integration_details.settings.get(
"notion_integration_token", ""
)
}
self.update_integration_brain(
self.brain_id, self.user_id, self.integration_details
)
self.credentials = self.integration_details.credentials
else: # pragma: no cover
self.credentials = self.integration_details.credentials
def _headers(self) -> dict[str, str]:
"""Get the headers for the Notion API"""
return {
"Authorization": f'Bearer {self.credentials["notion_integration_token"]}',
"Content-Type": "application/json",
"Notion-Version": "2022-06-28",
}
def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:
"""
Search for pages from a Notion database.
"""
# Use self.credentials to authenticate the request
headers = self._headers()
res = requests.post(
"https://api.notion.com/v1/search",
headers=headers,
json=query_dict,
# Adjust the timeout as needed
timeout=10,
)
res.raise_for_status()
return NotionSearchResponse(**res.json())
def _fetch_blocks(self, page_id: str, cursor: str | None = None) -> dict[str, Any]:
"""
Fetch the blocks of a Notion page.
"""
logger.info(f"Fetching blocks for page: {page_id}")
headers = self._headers()
query_params = None if not cursor else {"start_cursor": cursor}
res = requests.get(
f"https://api.notion.com/v1/blocks/{page_id}/children",
params=query_params,
headers=headers,
timeout=10,
)
res.raise_for_status()
return res.json()
def _fetch_page(self, page_id: str) -> dict[str, Any]:
"""
Fetch a Notion page.
"""
logger.info(f"Fetching page: {page_id}")
headers = self._headers()
block_url = f"https://api.notion.com/v1/pages/{page_id}"
res = requests.get(
block_url,
headers=headers,
timeout=10,
)
try:
res.raise_for_status()
except Exception:
logger.exception(f"Error fetching page - {res.json()}")
return None
return NotionPage(**res.json())
def _read_blocks(
self, page_block_id: str
) -> tuple[list[tuple[str, str]], list[str]]:
"""Reads blocks for a page"""
result_lines: list[tuple[str, str]] = []
child_pages: list[str] = []
cursor = None
while True:
data = self._fetch_blocks(page_block_id, cursor)
for result in data["results"]:
result_block_id = result["id"]
result_type = result["type"]
result_obj = result[result_type]
cur_result_text_arr = []
if "rich_text" in result_obj:
for rich_text in result_obj["rich_text"]:
# skip if doesn't have text object
if "text" in rich_text:
text = rich_text["text"]["content"]
cur_result_text_arr.append(text)
if result["has_children"]:
if result_type == "child_page":
child_pages.append(result_block_id)
else:
logger.info(f"Entering sub-block: {result_block_id}")
subblock_result_lines, subblock_child_pages = self._read_blocks(
result_block_id
)
logger.info(f"Finished sub-block: {result_block_id}")
result_lines.extend(subblock_result_lines)
child_pages.extend(subblock_child_pages)
# if result_type == "child_database" and self.recursive_index_enabled:
# child_pages.extend(self._read_pages_from_database(result_block_id))
cur_result_text = "\n".join(cur_result_text_arr)
if cur_result_text:
result_lines.append((cur_result_text, result_block_id))
if data["next_cursor"] is None:
break
cursor = data["next_cursor"]
return result_lines, child_pages
def _read_page_title(self, page: NotionPage) -> str:
"""Extracts the title from a Notion page"""
page_title = None
for _, prop in page.properties.items():
if prop["type"] == "title" and len(prop["title"]) > 0:
page_title = " ".join([t["plain_text"] for t in prop["title"]]).strip()
break
if page_title is None:
page_title = f"Untitled Page [{page.id}]"
page_title = "".join(e for e in page_title if e.isalnum())
return page_title
def _read_page_url(self, page: NotionPage) -> str:
"""Extracts the URL from a Notion page"""
return page.url
def _read_pages_from_database(self, database_id: str) -> list[str]:
"""Reads pages from a Notion database"""
headers = self._headers()
res = requests.post(
f"https://api.notion.com/v1/databases/{database_id}/query",
headers=headers,
timeout=10,
)
res.raise_for_status()
return [page["id"] for page in res.json()["results"]]
def _read_page(self, page_id: str) -> tuple[str, list[str]]:
"""Reads a Notion page"""
page = self._fetch_page(page_id)
if page is None:
return None, None, None, None
page_title = self._read_page_title(page)
page_content, child_pages = self._read_blocks(page_id)
page_url = self._read_page_url(page)
return page_title, page_content, child_pages, page_url
def get_all_pages(self) -> list[NotionPage]:
"""
Get all the pages from Notion.
"""
query_dict = {
"filter": {"property": "object", "value": "page"},
"page_size": 100,
}
max_pages = self.max_pages
pages_count = 0
while True:
search_response = self._search_notion(query_dict)
for page in search_response.results:
pages_count += 1
if pages_count > max_pages:
break
yield NotionPage(**page)
if search_response.has_more:
query_dict["start_cursor"] = search_response.next_cursor
else:
break
def add_file_to_knowledge(
self, page_content: List[tuple[str, str]], page_name: str, page_url: str
):
"""
Add a file to the knowledge base
"""
filename_with_brain_id = (
str(self.brain_id) + "/" + str(page_name) + "_notion.txt"
)
try:
concatened_page_content = ""
if page_content:
for content in page_content:
concatened_page_content += content[0] + "\n"
# Create a BytesIO object from the content
content_io = BytesIO(concatened_page_content.encode("utf-8"))
# Create a file of type UploadFile
file = UploadFile(filename=filename_with_brain_id, file=content_io)
# Write the UploadFile content to a temporary file
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(file.file.read())
temp_file_path = temp_file.name
# Upload the temporary file to the knowledge base
response = upload_file_storage(temp_file_path, filename_with_brain_id)
logger.info(f"File {response} uploaded successfully")
# Delete the temporary file
os.remove(temp_file_path)
knowledge_to_add = CreateKnowledgeProperties(
brain_id=self.brain_id,
file_name=page_name + "_notion.txt",
extension="txt",
integration="notion",
integration_link=page_url,
)
added_knowledge = self.knowledge_service.add_knowledge(knowledge_to_add)
logger.info(f"Knowledge {added_knowledge} added successfully")
celery.send_task(
"process_file_and_notify",
kwargs={
"file_name": filename_with_brain_id,
"file_original_name": page_name + "_notion.txt",
"brain_id": self.brain_id,
},
)
except Exception:
logger.error("Error adding knowledge")
def compile_all_pages(self):
"""
Get all the pages, blocks, databases from Notion into a single document per page
"""
all_pages = list(self.get_all_pages()) # Convert generator to list
documents = []
for page in all_pages:
logger.info(f"Reading page: {page.id}")
page_title, page_content, child_pages, page_url = self._read_page(page.id)
document = {
"page_title": page_title,
"page_content": page_content,
"child_pages": child_pages,
"page_url": page_url,
}
documents.append(document)
self.add_file_to_knowledge(page_content, page_title, page_url)
return documents
if __name__ == "__main__":
notion = NotionConnector(
brain_id="b3ab23c5-9e13-4dd8-8883-106d613e3de8",
user_id="39418e3b-0258-4452-af60-7acfcc1263ff",
)
celery.send_task(
"NotionConnectorLoad",
kwargs={
"brain_id": "b3ab23c5-9e13-4dd8-8883-106d613e3de8",
"user_id": "39418e3b-0258-4452-af60-7acfcc1263ff",
},
)


@ -5,14 +5,14 @@ from uuid import UUID
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
from langchain.chains import ConversationalRetrievalChain
from llm.qa_interface import QAInterface
from llm.rags.quivr_rag import QuivrRAG
from llm.rags.rag_interface import RAGInterface
from llm.utils.format_chat_history import format_chat_history
from llm.utils.get_prompt_to_use import get_prompt_to_use
from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
from logger import get_logger
from models import BrainSettings
from modules.brain.qa_interface import QAInterface
from modules.brain.rags.quivr_rag import QuivrRAG
from modules.brain.rags.rag_interface import RAGInterface
from modules.brain.service.brain_service import BrainService
from modules.chat.dto.chats import ChatQuestion, Sources
from modules.chat.dto.inputs import CreateChatHistory
@ -120,9 +120,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
knowledge_qa: Optional[RAGInterface]
metadata: Optional[dict] = None
callbacks: List[
AsyncIteratorCallbackHandler
] = None # pyright: ignore reportPrivateUsage=none
callbacks: List[AsyncIteratorCallbackHandler] = (
None # pyright: ignore reportPrivateUsage=none
)
prompt_id: Optional[UUID]
@ -225,9 +225,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
"user_message": question.question,
"assistant": answer,
"message_time": new_chat.message_time,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"message_id": new_chat.message_id,
"brain_id": str(brain.brain_id) if brain else None,
@ -240,9 +240,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
"user_message": question.question,
"assistant": answer,
"message_time": None,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": None,
"message_id": None,
"brain_id": str(brain.brain_id) if brain else None,
@ -318,9 +318,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
"message_time": streamed_chat_history.message_time,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"brain_id": str(brain.brain_id) if brain else None,
"metadata": self.metadata,
@ -334,9 +334,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
"message_time": None,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": brain.name if brain else None,
"brain_id": str(brain.brain_id) if brain else None,
"metadata": self.metadata,


@ -5,10 +5,9 @@ from uuid import UUID
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models import ChatLiteLLM
from langchain.chat_models.base import BaseChatModel
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from llm.qa_interface import QAInterface
from langchain_community.chat_models import ChatLiteLLM
from llm.utils.format_chat_history import (
format_chat_history,
format_history_to_openai_mesages,
@ -17,6 +16,7 @@ from llm.utils.get_prompt_to_use import get_prompt_to_use
from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
from logger import get_logger
from models import BrainSettings # Importing settings related to the 'brain'
from modules.brain.qa_interface import QAInterface
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.inputs import CreateChatHistory
from modules.chat.dto.outputs import GetChatHistoryOutput
@ -140,9 +140,9 @@ class HeadlessQA(BaseModel, QAInterface):
"user_message": question.question,
"assistant": answer,
"message_time": new_chat.message_time,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": None,
"message_id": new_chat.message_id,
}
@ -154,9 +154,9 @@ class HeadlessQA(BaseModel, QAInterface):
"user_message": question.question,
"assistant": answer,
"message_time": None,
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": None,
"message_id": None,
}
@ -224,9 +224,9 @@ class HeadlessQA(BaseModel, QAInterface):
"message_time": streamed_chat_history.message_time,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": None,
}
)
@ -238,9 +238,9 @@ class HeadlessQA(BaseModel, QAInterface):
"message_time": None,
"user_message": question.question,
"assistant": "",
"prompt_title": self.prompt_to_use.title
if self.prompt_to_use
else None,
"prompt_title": (
self.prompt_to_use.title if self.prompt_to_use else None
),
"brain_name": None,
}
)


@ -3,7 +3,6 @@ from uuid import UUID
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatLiteLLM
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM
@ -12,18 +11,18 @@ from langchain.prompts.chat import (
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from llm.rags.rag_interface import RAGInterface
from langchain_community.chat_models import ChatLiteLLM
from llm.prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
from llm.utils.get_prompt_to_use import get_prompt_to_use
from logger import get_logger
from models import BrainSettings # Importing settings related to the 'brain'
from modules.brain.rags.rag_interface import RAGInterface
from modules.brain.service.brain_service import BrainService
from modules.chat.service.chat_service import ChatService
from pydantic import BaseModel
from supabase.client import Client, create_client
from vectorstore.supabase import CustomSupabaseVectorStore
from ..prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
logger = get_logger(__name__)
QUIVR_DEFAULT_PROMPT = "Your name is Quivr. You're a helpful assistant. If you don't know the answer, just say that you don't know, don't try to make up an answer."
@ -204,6 +203,3 @@ class QuivrRAG(BaseModel, RAGInterface):
def get_retriever(self):
return self.vector_store.as_retriever()
# Some other methods can be added such as on_stream, on_end,... to abstract history management (each answer should be saved or not)
# Some other methods can be added such as on_stream, on_end,... to abstract history management (each answer should be saved or not)


@ -4,3 +4,4 @@ from .brains_users import BrainsUsers
from .brains_vectors import BrainsVectors
from .composite_brains_connections import CompositeBrainsConnections
from .external_api_secrets import ExternalApiSecrets
from .integration_brains import IntegrationBrain, IntegrationDescription


@ -23,6 +23,7 @@ class Brains(BrainsInterface):
"brain_definition",
"brain_secrets_values",
"connected_brains_ids",
"integration",
}
)
brain_dict["meaning"] = brain_meaning


@ -0,0 +1,107 @@
from models.settings import get_supabase_client
from modules.brain.entity.integration_brain import (
IntegrationDescriptionEntity,
IntegrationEntity,
)
from modules.brain.repository.interfaces.integration_brains_interface import (
IntegrationBrainInterface,
IntegrationDescriptionInterface,
)
class IntegrationBrain(IntegrationBrainInterface):
"""This is all the methods to interact with the integration brain.
Args:
IntegrationBrainInterface (_type_): _description_
"""
def __init__(self):
self.db = get_supabase_client()
def get_integration_brain(self, brain_id, user_id):
response = (
self.db.table("integrations_user")
.select("*")
.filter("brain_id", "eq", brain_id)
.filter("user_id", "eq", user_id)
.execute()
)
if len(response.data) == 0:
return None
return IntegrationEntity(**response.data[0])
def add_integration_brain(self, brain_id, user_id, integration_id, settings):
response = (
self.db.table("integrations_user")
.insert(
[
{
"brain_id": str(brain_id),
"user_id": str(user_id),
"integration_id": str(integration_id),
"settings": settings,
}
]
)
.execute()
)
if len(response.data) == 0:
return None
return IntegrationEntity(**response.data[0])
def update_integration_brain(self, brain_id, user_id, integration_brain):
response = (
self.db.table("integrations_user")
.update(integration_brain.dict(exclude={"brain_id", "user_id"}))
.filter("brain_id", "eq", str(brain_id))
.filter("user_id", "eq", str(user_id))
.execute()
)
if len(response.data) == 0:
return None
return IntegrationEntity(**response.data[0])
def delete_integration_brain(self, brain_id, user_id):
self.db.table("integrations_user").delete().filter(
"brain_id", "eq", str(brain_id)
).filter("user_id", "eq", str(user_id)).execute()
return None
class IntegrationDescription(IntegrationDescriptionInterface):
def __init__(self):
self.db = get_supabase_client()
def get_integration_description(self, integration_id):
response = (
self.db.table("integrations")
.select("*")
.filter("id", "eq", integration_id)
.execute()
)
if len(response.data) == 0:
return None
return IntegrationDescriptionEntity(**response.data[0])
def get_integration_description_by_user_brain_id(self, brain_id, user_id):
response = (
self.db.table("integrations_user")
.select("*")
.filter("brain_id", "eq", brain_id)
.filter("user_id", "eq", user_id)
.execute()
)
if len(response.data) == 0:
return None
integration_id = response.data[0]["integration_id"]
return self.get_integration_description(integration_id)
def get_all_integration_descriptions(self):
response = self.db.table("integrations").select("*").execute()
return [IntegrationDescriptionEntity(**data) for data in response.data]
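
For illustration (not part of the diff), this is roughly how the connector and service layer consume these repositories; the IDs are placeholders and the variable names are hypothetical.

```python
# Illustrative sketch, not part of the commit: all values are placeholders.
from modules.brain.repository.integration_brains import (
    IntegrationBrain,
    IntegrationDescription,
)

repo = IntegrationBrain()
integration = repo.get_integration_brain("<brain-uuid>", "<user-uuid>")
if integration is not None:
    # NotionConnector reads the token it stored in the settings column.
    token = (integration.settings or {}).get("notion_integration_token", "")

description = IntegrationDescription().get_integration_description_by_user_brain_id(
    "<brain-uuid>", "<user-uuid>"
)
# The service layer checks integration_name == "Notion" before routing chats to
# NotionBrain and before triggering the NotionConnectorLoad task.
```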


@ -1,6 +1,9 @@
from .composite_brains_connections_interface import CompositeBrainsConnectionsInterface
from .api_brain_definitions_interface import ApiBrainDefinitionsInterface
from .brains_interface import BrainsInterface
from .brains_users_interface import BrainsUsersInterface
from .brains_vectors_interface import BrainsVectorsInterface
from .composite_brains_connections_interface import \
CompositeBrainsConnectionsInterface
from .external_api_secrets_interface import ExternalApiSecretsInterface
from .integration_brains_interface import (IntegrationBrainInterface,
IntegrationDescriptionInterface)


@ -0,0 +1,64 @@
from abc import ABC, abstractmethod
from uuid import UUID
from modules.brain.entity.integration_brain import (
IntegrationDescriptionEntity,
IntegrationEntity,
)
class IntegrationBrainInterface(ABC):
@abstractmethod
def get_integration_brain(self, brain_id: UUID) -> IntegrationEntity:
"""Get the integration brain entity
Args:
brain_id (UUID): ID of the brain
Returns:
IntegrationEntity: Integration brain entity
"""
pass
@abstractmethod
def add_integration_brain(
self, brain_id: UUID, integration_brain: IntegrationEntity
) -> IntegrationEntity:
pass
@abstractmethod
def update_integration_brain(
self, brain_id: UUID, integration_brain: IntegrationEntity
) -> IntegrationEntity:
pass
@abstractmethod
def delete_integration_brain(self, brain_id: UUID) -> None:
pass
class IntegrationDescriptionInterface(ABC):
@abstractmethod
def get_integration_description(
self, integration_id: UUID
) -> IntegrationDescriptionEntity:
"""Get the integration description entity
Args:
integration_id (UUID): ID of the integration
Returns:
IntegrationEntity: Integration description entity
"""
pass
@abstractmethod
def get_all_integration_descriptions(self) -> list[IntegrationDescriptionEntity]:
pass
@abstractmethod
def get_integration_description_by_user_brain_id(
self, brain_id: UUID, user_id: UUID
) -> IntegrationDescriptionEntity:
pass


@ -1,16 +1,20 @@
from typing import Optional
from uuid import UUID
from celery_config import celery
from fastapi import HTTPException
from logger import get_logger
from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties
from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain
from modules.brain.entity.integration_brain import IntegrationEntity
from modules.brain.repository import (
Brains,
BrainsUsers,
BrainsVectors,
CompositeBrainsConnections,
ExternalApiSecrets,
IntegrationBrain,
IntegrationDescription,
)
from modules.brain.repository.interfaces import (
BrainsInterface,
@ -18,6 +22,7 @@ from modules.brain.repository.interfaces import (
BrainsVectorsInterface,
CompositeBrainsConnectionsInterface,
ExternalApiSecretsInterface,
IntegrationDescriptionInterface,
)
from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService
from modules.brain.service.utils.validate_brain import validate_api_brain
@ -37,6 +42,8 @@ class BrainService:
brain_vector_repository: BrainsVectorsInterface
external_api_secrets_repository: ExternalApiSecretsInterface
composite_brains_connections_repository: CompositeBrainsConnectionsInterface
integration_brains_repository: IntegrationDescriptionInterface
integration_description_repository: IntegrationDescriptionInterface
def __init__(self):
self.brain_repository = Brains()
@ -44,10 +51,17 @@ class BrainService:
self.brain_vector = BrainsVectors()
self.external_api_secrets_repository = ExternalApiSecrets()
self.composite_brains_connections_repository = CompositeBrainsConnections()
self.integration_brains_repository = IntegrationBrain()
self.integration_description_repository = IntegrationDescription()
def get_brain_by_id(self, brain_id: UUID):
return self.brain_repository.get_brain_by_id(brain_id)
def get_integration_brain(self, brain_id, user_id) -> IntegrationEntity | None:
return self.integration_brains_repository.get_integration_brain(
brain_id, user_id
)
def find_brain_from_question(
self,
brain_id: UUID,
@ -124,6 +138,9 @@ class BrainService:
if brain.brain_type == BrainType.COMPOSITE:
return self.create_brain_composite(brain)
if brain.brain_type == BrainType.INTEGRATION:
return self.create_brain_integration(user_id, brain)
created_brain = self.brain_repository.create_brain(brain)
return created_brain
@ -167,6 +184,33 @@ class BrainService:
return created_brain
def create_brain_integration(
self,
user_id: UUID,
brain: CreateBrainProperties,
) -> BrainEntity:
created_brain = self.brain_repository.create_brain(brain)
logger.info(f"Created brain: {created_brain}")
if brain.integration is not None:
logger.info(f"Integration: {brain.integration}")
self.integration_brains_repository.add_integration_brain(
user_id=user_id,
brain_id=created_brain.brain_id,
integration_id=brain.integration.integration_id,
settings=brain.integration.settings,
)
if (
self.integration_description_repository.get_integration_description(
brain.integration.integration_id
).integration_name
== "Notion"
):
celery.send_task(
"NotionConnectorLoad",
kwargs={"brain_id": created_brain.brain_id, "user_id": user_id},
)
return created_brain
def delete_brain_secrets_values(self, brain_id: UUID) -> None:
brain_definition = api_brain_definition_service.get_api_brain_definition(
brain_id=brain_id
@ -229,7 +273,7 @@ class BrainService:
brain_id,
brain=BrainUpdatableProperties(
**brain_new_values.dict(
exclude={"brain_definition", "connected_brains_ids"}
exclude={"brain_definition", "connected_brains_ids", "integration"}
)
),
)


@ -0,0 +1,25 @@
from modules.brain.entity.integration_brain import IntegrationDescriptionEntity
from modules.brain.repository.integration_brains import IntegrationDescription
from modules.brain.repository.interfaces import IntegrationDescriptionInterface
class IntegrationBrainDescriptionService:
repository: IntegrationDescriptionInterface
def __init__(self):
self.repository = IntegrationDescription()
def get_all_integration_descriptions(self) -> list[IntegrationDescriptionEntity]:
return self.repository.get_all_integration_descriptions()
def get_integration_description(
self, integration_id
) -> IntegrationDescriptionEntity:
return self.repository.get_integration_description(integration_id)
def get_integration_description_by_user_brain_id(
self, brain_id, user_id
) -> IntegrationDescriptionEntity:
return self.repository.get_integration_description_by_user_brain_id(
brain_id, user_id
)


@ -31,7 +31,7 @@ def test_create_brain(client, api_key):
payload = {
"name": random_brain_name,
"status": "public",
"model": "gpt-3.5-turbo-1106",
"model": "gpt-3.5-turbo-0125",
"temperature": 0,
"max_tokens": 2000,
"brain_type": "doc",
@ -175,7 +175,7 @@ def test_set_as_default_brain_endpoint(client, api_key):
payload = {
"name": random_brain_name,
"status": "public",
"model": "gpt-3.5-turbo-1106",
"model": "gpt-3.5-turbo-0125",
"temperature": 0,
"max_tokens": 256,
}
@ -228,7 +228,7 @@ def create_public_brain_retrieve_and_then_delete(client, api_key):
payload = {
"name": random_brain_name,
"status": "public",
"model": "gpt-3.5-turbo-1106",
"model": "gpt-3.5-turbo-0125",
"temperature": 0,
"max_tokens": 256,
"brain_type": "doc",


@ -1,17 +1,22 @@
from llm.api_brain_qa import APIBrainQA
from llm.knowledge_brain_qa import KnowledgeBrainQA
from logger import get_logger
from modules.brain.api_brain_qa import APIBrainQA
from modules.brain.entity.brain_entity import BrainType, RoleEnum
from modules.brain.integrations.Notion.Brain import NotionBrain
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService
from modules.brain.service.brain_authorization_service import (
validate_brain_authorization,
)
from modules.brain.service.brain_service import BrainService
from modules.brain.service.integration_brain_service import (
IntegrationBrainDescriptionService,
)
from modules.chat.controller.chat.interface import ChatInterface
from modules.chat.service.chat_service import ChatService
chat_service = ChatService()
api_brain_definition_service = ApiBrainDefinitionService()
integration_brain_description_service = IntegrationBrainDescriptionService()
logger = get_logger(__name__)
@ -19,9 +24,11 @@ models_supporting_function_calls = [
"gpt-4",
"gpt-4-1106-preview",
"gpt-4-0613",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-0125",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-0613",
"gpt-4-0125-preview",
"gpt-3.5-turbo",
]
brain_service = BrainService()
@ -49,11 +56,7 @@ class BrainfulChat(ChatInterface):
user_id,
metadata,
):
if (
brain
and brain.brain_type == BrainType.DOC
or model not in models_supporting_function_calls
):
if brain and brain.brain_type == BrainType.DOC:
return KnowledgeBrainQA(
chat_id=chat_id,
model=model,
@ -86,3 +89,20 @@ class BrainfulChat(ChatInterface):
brain_definition.jq_instructions if brain_definition else None
),
)
if brain.brain_type == BrainType.INTEGRATION:
integration_brain = integration_brain_description_service.get_integration_description_by_user_brain_id(
brain.brain_id, user_id
)
if integration_brain.integration_name == "Notion":
return NotionBrain(
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
max_input=max_input,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
metadata=metadata,
)


@ -16,10 +16,10 @@ from modules.chat.controller.chat.utils import (
def test_find_model_and_generate_metadata(mock_chat_service):
chat_id = uuid.uuid4()
brain = Mock()
brain.model = "gpt-3.5-turbo-1106"
user_settings = {"models": ["gpt-3.5-turbo-1106"]}
brain.model = "gpt-3.5-turbo-0125"
user_settings = {"models": ["gpt-3.5-turbo-0125"]}
models_settings = [
{"name": "gpt-3.5-turbo-1106", "max_input": 512, "max_output": 512}
{"name": "gpt-3.5-turbo-0125", "max_input": 512, "max_output": 512}
]
metadata_brain = {"key": "value"}
@ -30,13 +30,13 @@ def test_find_model_and_generate_metadata(mock_chat_service):
)
assert isinstance(model_to_use, LLMModels)
assert model_to_use.name == "gpt-3.5-turbo-1106"
assert model_to_use.name == "gpt-3.5-turbo-0125"
assert model_to_use.max_input == 512
assert model_to_use.max_output == 512
assert metadata == {
"key": "value",
"follow_up_questions": [],
"model": "gpt-3.5-turbo-1106",
"model": "gpt-3.5-turbo-0125",
"max_tokens": 512,
"max_input": 512,
}
@ -46,13 +46,13 @@ def test_find_model_and_generate_metadata(mock_chat_service):
def test_find_model_and_generate_metadata_user_not_allowed(mock_chat_service):
chat_id = uuid.uuid4()
brain = Mock()
brain.model = "gpt-3.5-turbo-1106"
brain.model = "gpt-3.5-turbo-0125"
user_settings = {
"models": ["gpt-3.5-turbo-1107"]
} # User is not allowed to use the brain's model
models_settings = [
{"name": "gpt-3.5-turbo-1106", "max_input": 512, "max_output": 512},
{"name": "gpt-3.5-turbo-1107", "max_input": 512, "max_output": 512},
{"name": "gpt-3.5-turbo-0125", "max_input": 512, "max_output": 512},
{"name": "gpt-3.5-turbo-1107", "max_input": 12000, "max_output": 12000},
]
metadata_brain = {"key": "value"}
@ -63,15 +63,15 @@ def test_find_model_and_generate_metadata_user_not_allowed(mock_chat_service):
)
assert isinstance(model_to_use, LLMModels)
assert model_to_use.name == "gpt-3.5-turbo-1106" # Default model is used
assert model_to_use.max_input == 512
assert model_to_use.max_output == 512
assert model_to_use.name == "gpt-3.5-turbo-0125" # Default model is used
assert model_to_use.max_input == 12000
assert model_to_use.max_output == 1000
assert metadata == {
"key": "value",
"follow_up_questions": [],
"model": "gpt-3.5-turbo-1106",
"max_tokens": 512,
"max_input": 512,
"model": "gpt-3.5-turbo-0125",
"max_tokens": 1000,
"max_input": 12000,
}


@ -40,9 +40,9 @@ def find_model_and_generate_metadata(
metadata = {**metadata, **metadata_brain}
follow_up_questions = chat_service.get_follow_up_question(chat_id)
metadata["follow_up_questions"] = follow_up_questions
# Default model is gpt-3.5-turbo-1106
# Default model is gpt-3.5-turbo-0125
model_to_use = LLMModels( # TODO Implement default models in database
name="gpt-3.5-turbo-1106", price=1, max_input=12000, max_output=1000
name="gpt-3.5-turbo-0125", price=1, max_input=12000, max_output=1000
)
is_brain_model_available = any(
@ -50,7 +50,7 @@ def find_model_and_generate_metadata(
)
is_user_allowed_model = brain.model in user_settings.get(
"models", ["gpt-3.5-turbo-1106"]
"models", ["gpt-3.5-turbo-0125"]
) # Checks if the model is available in the list of models
logger.info(f"Brain model: {brain.model}")


@ -50,7 +50,7 @@ def test_create_chat_and_talk(client, api_key):
response = client.post(
f"/chat/{chat_id}/question?brain_id={default_brain_id}",
json={
"model": "gpt-3.5-turbo-1106",
"model": "gpt-3.5-turbo-0125",
"question": "Hello, how are you?",
"temperature": "0",
"max_tokens": "2000",


@ -9,6 +9,8 @@ class CreateKnowledgeProperties(BaseModel):
file_name: Optional[str] = None
url: Optional[str] = None
extension: str = "txt"
integration: Optional[str] = None
integration_link: Optional[str] = None
def dict(self, *args, **kwargs):
knowledge_dict = super().dict(*args, **kwargs)


@ -5,7 +5,7 @@ from pathlib import Path
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain_community.document_loaders.base import BaseLoader
def concatenate_rows(row: dict) -> str:


@ -9,7 +9,9 @@ from models import File, get_documents_vector_store
from packages.files.file import compute_sha1_from_content
async def process_audio(file: File, user, original_file_name):
async def process_audio(
file: File, user, original_file_name, integration=None, integration_link=None
):
temp_filename = None
file_sha = ""
dateshort = time.strftime("%Y%m%d-%H%M%S")


@ -1,13 +1,17 @@
from langchain.document_loaders import PythonLoader
from langchain_community.document_loaders import PythonLoader
from models import File
from .common import process_file
async def process_python(file: File, brain_id, original_file_name):
async def process_python(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return await process_file(
file=file,
loader_class=PythonLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -9,7 +9,14 @@ from repository.files.upload_file import DocumentSerializable
logger = get_logger(__name__)
async def process_file(file: File, loader_class, brain_id, original_file_name):
async def process_file(
file: File,
loader_class,
brain_id,
original_file_name,
integration=None,
integration_link=None,
):
dateshort = time.strftime("%Y%m%d")
neurons = Neurons()
@ -23,6 +30,8 @@ async def process_file(file: File, loader_class, brain_id, original_file_name):
"chunk_overlap": file.chunk_overlap,
"date": dateshort,
"original_file_name": original_file_name or file.file_name,
"integration": integration or "",
"integration_link": integration_link or "",
}
docs = []


@ -1,13 +1,17 @@
from langchain.document_loaders import CSVLoader
from langchain_community.document_loaders import CSVLoader
from models import File
from .common import process_file
def process_csv(file: File, brain_id, original_file_name):
def process_csv(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=CSVLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import Docx2txtLoader
from langchain_community.document_loaders import Docx2txtLoader
from models import File
from .common import process_file
def process_docx(file: File, brain_id, original_file_name):
def process_docx(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=Docx2txtLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders.epub import UnstructuredEPubLoader
from langchain_community.document_loaders.epub import UnstructuredEPubLoader
from models import File
from .common import process_file
def process_epub(file: File, brain_id, original_file_name):
def process_epub(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredEPubLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,9 +1,9 @@
import os
import time
from langchain.document_loaders import GitLoader
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import GitLoader
from models.files import File
from packages.embeddings.vectors import Neurons
from packages.files.file import compute_sha1_from_content


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader
from models import File
from .common import process_file
def process_html(file: File, brain_id, original_file_name):
def process_html(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredHTMLLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from models import File
from .common import process_file
def process_markdown(file: File, brain_id, original_file_name):
def process_markdown(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredMarkdownLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import NotebookLoader
from langchain_community.document_loaders import NotebookLoader
from models import File
from .common import process_file
def process_ipnyb(file: File, brain_id, original_file_name):
def process_ipnyb(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=NotebookLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import UnstructuredPDFLoader
from models import File
from .common import process_file
def process_odt(file: File, brain_id, original_file_name):
def process_odt(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import UnstructuredPDFLoader
from models import File
from .common import process_file
def process_pdf(file: File, brain_id, original_file_name):
def process_pdf(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredPowerPointLoader
from langchain_community.document_loaders import UnstructuredPowerPointLoader
from models import File
from .common import process_file
def process_powerpoint(file: File, brain_id, original_file_name):
def process_powerpoint(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredPowerPointLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -4,10 +4,14 @@ from packages.files.loaders.telegram import TelegramChatFileLoader
from .common import process_file
def process_telegram(file: File, brain_id, original_file_name):
def process_telegram(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=TelegramChatFileLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,17 +1,17 @@
from langchain.document_loaders import TextLoader
from langchain_community.document_loaders import TextLoader
from models import File
from .common import process_file
async def process_txt(
file: File,
brain_id,
original_file_name,
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return await process_file(
file=file,
loader_class=TextLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,13 +1,17 @@
from langchain.document_loaders import UnstructuredExcelLoader
from langchain_community.document_loaders import UnstructuredExcelLoader
from models.files import File
from .common import process_file
def process_xlsx(file: File, brain_id, original_file_name):
def process_xlsx(
file: File, brain_id, original_file_name, integration=None, integration_link=None
):
return process_file(
file=file,
loader_class=UnstructuredExcelLoader,
brain_id=brain_id,
original_file_name=original_file_name,
integration=integration,
integration_link=integration_link,
)


@ -1,10 +1,9 @@
-i https://pypi.org/simple
aiohttp==3.9.1; python_version >= '3.8'
aiohttp==3.9.3; python_version >= '3.8'
aiosignal==1.3.1; python_version >= '3.7'
amqp==5.2.0; python_version >= '3.6'
anyio==3.7.1; python_version >= '3.7'
appdirs==1.4.4
argilla==1.22.0; python_version < '3.12' and python_version >= '3.8'
argilla==1.23.0; python_version < '3.12' and python_version >= '3.8'
async-generator==1.10; python_version >= '3.5'
async-timeout==4.0.3; python_full_version <= '3.11.2'
asyncpg==0.27.0; python_full_version >= '3.7.0'
@ -12,24 +11,24 @@ attrs==23.2.0; python_version >= '3.7'
backoff==2.2.1; python_version >= '3.7' and python_version < '4.0'
beautifulsoup4==4.12.3; python_full_version >= '3.6.0'
billiard==4.2.0; python_version >= '3.7'
black==24.1.0; python_version >= '3.8'
black==24.1.1; python_version >= '3.8'
boto3==1.33.7; python_version >= '3.7'
botocore==1.33.7; python_version >= '3.7'
bytecode==0.15.1; python_version >= '3.8'
cattrs==23.2.3; python_version >= '3.8'
celery[sqs]==5.3.6; python_version >= '3.8'
certifi==2023.11.17; python_version >= '3.6'
certifi==2024.2.2; python_version >= '3.6'
cffi==1.16.0; platform_python_implementation != 'PyPy'
charset-normalizer==3.3.2; python_full_version >= '3.7.0'
click==8.1.7; python_version >= '3.7'
click-didyoumean==0.3.0; python_full_version >= '3.6.2' and python_full_version < '4.0.0'
click-plugins==1.1.1
click-repl==0.3.0; python_version >= '3.6'
cryptography==42.0.1; python_version >= '3.7'
cryptography==42.0.2; python_version >= '3.7'
cssselect==1.2.0; python_version >= '3.7'
dataclasses-json==0.6.3; python_version >= '3.7' and python_version < '4.0'
dataclasses-json==0.6.4; python_version >= '3.7' and python_version < '4.0'
ddsketch==2.0.4; python_version >= '2.7'
ddtrace==2.5.1; python_version >= '3.7'
ddtrace==2.5.2; python_version >= '3.7'
deprecated==1.2.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
deprecation==2.1.0
distro==1.9.0; python_version >= '3.6'
@ -45,7 +44,7 @@ flake8==6.0.0; python_full_version >= '3.8.1'
flake8-black==0.3.6; python_version >= '3.7'
flower==2.0.1; python_version >= '3.7'
frozenlist==1.4.1; python_version >= '3.8'
fsspec==2023.12.2; python_version >= '3.8'
fsspec==2024.2.0; python_version >= '3.8'
gitdb==4.0.11; python_version >= '3.7'
gitpython==3.1.36; python_version >= '3.7'
gotrue==1.3.1; python_version >= '3.8' and python_version < '4.0'
@ -66,20 +65,21 @@ jq==1.6.0; python_version >= '3.5'
jsonpatch==1.33; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
jsonpointer==2.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
kombu[sqs]==5.3.5; python_version >= '3.8'
langchain==0.0.341; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-core==0.0.6; python_version < '4.0' and python_full_version >= '3.8.1'
langsmith==0.0.83; python_version < '4.0' and python_full_version >= '3.8.1'
litellm==1.7.7; python_version >= '3.8' and python_version < '4.0'
langchain==0.1.5; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-community==0.0.17; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-core==0.1.18; python_version < '4.0' and python_full_version >= '3.8.1'
langsmith==0.0.86; python_version < '4.0' and python_full_version >= '3.8.1'
litellm==1.22.5; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'
lxml==5.1.0; python_version >= '3.6'
markdown==3.5.2; python_version >= '3.8'
markdown-it-py==3.0.0; python_version >= '3.8'
markupsafe==2.1.4; python_version >= '3.7'
markupsafe==2.1.5; python_version >= '3.7'
marshmallow==3.20.2; python_version >= '3.8'
mccabe==0.7.0; python_version >= '3.6'
mdurl==0.1.2; python_version >= '3.7'
monotonic==1.6
msg-parser==1.2.0; python_version >= '3.4'
multidict==6.0.4; python_version >= '3.7'
multidict==6.0.5; python_version >= '3.7'
mypy-extensions==1.0.0; python_version >= '3.5'
nest-asyncio==1.5.6; python_version >= '3.5'
newspaper3k==0.2.8
@ -96,7 +96,7 @@ pathspec==0.12.1; python_version >= '3.8'
pdf2image==1.16.3
pdfminer.six==20231228; python_version >= '3.6'
pillow==10.2.0; python_version >= '3.8'
platformdirs==4.1.0; python_version >= '3.8'
platformdirs==4.2.0; python_version >= '3.8'
pluggy==1.4.0; python_version >= '3.8'
postgrest==0.11.0; python_version >= '3.8' and python_version < '4.0'
posthog==3.1.0
@ -114,7 +114,7 @@ pypandoc==1.11; python_version >= '3.6'
pypdf==3.9.0; python_version >= '3.6'
pyright==1.1.316; python_version >= '3.7'
pytesseract==0.3.10; python_version >= '3.7'
pytest==7.4.4; python_version >= '3.7'
pytest==8.0.0; python_version >= '3.8'
pytest-celery==0.0.0
pytest-mock==3.12.0; python_version >= '3.8'
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
@ -124,18 +124,18 @@ python-jose==3.3.0
python-magic==0.4.27; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
python-multipart==0.0.6; python_version >= '3.7'
python-pptx==0.6.23
pytz==2023.3.post1
pytz==2024.1
pyyaml==6.0.1; python_version >= '3.6'
realtime==1.0.2; python_version >= '3.8' and python_version < '4.0'
redis==4.5.4; python_version >= '3.7'
regex==2023.12.25; python_version >= '3.7'
requests==2.31.0; python_version >= '3.7'
requests-file==1.5.1
requests-file==2.0.0
resend==0.5.1; python_version >= '3.7'
rich==13.7.0; python_full_version >= '3.7.0'
rsa==4.9; python_version >= '3.6' and python_version < '4'
s3transfer==0.8.2; python_version >= '3.7'
sentry-sdk[fastapi]==1.39.2
sentry-sdk[fastapi]==1.40.0
setuptools==69.0.3; python_version >= '3.8'
sgmllib3k==1.0.0
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
@ -163,6 +163,7 @@ unstructured==0.6.7; python_full_version >= '3.7.0'
urllib3==2.0.7; python_version >= '3.10'
uvicorn==0.22.0; python_version >= '3.7'
vine==5.1.0; python_version >= '3.6'
watchdog==3.0.0; python_version >= '3.7'
wcwidth==0.2.13
webencodings==0.5.1
websockets==11.0.3; python_version >= '3.7'


@ -2,7 +2,7 @@ from typing import Any, List
from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import SupabaseVectorStore
from langchain_community.vectorstores import SupabaseVectorStore
from logger import get_logger
from supabase.client import Client
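
With the import relocated to langchain_community, the vector store is built the same way as before; a hedged sketch, where the table and query names are illustrative rather than taken from this diff:

from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client

def build_vector_store(client: Client, embeddings: Embeddings) -> SupabaseVectorStore:
    # Illustrative table/query names; the real values come from application config.
    return SupabaseVectorStore(
        client=client,
        embedding=embeddings,
        table_name="vectors",
        query_name="match_vectors",
    )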


@ -46,7 +46,12 @@ services:
context: backend
dockerfile: Dockerfile
container_name: worker
command: celery -A celery_worker worker -l info
volumes:
- ./backend/:/code/
command: >
/bin/sh -c "
watchmedo auto-restart -d . -p '*.py' --recursive -- celery -A celery_worker worker -l info
"
restart: always
depends_on:
- redis
@ -59,8 +64,13 @@ services:
build:
context: backend
dockerfile: Dockerfile
volumes:
- ./backend/:/code/
container_name: beat
command: celery -A celery_worker beat -l info
command: >
/bin/sh -c "
watchmedo auto-restart -d . -p '*.py' --recursive -- celery -A celery_worker beat -l info
"
restart: always
depends_on:
- redis
@ -73,6 +83,8 @@ services:
build:
context: backend
dockerfile: Dockerfile
volumes:
- ./backend/:/code/
container_name: flower
command: celery -A celery_worker flower -l info --port=5555
restart: always


@ -7,12 +7,14 @@ export const defineMaxTokens = (
switch (model) {
case "gpt-3.5-turbo":
return 2000;
case "gpt-3.5-turbo-1106":
case "gpt-3.5-turbo-0125":
return 2000;
case "gpt-3.5-turbo-16k":
return 4000;
case "gpt-4":
return 4000;
case "gpt-4-0125-preview":
return 4000;
default:
return 1000;
}
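
For reference, the same limits re-expressed as a plain mapping (a Python sketch mirroring the switch above, not a replacement for the frontend helper):

MAX_TOKENS = {
    "gpt-3.5-turbo": 2000,
    "gpt-3.5-turbo-1106": 2000,
    "gpt-3.5-turbo-0125": 2000,
    "gpt-3.5-turbo-16k": 4000,
    "gpt-4": 4000,
    "gpt-4-0125-preview": 4000,
}

def define_max_tokens(model: str) -> int:
    # Unknown models fall back to 1000, matching the switch default.
    return MAX_TOKENS.get(model, 1000)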


@ -41,6 +41,8 @@ export const openAiFreeModels = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-16k",
"gpt-4-0125-preview",
"gpt-3.5-turbo-0125",
] as const;
export const openAiPaidModels = [...openAiFreeModels, "gpt-4"] as const;

File diff suppressed because it is too large


@ -1,70 +0,0 @@
#!/bin/bash
# Exit script immediately on first error.
set -e
# Function to run SQL file
run_sql_file() {
local file="$1"
PGPASSWORD=${DB_PASSWORD} psql -h "${DB_HOST}" -p "${DB_PORT}" -d "${DB_NAME}" -U "${DB_USER}" -f "$file"
}
# Flag to indicate whether to prompt for DB info
prompt_for_db_info=false
# Check if .migration_info exists and source it
if [ -f .migration_info ]; then
source .migration_info
# Check if any of the variables are empty
if [ -z "$DB_HOST" ] || [ -z "$DB_NAME" ] || [ -z "$DB_PORT" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then
prompt_for_db_info=true # Some values are empty, so prompt user for values
fi
else
prompt_for_db_info=true # No .migration_info file, so prompt user for values
fi
# If .migration_info doesn't exist or any of the variables are empty, prompt for DB info
if $prompt_for_db_info ; then
echo "Please enter the following database connection information that can be found in Supabase in Settings -> database:"
DB_HOST=$(gum input --placeholder "Host - e.g. db.<your-project>.supabase.co")
DB_NAME=$(gum input --placeholder "Database name: always postgres")
DB_PORT=$(gum input --placeholder "Port: always 5432")
DB_USER=$(gum input --placeholder "User: always postgres")
DB_PASSWORD=$(gum input --placeholder "Password: the one you used at init" --password)
# Save the inputs in .migration_info file
echo "DB_HOST=$DB_HOST" > .migration_info
echo "DB_NAME=$DB_NAME" >> .migration_info
echo "DB_PORT=$DB_PORT" >> .migration_info
echo "DB_USER=$DB_USER" >> .migration_info
echo "DB_PASSWORD=$DB_PASSWORD" >> .migration_info
fi
# Ask user whether to create tables or run migrations
CHOICE=$(gum choose --header "Choose an option" "Create all tables (First Time)" "Run Migrations (After updating Quivr)")
if [ "$CHOICE" == "Create all tables (First Time)" ]; then
# Running the tables.sql file to create tables
run_sql_file "scripts/tables.sql"
else
# Get the last migration that was executed
LAST_MIGRATION=$(PGPASSWORD=${DB_PASSWORD} psql -h "${DB_HOST}" -p "${DB_PORT}" -d "${DB_NAME}" -U "${DB_USER}" -tAc "SELECT name FROM migrations ORDER BY executed_at DESC LIMIT 1;")
echo "Last migration executed: $LAST_MIGRATION"
# Iterate through the migration files
for file in $(ls scripts | grep -E '^[0-9]+.*\.sql$' | sort); do
MIGRATION_ID=$(basename "$file" ".sql")
# Only run migrations that are newer than the last executed migration
if [ -z "$LAST_MIGRATION" ] || [ "$MIGRATION_ID" \> "$LAST_MIGRATION" ]; then
# Run the migration
echo "Running migration $file"
run_sql_file "scripts/$file"
# And record it as having been run
PGPASSWORD=${DB_PASSWORD} psql -h "${DB_HOST}" -p "${DB_PORT}" -d "${DB_NAME}" -U "${DB_USER}" -c "INSERT INTO migrations (name) VALUES ('${MIGRATION_ID}');"
fi
done
fi
echo "Migration script completed."


@ -1,6 +0,0 @@
{
"dependencies": {
"change-case": "^5.4.2"
},
"version": "0.0.194"
}


@ -0,0 +1,165 @@
alter table "public"."brains" alter column "brain_type" drop default;
alter type "public"."brain_type_enum" rename to "brain_type_enum__old_version_to_be_dropped";
create type "public"."brain_type_enum" as enum ('doc', 'api', 'composite', 'integration');
create table "public"."integrations" (
"created_at" timestamp with time zone not null default now(),
"integration_name" text not null,
"integration_logo_url" text,
"connection_settings" jsonb,
"id" uuid not null default gen_random_uuid()
);
alter table "public"."integrations" enable row level security;
create table "public"."integrations_user" (
"id" bigint generated by default as identity not null,
"created_at" timestamp with time zone not null default now(),
"user_id" uuid not null,
"brain_id" uuid,
"integration_id" uuid,
"settings" jsonb,
"credentials" jsonb
);
alter table "public"."integrations_user" enable row level security;
alter table "public"."brains" alter column brain_type type "public"."brain_type_enum" using brain_type::text::"public"."brain_type_enum";
alter table "public"."brains" alter column "brain_type" set default 'doc'::brain_type_enum;
drop type "public"."brain_type_enum__old_version_to_be_dropped";
alter table "public"."brains" alter column "model" set default 'gpt-3.5-turbo-1106'::text;
alter table "public"."brains_vectors" add column "id" uuid not null default gen_random_uuid();
alter table "public"."brains_vectors" alter column "vector_id" set not null;
alter table "public"."knowledge" add column "integration" text;
alter table "public"."knowledge" add column "integration_link" text;
alter table "public"."user_settings" alter column "models" set default '["gpt-3.5-turbo-0125"]'::jsonb;
CREATE UNIQUE INDEX brains_vectors_pkey ON public.brains_vectors USING btree (id);
CREATE UNIQUE INDEX integrations_id_key ON public.integrations USING btree (id);
CREATE UNIQUE INDEX integrations_integration_name_key ON public.integrations USING btree (integration_name);
CREATE UNIQUE INDEX integrations_pkey ON public.integrations USING btree (id);
CREATE UNIQUE INDEX integrations_user_pkey ON public.integrations_user USING btree (id);
alter table "public"."brains_vectors" add constraint "brains_vectors_pkey" PRIMARY KEY using index "brains_vectors_pkey";
alter table "public"."integrations" add constraint "integrations_pkey" PRIMARY KEY using index "integrations_pkey";
alter table "public"."integrations_user" add constraint "integrations_user_pkey" PRIMARY KEY using index "integrations_user_pkey";
alter table "public"."integrations" add constraint "integrations_id_key" UNIQUE using index "integrations_id_key";
alter table "public"."integrations" add constraint "integrations_integration_name_key" UNIQUE using index "integrations_integration_name_key";
alter table "public"."integrations_user" add constraint "integrations_user_brain_id_fkey" FOREIGN KEY (brain_id) REFERENCES brains(brain_id) ON UPDATE CASCADE ON DELETE CASCADE not valid;
alter table "public"."integrations_user" validate constraint "integrations_user_brain_id_fkey";
alter table "public"."integrations_user" add constraint "integrations_user_integration_id_fkey" FOREIGN KEY (integration_id) REFERENCES integrations(id) ON UPDATE CASCADE ON DELETE CASCADE not valid;
alter table "public"."integrations_user" validate constraint "integrations_user_integration_id_fkey";
alter table "public"."integrations_user" add constraint "integrations_user_user_id_fkey" FOREIGN KEY (user_id) REFERENCES users(id) ON UPDATE CASCADE ON DELETE CASCADE not valid;
alter table "public"."integrations_user" validate constraint "integrations_user_user_id_fkey";
grant delete on table "public"."integrations" to "anon";
grant insert on table "public"."integrations" to "anon";
grant references on table "public"."integrations" to "anon";
grant select on table "public"."integrations" to "anon";
grant trigger on table "public"."integrations" to "anon";
grant truncate on table "public"."integrations" to "anon";
grant update on table "public"."integrations" to "anon";
grant delete on table "public"."integrations" to "authenticated";
grant insert on table "public"."integrations" to "authenticated";
grant references on table "public"."integrations" to "authenticated";
grant select on table "public"."integrations" to "authenticated";
grant trigger on table "public"."integrations" to "authenticated";
grant truncate on table "public"."integrations" to "authenticated";
grant update on table "public"."integrations" to "authenticated";
grant delete on table "public"."integrations" to "service_role";
grant insert on table "public"."integrations" to "service_role";
grant references on table "public"."integrations" to "service_role";
grant select on table "public"."integrations" to "service_role";
grant trigger on table "public"."integrations" to "service_role";
grant truncate on table "public"."integrations" to "service_role";
grant update on table "public"."integrations" to "service_role";
grant delete on table "public"."integrations_user" to "anon";
grant insert on table "public"."integrations_user" to "anon";
grant references on table "public"."integrations_user" to "anon";
grant select on table "public"."integrations_user" to "anon";
grant trigger on table "public"."integrations_user" to "anon";
grant truncate on table "public"."integrations_user" to "anon";
grant update on table "public"."integrations_user" to "anon";
grant delete on table "public"."integrations_user" to "authenticated";
grant insert on table "public"."integrations_user" to "authenticated";
grant references on table "public"."integrations_user" to "authenticated";
grant select on table "public"."integrations_user" to "authenticated";
grant trigger on table "public"."integrations_user" to "authenticated";
grant truncate on table "public"."integrations_user" to "authenticated";
grant update on table "public"."integrations_user" to "authenticated";
grant delete on table "public"."integrations_user" to "service_role";
grant insert on table "public"."integrations_user" to "service_role";
grant references on table "public"."integrations_user" to "service_role";
grant select on table "public"."integrations_user" to "service_role";
grant trigger on table "public"."integrations_user" to "service_role";
grant truncate on table "public"."integrations_user" to "service_role";
grant update on table "public"."integrations_user" to "service_role";
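
A hedged sketch of how the backend could record a user's connection in the new integrations_user table via supabase-py; the settings payload and the token shown are placeholders, not values from this migration:

from supabase.client import Client

def save_user_integration(client: Client, user_id: str, brain_id: str, integration_id: str) -> None:
    # Illustrative insert into the table created by this migration.
    client.table("integrations_user").insert(
        {
            "user_id": user_id,
            "brain_id": brain_id,
            "integration_id": integration_id,
            "settings": {"sync": "all_pages"},            # placeholder settings
            "credentials": {"token": "<notion-token>"},   # placeholder credential
        }
    ).execute()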


@ -24,7 +24,8 @@ SET row_security = off;
INSERT INTO "auth"."audit_log_entries" ("instance_id", "id", "payload", "created_at", "ip_address") VALUES
('00000000-0000-0000-0000-000000000000', '84e89c28-6f5f-4e24-a03b-68cdaa90b3f2', '{"action":"user_signedup","actor_id":"00000000-0000-0000-0000-000000000000","actor_username":"service_role","actor_via_sso":false,"log_type":"team","traits":{"user_email":"admin@quivr.app","user_id":"39418e3b-0258-4452-af60-7acfcc1263ff","user_phone":""}}', '2024-01-22 22:27:00.164777+00', ''),
('00000000-0000-0000-0000-000000000000', 'ac1d43e6-2b2a-4af1-bdd1-c03907e7ba5a', '{"action":"login","actor_id":"39418e3b-0258-4452-af60-7acfcc1263ff","actor_username":"admin@quivr.app","actor_via_sso":false,"log_type":"account","traits":{"provider":"email"}}', '2024-01-22 22:27:50.16388+00', '');
('00000000-0000-0000-0000-000000000000', 'ac1d43e6-2b2a-4af1-bdd1-c03907e7ba5a', '{"action":"login","actor_id":"39418e3b-0258-4452-af60-7acfcc1263ff","actor_username":"admin@quivr.app","actor_via_sso":false,"log_type":"account","traits":{"provider":"email"}}', '2024-01-22 22:27:50.16388+00', ''),
('00000000-0000-0000-0000-000000000000', 'e86de23b-ea26-408e-8e8c-a97d2f1f259c', '{"action":"login","actor_id":"39418e3b-0258-4452-af60-7acfcc1263ff","actor_username":"admin@quivr.app","actor_via_sso":false,"log_type":"account","traits":{"provider":"email"}}', '2024-02-06 04:08:08.378325+00', '');
--
@ -38,7 +39,7 @@ INSERT INTO "auth"."audit_log_entries" ("instance_id", "id", "payload", "created
--
INSERT INTO "auth"."users" ("instance_id", "id", "aud", "role", "email", "encrypted_password", "email_confirmed_at", "invited_at", "confirmation_token", "confirmation_sent_at", "recovery_token", "recovery_sent_at", "email_change_token_new", "email_change", "email_change_sent_at", "last_sign_in_at", "raw_app_meta_data", "raw_user_meta_data", "is_super_admin", "created_at", "updated_at", "phone", "phone_confirmed_at", "phone_change", "phone_change_token", "phone_change_sent_at", "email_change_token_current", "email_change_confirm_status", "banned_until", "reauthentication_token", "reauthentication_sent_at", "is_sso_user", "deleted_at") VALUES
('00000000-0000-0000-0000-000000000000', '39418e3b-0258-4452-af60-7acfcc1263ff', 'authenticated', 'authenticated', 'admin@quivr.app', '$2a$10$vwKX0eMLlrOZvxQEA3Vl4e5V4/hOuxPjGYn9QK1yqeaZxa.42Uhze', '2024-01-22 22:27:00.166861+00', NULL, '', NULL, '', NULL, '', '', NULL, '2024-01-22 22:27:50.165259+00', '{"provider": "email", "providers": ["email"]}', '{}', NULL, '2024-01-22 22:27:00.158026+00', '2024-01-22 22:27:50.169826+00', NULL, NULL, '', '', NULL, '', 0, NULL, '', NULL, false, NULL);
('00000000-0000-0000-0000-000000000000', '39418e3b-0258-4452-af60-7acfcc1263ff', 'authenticated', 'authenticated', 'admin@quivr.app', '$2a$10$vwKX0eMLlrOZvxQEA3Vl4e5V4/hOuxPjGYn9QK1yqeaZxa.42Uhze', '2024-01-22 22:27:00.166861+00', NULL, '', NULL, '', NULL, '', '', NULL, '2024-02-06 04:08:08.379821+00', '{"provider": "email", "providers": ["email"]}', '{}', NULL, '2024-01-22 22:27:00.158026+00', '2024-02-06 04:08:08.38296+00', NULL, NULL, '', '', NULL, '', 0, NULL, '', NULL, false, NULL);
--
@ -60,7 +61,8 @@ INSERT INTO "auth"."identities" ("provider_id", "user_id", "identity_data", "pro
--
INSERT INTO "auth"."sessions" ("id", "user_id", "created_at", "updated_at", "factor_id", "aal", "not_after", "refreshed_at", "user_agent", "ip", "tag") VALUES
('99541374-fc9d-4f5b-b0f6-ff092a455a60', '39418e3b-0258-4452-af60-7acfcc1263ff', '2024-01-22 22:27:50.165335+00', '2024-01-22 22:27:50.165335+00', NULL, 'aal1', NULL, NULL, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', '192.168.65.1', NULL);
('99541374-fc9d-4f5b-b0f6-ff092a455a60', '39418e3b-0258-4452-af60-7acfcc1263ff', '2024-01-22 22:27:50.165335+00', '2024-01-22 22:27:50.165335+00', NULL, 'aal1', NULL, NULL, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', '192.168.65.1', NULL),
('52f4a82d-9e1f-4beb-9548-61f894563ce9', '39418e3b-0258-4452-af60-7acfcc1263ff', '2024-02-06 04:08:08.379919+00', '2024-02-06 04:08:08.379919+00', NULL, 'aal1', NULL, NULL, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', '192.168.65.1', NULL);
--
@ -68,7 +70,8 @@ INSERT INTO "auth"."sessions" ("id", "user_id", "created_at", "updated_at", "fac
--
INSERT INTO "auth"."mfa_amr_claims" ("session_id", "created_at", "updated_at", "authentication_method", "id") VALUES
('99541374-fc9d-4f5b-b0f6-ff092a455a60', '2024-01-22 22:27:50.170258+00', '2024-01-22 22:27:50.170258+00', 'password', 'ecaccc76-7e60-4cac-8472-c104837906cd');
('99541374-fc9d-4f5b-b0f6-ff092a455a60', '2024-01-22 22:27:50.170258+00', '2024-01-22 22:27:50.170258+00', 'password', 'ecaccc76-7e60-4cac-8472-c104837906cd'),
('52f4a82d-9e1f-4beb-9548-61f894563ce9', '2024-02-06 04:08:08.383754+00', '2024-02-06 04:08:08.383754+00', 'password', '481e338a-dbff-45e3-9e5f-18ae4d96a9c8');
--
@ -88,7 +91,8 @@ INSERT INTO "auth"."mfa_amr_claims" ("session_id", "created_at", "updated_at", "
--
INSERT INTO "auth"."refresh_tokens" ("instance_id", "id", "token", "user_id", "revoked", "created_at", "updated_at", "parent", "session_id") VALUES
('00000000-0000-0000-0000-000000000000', 1, 'qusbTEienvum2hAm8cjwzw', '39418e3b-0258-4452-af60-7acfcc1263ff', false, '2024-01-22 22:27:50.167806+00', '2024-01-22 22:27:50.167806+00', NULL, '99541374-fc9d-4f5b-b0f6-ff092a455a60');
('00000000-0000-0000-0000-000000000000', 1, 'qusbTEienvum2hAm8cjwzw', '39418e3b-0258-4452-af60-7acfcc1263ff', false, '2024-01-22 22:27:50.167806+00', '2024-01-22 22:27:50.167806+00', NULL, '99541374-fc9d-4f5b-b0f6-ff092a455a60'),
('00000000-0000-0000-0000-000000000000', 2, 'GgZ5ahBDWrTNx5RyrsRjIA', '39418e3b-0258-4452-af60-7acfcc1263ff', false, '2024-02-06 04:08:08.381408+00', '2024-02-06 04:08:08.381408+00', NULL, '52f4a82d-9e1f-4beb-9548-61f894563ce9');
--
@ -191,6 +195,26 @@ INSERT INTO "public"."chat_history" ("message_id", "chat_id", "user_message", "a
--
-- Data for Name: integrations; Type: TABLE DATA; Schema: public; Owner: postgres
--
--
-- Data for Name: users; Type: TABLE DATA; Schema: public; Owner: postgres
--
INSERT INTO "public"."users" ("id", "email") VALUES
('39418e3b-0258-4452-af60-7acfcc1263ff', 'admin@quivr.app');
--
-- Data for Name: integrations_user; Type: TABLE DATA; Schema: public; Owner: postgres
--
--
-- Data for Name: knowledge; Type: TABLE DATA; Schema: public; Owner: postgres
--
@ -202,7 +226,7 @@ INSERT INTO "public"."chat_history" ("message_id", "chat_id", "user_message", "a
--
INSERT INTO "public"."models" ("name", "price", "max_input", "max_output") VALUES
('gpt-3.5-turbo-1106', 1, 2000, 1000);
('gpt-3.5-turbo-0125', 1, 2000, 1000);
--
@ -244,22 +268,21 @@ INSERT INTO "public"."user_daily_usage" ("user_id", "email", "date", "daily_requ
--
INSERT INTO "public"."user_settings" ("user_id", "models", "max_brains", "max_brain_size", "is_premium", "api_access", "monthly_chat_credit") VALUES
('39418e3b-0258-4452-af60-7acfcc1263ff', '["gpt-3.5-turbo-1106"]', 3, 50000000, false, false, 100);
--
-- Data for Name: users; Type: TABLE DATA; Schema: public; Owner: postgres
--
INSERT INTO "public"."users" ("id", "email") VALUES
('39418e3b-0258-4452-af60-7acfcc1263ff', 'admin@quivr.app');
('39418e3b-0258-4452-af60-7acfcc1263ff', '["gpt-3.5-turbo-0125"]', 3, 50000000, false, false, 100);
--
-- Name: refresh_tokens_id_seq; Type: SEQUENCE SET; Schema: auth; Owner: supabase_auth_admin
--
SELECT pg_catalog.setval('"auth"."refresh_tokens_id_seq"', 1, true);
SELECT pg_catalog.setval('"auth"."refresh_tokens_id_seq"', 2, true);
--
-- Name: integrations_user_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres
--
SELECT pg_catalog.setval('"public"."integrations_user_id_seq"', 1, false);
--


@ -1,223 +0,0 @@
_format_version: '2.1'
_transform: true
###
### Consumers / Users
###
consumers:
- username: DASHBOARD
- username: anon
keyauth_credentials:
- key: $SUPABASE_ANON_KEY
- username: service_role
keyauth_credentials:
- key: $SUPABASE_SERVICE_KEY
###
### Access Control List
###
acls:
- consumer: anon
group: anon
- consumer: service_role
group: admin
###
### Dashboard credentials
###
basicauth_credentials:
- consumer: DASHBOARD
username: $DASHBOARD_USERNAME
password: $DASHBOARD_PASSWORD
###
### API Routes
###
services:
## Open Auth routes
- name: auth-v1-open
url: http://auth:9999/verify
routes:
- name: auth-v1-open
strip_path: true
paths:
- /auth/v1/verify
plugins:
- name: cors
- name: auth-v1-open-callback
url: http://auth:9999/callback
routes:
- name: auth-v1-open-callback
strip_path: true
paths:
- /auth/v1/callback
plugins:
- name: cors
- name: auth-v1-open-authorize
url: http://auth:9999/authorize
routes:
- name: auth-v1-open-authorize
strip_path: true
paths:
- /auth/v1/authorize
plugins:
- name: cors
## Secure Auth routes
- name: auth-v1
_comment: 'GoTrue: /auth/v1/* -> http://auth:9999/*'
url: http://auth:9999/
routes:
- name: auth-v1-all
strip_path: true
paths:
- /auth/v1/
plugins:
- name: cors
- name: key-auth
config:
hide_credentials: false
- name: acl
config:
hide_groups_header: true
allow:
- admin
- anon
## Secure REST routes
- name: rest-v1
_comment: 'PostgREST: /rest/v1/* -> http://rest:3000/*'
url: http://rest:3000/
routes:
- name: rest-v1-all
strip_path: true
paths:
- /rest/v1/
plugins:
- name: cors
- name: key-auth
config:
hide_credentials: true
- name: acl
config:
hide_groups_header: true
allow:
- admin
- anon
## Secure GraphQL routes
- name: graphql-v1
_comment: 'PostgREST: /graphql/v1/* -> http://rest:3000/rpc/graphql'
url: http://rest:3000/rpc/graphql
routes:
- name: graphql-v1-all
strip_path: true
paths:
- /graphql/v1
plugins:
- name: cors
- name: key-auth
config:
hide_credentials: true
- name: request-transformer
config:
add:
headers:
- Content-Profile:graphql_public
- name: acl
config:
hide_groups_header: true
allow:
- admin
- anon
## Secure Realtime routes
- name: realtime-v1
_comment: 'Realtime: /realtime/v1/* -> ws://realtime:4000/socket/*'
url: http://realtime-dev.supabase-realtime:4000/socket/
routes:
- name: realtime-v1-all
strip_path: true
paths:
- /realtime/v1/
plugins:
- name: cors
- name: key-auth
config:
hide_credentials: false
- name: acl
config:
hide_groups_header: true
allow:
- admin
- anon
## Storage routes: the storage server manages its own auth
- name: storage-v1
_comment: 'Storage: /storage/v1/* -> http://storage:5000/*'
url: http://storage:5000/
routes:
- name: storage-v1-all
strip_path: true
paths:
- /storage/v1/
plugins:
- name: cors
## Edge Functions routes
- name: functions-v1
_comment: 'Edge Functions: /functions/v1/* -> http://functions:9000/*'
url: http://functions:9000/
routes:
- name: functions-v1-all
strip_path: true
paths:
- /functions/v1/
plugins:
- name: cors
## Analytics routes
- name: analytics-v1
_comment: 'Analytics: /analytics/v1/* -> http://logflare:4000/*'
url: http://analytics:4000/
routes:
- name: analytics-v1-all
strip_path: true
paths:
- /analytics/v1/
## Secure Database routes
- name: meta
_comment: 'pg-meta: /pg/* -> http://pg-meta:8080/*'
url: http://meta:8080/
routes:
- name: meta-all
strip_path: true
paths:
- /pg/
plugins:
- name: key-auth
config:
hide_credentials: false
- name: acl
config:
hide_groups_header: true
allow:
- admin
## Protected Dashboard - catch all remaining routes
- name: dashboard
_comment: 'Studio: /* -> http://studio:3000/*'
url: http://studio:3000/
routes:
- name: dashboard-all
strip_path: true
paths:
- /
plugins:
- name: cors
- name: basic-auth
config:
hide_credentials: true


@ -1,5 +0,0 @@
\set jwt_secret `echo "$JWT_SECRET"`
\set jwt_exp `echo "$JWT_EXP"`
ALTER DATABASE postgres SET "app.settings.jwt_secret" TO :'jwt_secret';
ALTER DATABASE postgres SET "app.settings.jwt_exp" TO :'jwt_exp';


@ -1,4 +0,0 @@
\set pguser `echo "$POSTGRES_USER"`
create schema if not exists _analytics;
alter schema _analytics owner to :pguser;


@ -1,4 +0,0 @@
\set pguser `echo "$POSTGRES_USER"`
create schema if not exists _realtime;
alter schema _realtime owner to :pguser;


@ -1,8 +0,0 @@
-- NOTE: change to your own passwords for production environments
\set pgpass `echo "$POSTGRES_PASSWORD"`
ALTER USER authenticator WITH PASSWORD :'pgpass';
ALTER USER pgbouncer WITH PASSWORD :'pgpass';
ALTER USER supabase_auth_admin WITH PASSWORD :'pgpass';
ALTER USER supabase_functions_admin WITH PASSWORD :'pgpass';
ALTER USER supabase_storage_admin WITH PASSWORD :'pgpass';


@ -1,208 +0,0 @@
BEGIN;
-- Create pg_net extension
CREATE EXTENSION IF NOT EXISTS pg_net SCHEMA extensions;
-- Create supabase_functions schema
CREATE SCHEMA supabase_functions AUTHORIZATION supabase_admin;
GRANT USAGE ON SCHEMA supabase_functions TO postgres, anon, authenticated, service_role;
ALTER DEFAULT PRIVILEGES IN SCHEMA supabase_functions GRANT ALL ON TABLES TO postgres, anon, authenticated, service_role;
ALTER DEFAULT PRIVILEGES IN SCHEMA supabase_functions GRANT ALL ON FUNCTIONS TO postgres, anon, authenticated, service_role;
ALTER DEFAULT PRIVILEGES IN SCHEMA supabase_functions GRANT ALL ON SEQUENCES TO postgres, anon, authenticated, service_role;
-- supabase_functions.migrations definition
CREATE TABLE supabase_functions.migrations (
version text PRIMARY KEY,
inserted_at timestamptz NOT NULL DEFAULT NOW()
);
-- Initial supabase_functions migration
INSERT INTO supabase_functions.migrations (version) VALUES ('initial');
-- supabase_functions.hooks definition
CREATE TABLE supabase_functions.hooks (
id bigserial PRIMARY KEY,
hook_table_id integer NOT NULL,
hook_name text NOT NULL,
created_at timestamptz NOT NULL DEFAULT NOW(),
request_id bigint
);
CREATE INDEX supabase_functions_hooks_request_id_idx ON supabase_functions.hooks USING btree (request_id);
CREATE INDEX supabase_functions_hooks_h_table_id_h_name_idx ON supabase_functions.hooks USING btree (hook_table_id, hook_name);
COMMENT ON TABLE supabase_functions.hooks IS 'Supabase Functions Hooks: Audit trail for triggered hooks.';
CREATE FUNCTION supabase_functions.http_request()
RETURNS trigger
LANGUAGE plpgsql
AS $function$
DECLARE
request_id bigint;
payload jsonb;
url text := TG_ARGV[0]::text;
method text := TG_ARGV[1]::text;
headers jsonb DEFAULT '{}'::jsonb;
params jsonb DEFAULT '{}'::jsonb;
timeout_ms integer DEFAULT 1000;
BEGIN
IF url IS NULL OR url = 'null' THEN
RAISE EXCEPTION 'url argument is missing';
END IF;
IF method IS NULL OR method = 'null' THEN
RAISE EXCEPTION 'method argument is missing';
END IF;
IF TG_ARGV[2] IS NULL OR TG_ARGV[2] = 'null' THEN
headers = '{"Content-Type": "application/json"}'::jsonb;
ELSE
headers = TG_ARGV[2]::jsonb;
END IF;
IF TG_ARGV[3] IS NULL OR TG_ARGV[3] = 'null' THEN
params = '{}'::jsonb;
ELSE
params = TG_ARGV[3]::jsonb;
END IF;
IF TG_ARGV[4] IS NULL OR TG_ARGV[4] = 'null' THEN
timeout_ms = 1000;
ELSE
timeout_ms = TG_ARGV[4]::integer;
END IF;
CASE
WHEN method = 'GET' THEN
SELECT http_get INTO request_id FROM net.http_get(
url,
params,
headers,
timeout_ms
);
WHEN method = 'POST' THEN
payload = jsonb_build_object(
'old_record', OLD,
'record', NEW,
'type', TG_OP,
'table', TG_TABLE_NAME,
'schema', TG_TABLE_SCHEMA
);
SELECT http_post INTO request_id FROM net.http_post(
url,
payload,
params,
headers,
timeout_ms
);
ELSE
RAISE EXCEPTION 'method argument % is invalid', method;
END CASE;
INSERT INTO supabase_functions.hooks
(hook_table_id, hook_name, request_id)
VALUES
(TG_RELID, TG_NAME, request_id);
RETURN NEW;
END
$function$;
-- Supabase super admin
DO
$$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_roles
WHERE rolname = 'supabase_functions_admin'
)
THEN
CREATE USER supabase_functions_admin NOINHERIT CREATEROLE LOGIN NOREPLICATION;
END IF;
END
$$;
GRANT ALL PRIVILEGES ON SCHEMA supabase_functions TO supabase_functions_admin;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA supabase_functions TO supabase_functions_admin;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA supabase_functions TO supabase_functions_admin;
ALTER USER supabase_functions_admin SET search_path = "supabase_functions";
ALTER table "supabase_functions".migrations OWNER TO supabase_functions_admin;
ALTER table "supabase_functions".hooks OWNER TO supabase_functions_admin;
ALTER function "supabase_functions".http_request() OWNER TO supabase_functions_admin;
GRANT supabase_functions_admin TO postgres;
-- Remove unused supabase_pg_net_admin role
DO
$$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_roles
WHERE rolname = 'supabase_pg_net_admin'
)
THEN
REASSIGN OWNED BY supabase_pg_net_admin TO supabase_admin;
DROP OWNED BY supabase_pg_net_admin;
DROP ROLE supabase_pg_net_admin;
END IF;
END
$$;
-- pg_net grants when extension is already enabled
DO
$$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_extension
WHERE extname = 'pg_net'
)
THEN
GRANT USAGE ON SCHEMA net TO supabase_functions_admin, postgres, anon, authenticated, service_role;
ALTER function net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) SECURITY DEFINER;
ALTER function net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) SECURITY DEFINER;
ALTER function net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) SET search_path = net;
ALTER function net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) SET search_path = net;
REVOKE ALL ON FUNCTION net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) FROM PUBLIC;
REVOKE ALL ON FUNCTION net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) FROM PUBLIC;
GRANT EXECUTE ON FUNCTION net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) TO supabase_functions_admin, postgres, anon, authenticated, service_role;
GRANT EXECUTE ON FUNCTION net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) TO supabase_functions_admin, postgres, anon, authenticated, service_role;
END IF;
END
$$;
-- Event trigger for pg_net
CREATE OR REPLACE FUNCTION extensions.grant_pg_net_access()
RETURNS event_trigger
LANGUAGE plpgsql
AS $$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_event_trigger_ddl_commands() AS ev
JOIN pg_extension AS ext
ON ev.objid = ext.oid
WHERE ext.extname = 'pg_net'
)
THEN
GRANT USAGE ON SCHEMA net TO supabase_functions_admin, postgres, anon, authenticated, service_role;
ALTER function net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) SECURITY DEFINER;
ALTER function net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) SECURITY DEFINER;
ALTER function net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) SET search_path = net;
ALTER function net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) SET search_path = net;
REVOKE ALL ON FUNCTION net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) FROM PUBLIC;
REVOKE ALL ON FUNCTION net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) FROM PUBLIC;
GRANT EXECUTE ON FUNCTION net.http_get(url text, params jsonb, headers jsonb, timeout_milliseconds integer) TO supabase_functions_admin, postgres, anon, authenticated, service_role;
GRANT EXECUTE ON FUNCTION net.http_post(url text, body jsonb, params jsonb, headers jsonb, timeout_milliseconds integer) TO supabase_functions_admin, postgres, anon, authenticated, service_role;
END IF;
END;
$$;
COMMENT ON FUNCTION extensions.grant_pg_net_access IS 'Grants access to pg_net';
DO
$$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_event_trigger
WHERE evtname = 'issue_pg_net_access'
) THEN
CREATE EVENT TRIGGER issue_pg_net_access ON ddl_command_end WHEN TAG IN ('CREATE EXTENSION')
EXECUTE PROCEDURE extensions.grant_pg_net_access();
END IF;
END
$$;
INSERT INTO supabase_functions.migrations (version) VALUES ('20210809183423_update_grants');
ALTER function supabase_functions.http_request() SECURITY DEFINER;
ALTER function supabase_functions.http_request() SET search_path = supabase_functions;
REVOKE ALL ON FUNCTION supabase_functions.http_request() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION supabase_functions.http_request() TO postgres, anon, authenticated, service_role;
COMMIT;


@ -1,16 +0,0 @@
// Follow this setup guide to integrate the Deno language server with your editor:
// https://deno.land/manual/getting_started/setup_your_environment
// This enables autocomplete, go to definition, etc.
import { serve } from "https://deno.land/std@0.177.1/http/server.ts"
serve(async () => {
return new Response(
`"Hello from Edge Functions!"`,
{ headers: { "Content-Type": "application/json" } },
)
})
// To invoke:
// curl 'http://localhost:<KONG_HTTP_PORT>/functions/v1/hello' \
// --header 'Authorization: Bearer <anon/service_role API key>'


@ -1,94 +0,0 @@
import { serve } from 'https://deno.land/std@0.131.0/http/server.ts'
import * as jose from 'https://deno.land/x/jose@v4.14.4/index.ts'
console.log('main function started')
const JWT_SECRET = Deno.env.get('JWT_SECRET')
const VERIFY_JWT = Deno.env.get('VERIFY_JWT') === 'true'
function getAuthToken(req: Request) {
const authHeader = req.headers.get('authorization')
if (!authHeader) {
throw new Error('Missing authorization header')
}
const [bearer, token] = authHeader.split(' ')
if (bearer !== 'Bearer') {
throw new Error(`Auth header is not 'Bearer {token}'`)
}
return token
}
async function verifyJWT(jwt: string): Promise<boolean> {
const encoder = new TextEncoder()
const secretKey = encoder.encode(JWT_SECRET)
try {
await jose.jwtVerify(jwt, secretKey)
} catch (err) {
console.error(err)
return false
}
return true
}
serve(async (req: Request) => {
if (req.method !== 'OPTIONS' && VERIFY_JWT) {
try {
const token = getAuthToken(req)
const isValidJWT = await verifyJWT(token)
if (!isValidJWT) {
return new Response(JSON.stringify({ msg: 'Invalid JWT' }), {
status: 401,
headers: { 'Content-Type': 'application/json' },
})
}
} catch (e) {
console.error(e)
return new Response(JSON.stringify({ msg: e.toString() }), {
status: 401,
headers: { 'Content-Type': 'application/json' },
})
}
}
const url = new URL(req.url)
const { pathname } = url
const path_parts = pathname.split('/')
const service_name = path_parts[1]
if (!service_name || service_name === '') {
const error = { msg: 'missing function name in request' }
return new Response(JSON.stringify(error), {
status: 400,
headers: { 'Content-Type': 'application/json' },
})
}
const servicePath = `/home/deno/functions/${service_name}`
console.error(`serving the request with ${servicePath}`)
const memoryLimitMb = 150
const workerTimeoutMs = 1 * 60 * 1000
const noModuleCache = false
const importMapPath = null
const envVarsObj = Deno.env.toObject()
const envVars = Object.keys(envVarsObj).map((k) => [k, envVarsObj[k]])
try {
const worker = await EdgeRuntime.userWorkers.create({
servicePath,
memoryLimitMb,
workerTimeoutMs,
noModuleCache,
importMapPath,
envVars,
})
return await worker.fetch(req)
} catch (e) {
const error = { msg: e.toString() }
return new Response(JSON.stringify(error), {
status: 500,
headers: { 'Content-Type': 'application/json' },
})
}
})


@ -1,232 +0,0 @@
api:
enabled: true
address: 0.0.0.0:9001
sources:
docker_host:
type: docker_logs
exclude_containers:
- supabase-vector
transforms:
project_logs:
type: remap
inputs:
- docker_host
source: |-
.project = "default"
.event_message = del(.message)
.appname = del(.container_name)
del(.container_created_at)
del(.container_id)
del(.source_type)
del(.stream)
del(.label)
del(.image)
del(.host)
del(.stream)
router:
type: route
inputs:
- project_logs
route:
kong: '.appname == "supabase-kong"'
auth: '.appname == "supabase-auth"'
rest: '.appname == "supabase-rest"'
realtime: '.appname == "supabase-realtime"'
storage: '.appname == "supabase-storage"'
functions: '.appname == "supabase-functions"'
db: '.appname == "supabase-db"'
# Ignores non nginx errors since they are related with kong booting up
kong_logs:
type: remap
inputs:
- router.kong
source: |-
req, err = parse_nginx_log(.event_message, "combined")
if err == null {
.timestamp = req.timestamp
.metadata.request.headers.referer = req.referer
.metadata.request.headers.user_agent = req.agent
.metadata.request.headers.cf_connecting_ip = req.client
.metadata.request.method = req.method
.metadata.request.path = req.path
.metadata.request.protocol = req.protocol
.metadata.response.status_code = req.status
}
if err != null {
abort
}
# Ignores non nginx errors since they are related with kong booting up
kong_err:
type: remap
inputs:
- router.kong
source: |-
.metadata.request.method = "GET"
.metadata.response.status_code = 200
parsed, err = parse_nginx_log(.event_message, "error")
if err == null {
.timestamp = parsed.timestamp
.severity = parsed.severity
.metadata.request.host = parsed.host
.metadata.request.headers.cf_connecting_ip = parsed.client
url, err = split(parsed.request, " ")
if err == null {
.metadata.request.method = url[0]
.metadata.request.path = url[1]
.metadata.request.protocol = url[2]
}
}
if err != null {
abort
}
# Gotrue logs are structured json strings which frontend parses directly. But we keep metadata for consistency.
auth_logs:
type: remap
inputs:
- router.auth
source: |-
parsed, err = parse_json(.event_message)
if err == null {
.metadata.timestamp = parsed.time
.metadata = merge!(.metadata, parsed)
}
# PostgREST logs are structured so we separate timestamp from message using regex
rest_logs:
type: remap
inputs:
- router.rest
source: |-
parsed, err = parse_regex(.event_message, r'^(?P<time>.*): (?P<msg>.*)$')
if err == null {
.event_message = parsed.msg
.timestamp = to_timestamp!(parsed.time)
.metadata.host = .project
}
# Realtime logs are structured so we parse the severity level using regex (ignore time because it has no date)
realtime_logs:
type: remap
inputs:
- router.realtime
source: |-
.metadata.project = del(.project)
.metadata.external_id = .metadata.project
parsed, err = parse_regex(.event_message, r'^(?P<time>\d+:\d+:\d+\.\d+) \[(?P<level>\w+)\] (?P<msg>.*)$')
if err == null {
.event_message = parsed.msg
.metadata.level = parsed.level
}
# Storage logs may contain json objects so we parse them for completeness
storage_logs:
type: remap
inputs:
- router.storage
source: |-
.metadata.project = del(.project)
.metadata.tenantId = .metadata.project
parsed, err = parse_json(.event_message)
if err == null {
.event_message = parsed.msg
.metadata.level = parsed.level
.metadata.timestamp = parsed.time
.metadata.context[0].host = parsed.hostname
.metadata.context[0].pid = parsed.pid
}
# Postgres logs some messages to stderr which we map to warning severity level
db_logs:
type: remap
inputs:
- router.db
source: |-
.metadata.host = "db-default"
.metadata.parsed.timestamp = .timestamp
parsed, err = parse_regex(.event_message, r'.*(?P<level>INFO|NOTICE|WARNING|ERROR|LOG|FATAL|PANIC?):.*', numeric_groups: true)
if err != null || parsed == null {
.metadata.parsed.error_severity = "info"
}
if parsed != null {
.metadata.parsed.error_severity = parsed.level
}
if .metadata.parsed.error_severity == "info" {
.metadata.parsed.error_severity = "log"
}
.metadata.parsed.error_severity = upcase!(.metadata.parsed.error_severity)
sinks:
logflare_auth:
type: 'http'
inputs:
- auth_logs
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=gotrue.logs.prod&api_key=your-super-secret-and-long-logflare-key'
logflare_realtime:
type: 'http'
inputs:
- realtime_logs
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=realtime.logs.prod&api_key=your-super-secret-and-long-logflare-key'
logflare_rest:
type: 'http'
inputs:
- rest_logs
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=postgREST.logs.prod&api_key=your-super-secret-and-long-logflare-key'
logflare_db:
type: 'http'
inputs:
- db_logs
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
# We must route the sink through kong because ingesting logs before logflare is fully initialised will
# lead to broken queries from studio. This works by the assumption that containers are started in the
# following order: vector > db > logflare > kong
uri: 'http://kong:8000/analytics/v1/api/logs?source_name=postgres.logs&api_key=your-super-secret-and-long-logflare-key'
logflare_functions:
type: 'http'
inputs:
- router.functions
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=deno-relay-logs&api_key=your-super-secret-and-long-logflare-key'
logflare_storage:
type: 'http'
inputs:
- storage_logs
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=storage.logs.prod.2&api_key=your-super-secret-and-long-logflare-key'
logflare_kong:
type: 'http'
inputs:
- kong_logs
- kong_err
encoding:
codec: 'json'
method: 'post'
request:
retry_max_duration_secs: 10
uri: 'http://analytics:4000/api/logs?source_name=cloudflare.logs.prod&api_key=your-super-secret-and-long-logflare-key'