From d3c53e63539bade5cbd716edf7e9af68ba15ed08 Mon Sep 17 00:00:00 2001
From: AmineDiro
Date: Mon, 22 Jul 2024 10:09:02 +0200
Subject: [PATCH] feat: move parsers quivr core (#2884)

# Description

- Moved the `quivr-api` parsers to `quivr_core.processor.implementations` by dynamically creating classes on the fly that inherit from `ProcessorBase`
- Defined a priority-based system to automagically register the "important" processors that we can import at runtime
- Wrote extensive tests for the registry
- Added support for more file extensions

### Next steps
- Find a way to get correct LSP autocompletion on the dynamically generated processors
- Test that processors are imported correctly based on the packages installed in the environment (using tox?)
---
 backend/core/poetry.lock                      |  48 +-
 backend/core/pyproject.toml                   |   3 +-
 backend/core/quivr_core/__init__.py           |   2 +-
 backend/core/quivr_core/brain/brain.py        |  70 +--
 .../core/quivr_core/brain/brain_defaults.py   |  55 +++
 backend/core/quivr_core/files/__init__.py     |   3 +
 backend/core/quivr_core/files/file.py         | 126 ++++++
 .../processor/implementations/__init__.py     |   0
 .../processor/implementations/default.py      | 123 ++++++
 .../simple_txt_processor.py                   |  27 +-
 .../{ => implementations}/tika_processor.py   |  22 +-
 .../quivr_core/processor/processor_base.py    |  32 +-
 backend/core/quivr_core/processor/registry.py | 179 ++++++--
 .../quivr_core/processor/txt_processor.py     |  53 ---
 .../core/quivr_core/storage/local_storage.py  |   2 +-
 .../core/quivr_core/storage/storage_base.py   |   3 +
 backend/core/tests/conftest.py                |   2 +-
 .../processor/test_default_implementations.py |  18 +
 backend/core/tests/processor/test_registry.py | 166 ++++++-
 .../processor/test_simple_txt_processor.py    |   4 +-
 .../tests/processor/test_tika_processor.py    |   2 +-
 .../tests/processor/test_txt_processor.py     |  21 +-
 backend/core/tests/test_quivr_rag.py          |   2 +-
 backend/poetry.lock                           | 412 +++++++++---------
 24 files changed, 930 insertions(+), 445 deletions(-)
 create mode 100644 backend/core/quivr_core/brain/brain_defaults.py
 create mode 100644 backend/core/quivr_core/files/__init__.py
 create mode 100644 backend/core/quivr_core/files/file.py
 create mode 100644 backend/core/quivr_core/processor/implementations/__init__.py
 create mode 100644 backend/core/quivr_core/processor/implementations/default.py
 rename backend/core/quivr_core/processor/{ => implementations}/simple_txt_processor.py (67%)
 rename backend/core/quivr_core/processor/{ => implementations}/tika_processor.py (77%)
 delete mode 100644 backend/core/quivr_core/processor/txt_processor.py
 create mode 100644 backend/core/tests/processor/test_default_implementations.py

diff --git a/backend/core/poetry.lock b/backend/core/poetry.lock
index c690d0c41..183bb956b 100644
--- a/backend/core/poetry.lock
+++ b/backend/core/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.0 and should not be changed by hand.
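To make the priority-based registration described above concrete, here is a minimal sketch of the new registry API from a consumer's point of view (`my_pkg.processors.MyTxtProcessor` is a hypothetical module path, not part of this PR):

```python
from quivr_core.processor.registry import get_processor_class, register_processor

# Lazy registration: the class behind the dotted path is only imported the first
# time the extension is requested. Lower priority numbers win; the bundled
# defaults sit at the lowest priority (100).
register_processor(
    ".txt",
    "my_pkg.processors.MyTxtProcessor",  # hypothetical processor module
    append=True,
    errtxt="please install my_pkg to use MyTxtProcessor",
    priority=0,
)

# Resolution pops entries from the priority heap; if importing my_pkg fails,
# the registry falls back to the built-in txt processors.
processor_cls = get_processor_class(".txt")
```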
[[package]] name = "aiofiles" @@ -1137,18 +1137,18 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout" [[package]] name = "langchain" -version = "0.2.8" +version = "0.2.9" description = "Building applications with LLMs through composability" optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain-0.2.8-py3-none-any.whl", hash = "sha256:53e7dfe50294a14200f33bec22b4e14cb63857ccf0a5500b0d18b0fd51285d58"}, - {file = "langchain-0.2.8.tar.gz", hash = "sha256:7fecb309e3558cde4e5cf7e9ffb7c1ab3f07121c40a7ff3b0c27135f8120c296"}, + {file = "langchain-0.2.9-py3-none-any.whl", hash = "sha256:be23fcb29adbd5059944f1fed08fa575f0739d420b1c4127531e0fbf5663fcca"}, + {file = "langchain-0.2.9.tar.gz", hash = "sha256:cc326a7f6347787a19882928c324433b1a79df629bba45604b2d26495ee5d69c"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" -langchain-core = ">=0.2.19,<0.3.0" +langchain-core = ">=0.2.20,<0.3.0" langchain-text-splitters = ">=0.2.0,<0.3.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ @@ -1189,13 +1189,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-core" -version = "0.2.20" +version = "0.2.21" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_core-0.2.20-py3-none-any.whl", hash = "sha256:16cc4da6f7ebf33accea7af45a70480733dc852ab291030fb6924865bd7caf76"}, - {file = "langchain_core-0.2.20.tar.gz", hash = "sha256:a66c439e085d8c75f822f7650a5551d17bada4003521173c763d875d949e4ed5"}, + {file = "langchain_core-0.2.21-py3-none-any.whl", hash = "sha256:805b1f53e0e2424b83e3673cba1c9354105c5a5e4a1d0d768b1e70d8ac0d604d"}, + {file = "langchain_core-0.2.21.tar.gz", hash = "sha256:3d1e28179a5d496b900ebef45e1471eaae9fb63fc570f89ded78b026fd08ba84"}, ] [package.dependencies] @@ -1211,17 +1211,17 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-openai" -version = "0.1.16" +version = "0.1.17" description = "An integration package connecting OpenAI and LangChain" optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_openai-0.1.16-py3-none-any.whl", hash = "sha256:bff90e9d0be786495920a7851ae4d55247fb084d3a11d1b15bfe91904ce1cb0f"}, - {file = "langchain_openai-0.1.16.tar.gz", hash = "sha256:3dedcc785a64a488ad6a313cc59b0dccea025d27413b55cec73a20dcf3ded4da"}, + {file = "langchain_openai-0.1.17-py3-none-any.whl", hash = "sha256:30bef5574ecbbbb91b8025b2dc5a1bd81fd62157d3ad1a35d820141f31c5b443"}, + {file = "langchain_openai-0.1.17.tar.gz", hash = "sha256:c5d70ddecdcb93e146f376bdbadbb6ec69de9ac0f402cd5b83de50b655ba85ee"}, ] [package.dependencies] -langchain-core = ">=0.2.17,<0.3.0" +langchain-core = ">=0.2.20,<0.3.0" openai = ">=1.32.0,<2.0.0" tiktoken = ">=0.7,<1" @@ -1241,13 +1241,13 @@ langchain-core = ">=0.2.10,<0.3.0" [[package]] name = "langsmith" -version = "0.1.88" +version = "0.1.92" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.88-py3-none-any.whl", hash = "sha256:460ebb7de440afd150fcea8f54ca8779821f2228cd59e149e5845c9dbe06db16"}, - {file = "langsmith-0.1.88.tar.gz", hash = "sha256:28a07dec19197f4808aa2628d5a3ccafcbe14cc137aef0e607bbd128e7907821"}, + {file = "langsmith-0.1.92-py3-none-any.whl", hash = "sha256:8acb27844ff5263bde14b23425f83ee63996f4d5a8e9998cdeef07fd913137ff"}, + {file = "langsmith-0.1.92.tar.gz", hash = "sha256:681a613a4dc8c8e57c8961c347a39ffcb64d6c697e8ddde1fd8458fcfaef6c13"}, ] [package.dependencies] @@ -1562,13 +1562,13 @@ files = [ [[package]] name = "openai" -version = "1.35.14" +version = "1.36.0" description = "The official Python library for the openai API" optional = true python-versions = ">=3.7.1" files = [ - {file = "openai-1.35.14-py3-none-any.whl", hash = "sha256:adadf8c176e0b8c47ad782ed45dc20ef46438ee1f02c7103c4155cff79c8f68b"}, - {file = "openai-1.35.14.tar.gz", hash = "sha256:394ba1dfd12ecec1d634c50e512d24ff1858bbc2674ffcce309b822785a058de"}, + {file = "openai-1.36.0-py3-none-any.whl", hash = "sha256:82b74ded1fe2ea94abb19a007178bc143675f1b6903cebd63e2968d654bb0a6f"}, + {file = "openai-1.36.0.tar.gz", hash = "sha256:a124baf0e1657d6156e12248642f88489cd030be8655b69bc1c13eb50e71a93d"}, ] [package.dependencies] @@ -2003,13 +2003,13 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments [[package]] name = "pytest-asyncio" -version = "0.23.7" +version = "0.23.8" description = "Pytest support for asyncio" optional = false python-versions = ">=3.8" files = [ - {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, - {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, + {file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"}, + {file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"}, ] [package.dependencies] @@ -2258,7 +2258,7 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} name = "regex" version = "2024.5.15" description = "Alternative regular expression module, to replace re." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, @@ -2554,7 +2554,7 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] name = "tiktoken" version = "0.7.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, @@ -2841,4 +2841,4 @@ pdf = [] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "cc08905f149df2f415e1d00010e5b89a371efdcf4059855d597b1b6e9973a536" +content-hash = "6efdd0edd1d7e89007d707fab39d72655dfc570401bfc4b6e1c5c3a7ec37529f" diff --git a/backend/core/pyproject.toml b/backend/core/pyproject.toml index ef9ef0fdf..03a1640d5 100644 --- a/backend/core/pyproject.toml +++ b/backend/core/pyproject.toml @@ -11,11 +11,12 @@ python = "^3.11" pydantic = "^2.7.4" langchain-core = "^0.2.10" httpx = "^0.27.0" +rich = "^13.7.1" +tiktoken = "^0.7.0" aiofiles = ">=23.0.0,<25.0.0" faiss-cpu = { version = "^1.8.0.post1", optional = true } langchain-community = { version = "^0.2.6", optional = true } langchain-openai = { version = "^0.1.14", optional = true } -rich = "^13.7.1" [tool.poetry.extras] base = ["langchain-community", "faiss-cpu", "langchain-openai"] diff --git a/backend/core/quivr_core/__init__.py b/backend/core/quivr_core/__init__.py index b3573845d..1fcda2808 100644 --- a/backend/core/quivr_core/__init__.py +++ b/backend/core/quivr_core/__init__.py @@ -28,7 +28,7 @@ def register_entries(): name, spec.value.replace(":", "."), errtxt=err_msg, - override=True, + append=True, ) diff --git a/backend/core/quivr_core/brain/brain.py b/backend/core/quivr_core/brain/brain.py index d5573b335..6cd726a14 100644 --- a/backend/core/quivr_core/brain/brain.py +++ b/backend/core/quivr_core/brain/brain.py @@ -14,64 +14,20 @@ from rich.panel import Panel from quivr_core.brain.info import BrainInfo, ChatHistoryInfo from quivr_core.chat import ChatHistory -from quivr_core.config import LLMEndpointConfig, RAGConfig +from quivr_core.config import RAGConfig +from quivr_core.files.file import load_qfile from quivr_core.llm import LLMEndpoint from quivr_core.models import ParsedRAGChunkResponse, ParsedRAGResponse, SearchResult from quivr_core.processor.registry import get_processor_class from quivr_core.quivr_rag import QuivrQARAG -from quivr_core.storage.file import load_qfile from quivr_core.storage.local_storage import TransparentStorage from quivr_core.storage.storage_base import StorageBase +from .brain_defaults import build_default_vectordb, default_embedder, default_llm + logger = logging.getLogger("quivr_core") -async def _build_default_vectordb( - docs: list[Document], embedder: Embeddings -) -> VectorStore: - try: - from langchain_community.vectorstores import FAISS - - logger.debug("Using Faiss-CPU as vector store.") - # TODO(@aminediro) : embedding call is not concurrent for all documents but waits - # We can actually wait on all processing - if len(docs) > 0: - vector_db = await FAISS.afrom_documents(documents=docs, embedding=embedder) - return vector_db - else: - raise ValueError("can't initialize brain without documents") - - except ImportError as e: - raise ImportError( - "Please provide a valid vector store or install quivr-core['base'] package for using the default 
one." - ) from e - - -def _default_embedder() -> Embeddings: - try: - from langchain_openai import OpenAIEmbeddings - - logger.debug("Loaded OpenAIEmbeddings as default LLM for brain") - embedder = OpenAIEmbeddings() - return embedder - except ImportError as e: - raise ImportError( - "Please provide a valid Embedder or install quivr-core['base'] package for using the defaultone." - ) from e - - -def _default_llm() -> LLMEndpoint: - try: - logger.debug("Loaded ChatOpenAI as default LLM for brain") - llm = LLMEndpoint.from_config(LLMEndpointConfig()) - return llm - - except ImportError as e: - raise ImportError( - "Please provide a valid BaseLLM or install quivr-core['base'] package" - ) from e - - async def process_files( storage: StorageBase, skip_file_error: bool, **processor_kwargs: dict[str, Any] ) -> list[Document]: @@ -80,6 +36,7 @@ async def process_files( try: if file.file_extension: processor_cls = get_processor_class(file.file_extension) + logger.debug(f"processing {file} using class {processor_cls.__name__}") processor = processor_cls(**processor_kwargs) docs = await processor.process_file(file) knowledge.extend(docs) @@ -171,18 +128,21 @@ class Brain: skip_file_error: bool = False, ): if llm is None: - llm = _default_llm() + llm = default_llm() if embedder is None: - embedder = _default_embedder() + embedder = default_embedder() brain_id = uuid4() # TODO: run in parallel using tasks + for path in file_paths: file = await load_qfile(brain_id, path) await storage.upload_file(file) + logger.debug(f"uploaded all files to {storage}") + # Parse files docs = await process_files( storage=storage, @@ -191,10 +151,12 @@ class Brain: # Building brain's vectordb if vector_db is None: - vector_db = await _build_default_vectordb(docs, embedder) + vector_db = await build_default_vectordb(docs, embedder) else: await vector_db.aadd_documents(docs) + logger.debug(f"added {len(docs)} chunks to vectordb") + return cls( id=brain_id, name=name, @@ -241,16 +203,16 @@ class Brain: embedder: Embeddings | None = None, ) -> Self: if llm is None: - llm = _default_llm() + llm = default_llm() if embedder is None: - embedder = _default_embedder() + embedder = default_embedder() brain_id = uuid4() # Building brain's vectordb if vector_db is None: - vector_db = await _build_default_vectordb(langchain_documents, embedder) + vector_db = await build_default_vectordb(langchain_documents, embedder) else: await vector_db.aadd_documents(langchain_documents) diff --git a/backend/core/quivr_core/brain/brain_defaults.py b/backend/core/quivr_core/brain/brain_defaults.py new file mode 100644 index 000000000..a0cf71cde --- /dev/null +++ b/backend/core/quivr_core/brain/brain_defaults.py @@ -0,0 +1,55 @@ +import logging + +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +from quivr_core.config import LLMEndpointConfig +from quivr_core.llm import LLMEndpoint + +logger = logging.getLogger("quivr_core") + + +async def build_default_vectordb( + docs: list[Document], embedder: Embeddings +) -> VectorStore: + try: + from langchain_community.vectorstores import FAISS + + logger.debug("Using Faiss-CPU as vector store.") + # TODO(@aminediro) : embedding call is usually not concurrent for all documents but waits + if len(docs) > 0: + vector_db = await FAISS.afrom_documents(documents=docs, embedding=embedder) + return vector_db + else: + raise ValueError("can't initialize brain without documents") + + except ImportError as e: + raise 
ImportError(
+            "Please provide a valid vector store or install quivr-core['base'] package for using the default one."
+        ) from e
+
+
+def default_embedder() -> Embeddings:
+    try:
+        from langchain_openai import OpenAIEmbeddings
+
+        logger.debug("Loaded OpenAIEmbeddings as default embedder for brain")
+        embedder = OpenAIEmbeddings()
+        return embedder
+    except ImportError as e:
+        raise ImportError(
+            "Please provide a valid Embedder or install quivr-core['base'] package for using the default one."
+        ) from e
+
+
+def default_llm() -> LLMEndpoint:
+    try:
+        logger.debug("Loaded ChatOpenAI as default LLM for brain")
+        llm = LLMEndpoint.from_config(LLMEndpointConfig())
+        return llm
+
+    except ImportError as e:
+        raise ImportError(
+            "Please provide a valid BaseLLM or install quivr-core['base'] package"
+        ) from e
diff --git a/backend/core/quivr_core/files/__init__.py b/backend/core/quivr_core/files/__init__.py
new file mode 100644
index 000000000..6e070b670
--- /dev/null
+++ b/backend/core/quivr_core/files/__init__.py
@@ -0,0 +1,3 @@
+from .file import QuivrFile
+
+__all__ = ["QuivrFile"]
diff --git a/backend/core/quivr_core/files/file.py b/backend/core/quivr_core/files/file.py
new file mode 100644
index 000000000..3bb6f1781
--- /dev/null
+++ b/backend/core/quivr_core/files/file.py
@@ -0,0 +1,126 @@
+import hashlib
+import mimetypes
+import os
+import warnings
+from contextlib import asynccontextmanager
+from enum import Enum
+from pathlib import Path
+from typing import Any, AsyncGenerator, AsyncIterable
+from uuid import UUID, uuid4
+
+import aiofiles
+
+
+class FileExtension(str, Enum):
+    txt = ".txt"
+    pdf = ".pdf"
+    csv = ".csv"
+    docx = ".docx"
+    pptx = ".pptx"
+    xls = ".xls"
+    xlsx = ".xlsx"
+    md = ".md"
+    mdx = ".mdx"
+    markdown = ".markdown"
+    epub = ".epub"
+    html = ".html"
+    odt = ".odt"
+    py = ".py"
+    ipynb = ".ipynb"
+
+
+def get_file_extension(file_path: Path) -> FileExtension | str:
+    try:
+        mime_type, _ = mimetypes.guess_type(file_path.name)
+        if mime_type:
+            mime_ext = mimetypes.guess_extension(mime_type)
+            if mime_ext:
+                return FileExtension(mime_ext)
+        return FileExtension(file_path.suffix)
+    except ValueError:
+        warnings.warn(
+            f"File {file_path.name} extension isn't recognized. Make sure you have registered a parser for {file_path.suffix}",
+            stacklevel=2,
+        )
+        return file_path.suffix
+
+
+async def load_qfile(brain_id: UUID, path: str | Path):
+    if not isinstance(path, Path):
+        path = Path(path)
+
+    if not path.exists():
+        raise FileNotFoundError(f"file {path} doesn't exist")
+
+    file_size = os.stat(path).st_size
+
+    async with aiofiles.open(path, mode="rb") as f:
+        file_md5 = hashlib.md5(await f.read()).hexdigest()
+
+    try:
+        # NOTE: when loading from existing storage, file name will be uuid
+        id = UUID(path.name)
+    except ValueError:
+        id = uuid4()
+
+    return QuivrFile(
+        id=id,
+        brain_id=brain_id,
+        path=path,
+        original_filename=path.name,
+        file_extension=get_file_extension(path),
+        file_size=file_size,
+        file_md5=file_md5,
+    )
+
+
+class QuivrFile:
+    __slots__ = [
+        "id",
+        "brain_id",
+        "path",
+        "original_filename",
+        "file_size",
+        "file_extension",
+        "file_md5",
+    ]
+
+    def __init__(
+        self,
+        id: UUID,
+        original_filename: str,
+        path: Path,
+        brain_id: UUID,
+        file_md5: str,
+        file_extension: FileExtension | str,
+        file_size: int | None = None,
+    ) -> None:
+        self.id = id
+        self.brain_id = brain_id
+        self.path = path
+        self.original_filename = original_filename
+        self.file_size = file_size
+        self.file_extension = file_extension
+        self.file_md5 = file_md5
+
+    def __repr__(self) -> str:
+        return f"QuivrFile-{self.id} original_filename:{self.original_filename}"
+
+    @asynccontextmanager
+    async def open(self) -> AsyncGenerator[AsyncIterable[bytes], None]:
+        # TODO(@aminediro) : match on path type
+        f = await aiofiles.open(self.path, mode="rb")
+        try:
+            yield f
+        finally:
+            await f.close()
+
+    @property
+    def metadata(self) -> dict[str, Any]:
+        return {
+            "qfile_id": self.id,
+            "qfile_path": self.path,
+            "original_file_name": self.original_filename,
+            "file_md5": self.file_md5,
+            "file_size": self.file_size,
+        }
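The new `files` module can be exercised on its own. A small usage sketch based on the API above (the `data.txt` path is a placeholder for an existing file):

```python
import asyncio
from uuid import uuid4

from quivr_core.files.file import load_qfile


async def main() -> None:
    # Computes size and md5, derives the extension, and wraps it all in a QuivrFile
    qfile = await load_qfile(brain_id=uuid4(), path="data.txt")
    async with qfile.open() as f:
        raw = await f.read()
    # This metadata is merged into every chunk produced from the file
    print(qfile.metadata, len(raw))


asyncio.run(main())
```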
diff --git a/backend/core/quivr_core/processor/implementations/__init__.py b/backend/core/quivr_core/processor/implementations/__init__.py
new file mode 100644
index 000000000..e69de29bb
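The `default.py` module that follows generates one processor class per loader with a small `type()`-based factory. Stripped of the loader and splitter details, the pattern boils down to this standalone sketch (illustrative, not the actual implementation):

```python
from typing import List, Type


class Base:
    supported_extensions: List[str]


def build(cls_name: str, extensions: List[str]) -> Type[Base]:
    class _Impl(Base):
        supported_extensions = extensions

        def describe(self) -> str:
            return f"{cls_name} handles {extensions}"

    # Re-create the class under its public name so repr and error messages read well
    return type(cls_name, (Base,), dict(_Impl.__dict__))


CSVLike = build("CSVLike", [".csv"])
print(CSVLike().describe())  # CSVLike handles ['.csv']
```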
diff --git a/backend/core/quivr_core/processor/implementations/default.py b/backend/core/quivr_core/processor/implementations/default.py
new file mode 100644
index 000000000..33552b37c
--- /dev/null
+++ b/backend/core/quivr_core/processor/implementations/default.py
@@ -0,0 +1,123 @@
+from typing import Any, List, Type, TypeVar
+
+import tiktoken
+from langchain_community.document_loaders import (
+    BibtexLoader,
+    CSVLoader,
+    Docx2txtLoader,
+    NotebookLoader,
+    PythonLoader,
+    UnstructuredEPubLoader,
+    UnstructuredExcelLoader,
+    UnstructuredFileLoader,
+    UnstructuredHTMLLoader,
+    UnstructuredMarkdownLoader,
+    UnstructuredPDFLoader,
+)
+from langchain_community.document_loaders.base import BaseLoader
+from langchain_community.document_loaders.text import TextLoader
+from langchain_core.documents import Document
+from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
+
+from quivr_core.files.file import FileExtension, QuivrFile
+from quivr_core.processor.processor_base import ProcessorBase
+from quivr_core.processor.splitter import SplitterConfig
+
+enc = tiktoken.get_encoding("cl100k_base")
+
+
+P = TypeVar("P", bound=BaseLoader)
+
+
+class ProcessorInit(ProcessorBase):
+    def __init__(self, *args, **loader_kwargs) -> None:
+        pass
+
+
+# FIXME(@aminediro):
+# dynamically creates Processor classes. Maybe redo this for finer control over instantiation
+# processor classes are opaque as we don't know what params they would have -> not easy to have lsp completion
+def _build_processor(
+    cls_name: str, load_cls: Type[P], cls_extensions: List[FileExtension | str]
+) -> Type[ProcessorInit]:
+    class _Processor(ProcessorBase):
+        supported_extensions = cls_extensions
+
+        def __init__(
+            self,
+            splitter: TextSplitter | None = None,
+            splitter_config: SplitterConfig = SplitterConfig(),
+            **loader_kwargs: dict[str, Any],
+        ) -> None:
+            self.loader_cls = load_cls
+            self.loader_kwargs = loader_kwargs
+
+            self.splitter_config = splitter_config
+
+            if splitter:
+                self.text_splitter = splitter
+            else:
+                self.text_splitter = (
+                    RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+                        chunk_size=splitter_config.chunk_size,
+                        chunk_overlap=splitter_config.chunk_overlap,
+                    )
+                )
+
+        @property
+        def processor_metadata(self) -> dict[str, Any]:
+            return {
+                "processor_cls": self.loader_cls.__name__,
+                "splitter": self.splitter_config.model_dump(),
+            }
+
+        async def process_file_inner(self, file: QuivrFile) -> list[Document]:
+            if "__init__" in self.loader_cls.__dict__:
+                # NOTE: mypy can't correctly type this as BaseLoader doesn't have a constructor method
+                loader = self.loader_cls(file.path, **self.loader_kwargs)  # type: ignore
+            else:
+                loader = self.loader_cls()
+
+            documents = await loader.aload()
+            docs = self.text_splitter.split_documents(documents)
+
+            for doc in docs:
+                doc.metadata = {"chunk_size": len(enc.encode(doc.page_content))}
+
+            return docs
+
+    return type(cls_name, (ProcessorInit,), dict(_Processor.__dict__))
+
+
+CSVProcessor = _build_processor("CSVProcessor", CSVLoader, [FileExtension.csv])
+TikTokenTxtProcessor = _build_processor(
+    "TikTokenTxtProcessor", TextLoader, [FileExtension.txt]
+)
+DOCXProcessor = _build_processor("DOCXProcessor", Docx2txtLoader, [FileExtension.docx])
+XLSXProcessor = _build_processor(
+    "XLSXProcessor", UnstructuredExcelLoader, [FileExtension.xlsx, FileExtension.xls]
+)
+PPTProcessor = _build_processor(
+    "PPTProcessor", UnstructuredFileLoader, [FileExtension.pptx]
+)
+MarkdownProcessor = _build_processor(
+    "MarkdownProcessor",
+    UnstructuredMarkdownLoader,
+    [FileExtension.md, FileExtension.mdx, FileExtension.markdown],
+)
+EpubProcessor = _build_processor(
+    "EpubProcessor", UnstructuredEPubLoader, [FileExtension.epub]
+)
+BibTexProcessor = _build_processor(
+    "BibTexProcessor", BibtexLoader, [FileExtension.epub]
+)
+ODTProcessor = _build_processor(
+    "ODTProcessor", UnstructuredPDFLoader, [FileExtension.odt]
+)
+HTMLProcessor = _build_processor(
+    "HTMLProcessor", UnstructuredHTMLLoader, [FileExtension.html]
+)
+PythonProcessor = _build_processor("PythonProcessor", PythonLoader, [FileExtension.py])
+NotebookProcessor = _build_processor(
+    "NotebookProcessor", NotebookLoader, [FileExtension.ipynb]
+)
diff --git a/backend/core/quivr_core/processor/simple_txt_processor.py b/backend/core/quivr_core/processor/implementations/simple_txt_processor.py
similarity index 67%
rename from backend/core/quivr_core/processor/simple_txt_processor.py
rename to backend/core/quivr_core/processor/implementations/simple_txt_processor.py
index ba05a6281..d7dd05ad9 100644
--- a/backend/core/quivr_core/processor/simple_txt_processor.py
+++ b/backend/core/quivr_core/processor/implementations/simple_txt_processor.py
@@ -1,13 +1,12 @@
-from importlib.metadata import version
-from uuid import uuid4
+from typing import Any
 
 import aiofiles
 from langchain_core.documents import
Document +from quivr_core.files.file import QuivrFile from quivr_core.processor.processor_base import ProcessorBase from quivr_core.processor.registry import FileExtension from quivr_core.processor.splitter import SplitterConfig -from quivr_core.storage.file import QuivrFile def recursive_character_splitter( @@ -36,24 +35,18 @@ class SimpleTxtProcessor(ProcessorBase): super().__init__(**kwargs) self.splitter_config = splitter_config - async def process_file(self, file: QuivrFile) -> list[Document]: - self.check_supported(file) - file_metadata = file.metadata + @property + def processor_metadata(self) -> dict[str, Any]: + return { + "processor_cls": "SimpleTxtProcessor", + "splitter": self.splitter_config.model_dump(), + } + async def process_file_inner(self, file: QuivrFile) -> list[Document]: async with aiofiles.open(file.path, mode="r") as f: content = await f.read() - doc = Document( - page_content=content, - metadata={ - "id": uuid4(), - "chunk_size": len(content), - "chunk_overlap": self.splitter_config.chunk_overlap, - "parser_name": self.__class__.__name__, - "quivr_core_version": version("quivr-core"), - **file_metadata, - }, - ) + doc = Document(page_content=content) docs = recursive_character_splitter( doc, self.splitter_config.chunk_size, self.splitter_config.chunk_overlap diff --git a/backend/core/quivr_core/processor/tika_processor.py b/backend/core/quivr_core/processor/implementations/tika_processor.py similarity index 77% rename from backend/core/quivr_core/processor/tika_processor.py rename to backend/core/quivr_core/processor/implementations/tika_processor.py index 06525a999..907527d1b 100644 --- a/backend/core/quivr_core/processor/tika_processor.py +++ b/backend/core/quivr_core/processor/implementations/tika_processor.py @@ -1,15 +1,14 @@ import logging -from importlib.metadata import version from typing import AsyncIterable import httpx from langchain_core.documents import Document from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter +from quivr_core.files.file import QuivrFile from quivr_core.processor.processor_base import ProcessorBase from quivr_core.processor.registry import FileExtension from quivr_core.processor.splitter import SplitterConfig -from quivr_core.storage.file import QuivrFile logger = logging.getLogger("quivr_core") @@ -52,23 +51,16 @@ class TikaProcessor(ProcessorBase): logger.debug(f"tika url error :{e}. 
retrying for the {retry} time...")
 
         raise RuntimeError("can't send parse request to tika server")
 
-    async def process_file(self, file: QuivrFile) -> list[Document]:
-        self.check_supported(file)
+    @property
+    def processor_metadata(self):
+        return {
+            "chunk_overlap": self.splitter_config.chunk_overlap,
+        }
 
+    async def process_file_inner(self, file: QuivrFile) -> list[Document]:
         async with file.open() as f:
             txt = await self._send_parse_tika(f)
         document = Document(page_content=txt)
-        # Use the default splitter
         docs = self.text_splitter.split_documents([document])
-        file_metadata = file.metadata
-
-        for doc in docs:
-            doc.metadata = {
-                "chunk_size": len(doc.page_content),
-                "chunk_overlap": self.splitter_config.chunk_overlap,
-                "parser_name": self.__class__.__name__,
-                "quivr_core_version": version("quivr-core"),
-                **file_metadata,
-            }
 
         return docs
diff --git a/backend/core/quivr_core/processor/processor_base.py b/backend/core/quivr_core/processor/processor_base.py
index 93cd8e2e7..52ec302b5 100644
--- a/backend/core/quivr_core/processor/processor_base.py
+++ b/backend/core/quivr_core/processor/processor_base.py
@@ -1,8 +1,11 @@
 from abc import ABC, abstractmethod
+from importlib.metadata import version
+from typing import Any
+from uuid import uuid4
 
 from langchain_core.documents import Document
 
-from quivr_core.storage.file import FileExtension, QuivrFile
+from quivr_core.files.file import FileExtension, QuivrFile
 
 # TODO: processors should be cached somewhere ?
@@ -11,10 +14,29 @@ from quivr_core.storage.file import FileExtension, QuivrFile
 class ProcessorBase(ABC):
     supported_extensions: list[FileExtension | str]
 
-    @abstractmethod
-    async def process_file(self, file: QuivrFile) -> list[Document]:
-        raise NotImplementedError
-
     def check_supported(self, file: QuivrFile):
         if file.file_extension not in self.supported_extensions:
             raise ValueError(f"can't process a file of type {file.file_extension}")
+
+    @property
+    @abstractmethod
+    def processor_metadata(self) -> dict[str, Any]:
+        raise NotImplementedError
+
+    async def process_file(self, file: QuivrFile) -> list[Document]:
+        self.check_supported(file)
+        docs = await self.process_file_inner(file)
+        for idx, doc in enumerate(docs):
+            doc.metadata = {
+                "id": uuid4(),
+                "chunk_index": idx,
+                "quivr_core_version": version("quivr-core"),
+                **file.metadata,
+                **doc.metadata,
+                **self.processor_metadata,
+            }
+        return docs
+
+    @abstractmethod
+    async def process_file_inner(self, file: QuivrFile) -> list[Document]:
+        raise NotImplementedError
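With the template method above, a custom processor only has to supply `processor_metadata` and `process_file_inner`; the inherited `process_file` checks the extension, then stamps the id, chunk index, core version, file metadata, and processor metadata on every chunk. A sketch of such a subclass (`JSONProcessor` is illustrative, not part of this PR):

```python
from typing import Any

from langchain_core.documents import Document

from quivr_core.files.file import QuivrFile
from quivr_core.processor.processor_base import ProcessorBase


class JSONProcessor(ProcessorBase):
    supported_extensions = [".json"]

    @property
    def processor_metadata(self) -> dict[str, Any]:
        return {"processor_cls": "JSONProcessor"}

    async def process_file_inner(self, file: QuivrFile) -> list[Document]:
        async with file.open() as f:
            content = await f.read()
        # One document for the whole file; the base class enriches its metadata
        return [Document(page_content=content.decode("utf-8"))]
```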
diff --git a/backend/core/quivr_core/processor/registry.py b/backend/core/quivr_core/processor/registry.py
index 516ee916c..8663fb909 100644
--- a/backend/core/quivr_core/processor/registry.py
+++ b/backend/core/quivr_core/processor/registry.py
@@ -1,35 +1,127 @@
 import importlib
+import logging
 import types
-from typing import Type, TypedDict
+from dataclasses import dataclass, field
+from heapq import heappop, heappush
+from typing import Type, TypeAlias
 
-from quivr_core.storage.file import FileExtension
+from quivr_core.files.file import FileExtension
+from quivr_core.processor.implementations.default import (
+    BibTexProcessor,
+    CSVProcessor,
+    DOCXProcessor,
+    EpubProcessor,
+    HTMLProcessor,
+    MarkdownProcessor,
+    NotebookProcessor,
+    ODTProcessor,
+    PPTProcessor,
+    PythonProcessor,
+    TikTokenTxtProcessor,
+    XLSXProcessor,
+)
 
 from .processor_base import ProcessorBase
 
+logger = logging.getLogger("quivr_core")
+
+_LOWEST_PRIORITY = 100
+
 _registry: dict[str, Type[ProcessorBase]] = {}
 
-# external, read only
+# external, read only. Contains the actual processors that are imported and ready to use
 registry = types.MappingProxyType(_registry)
 
 
-class ProcEntry(TypedDict):
-    cls_mod: str
-    err: str | None
+@dataclass(order=True)
+class ProcEntry:
+    priority: int
+    cls_mod: str = field(compare=False)
+    err: str | None = field(compare=False)
+
+
+ProcMapping: TypeAlias = dict[FileExtension | str, list[ProcEntry]]
 
 # Register based on mimetypes
-known_processors: dict[FileExtension | str, ProcEntry] = {
-    FileExtension.txt: ProcEntry(
-        cls_mod="quivr_core.processor.simple_txt_processor.SimpleTxtProcessor",
-        err="Please install quivr_core[base] to use TikTokenTxtProcessor ",
-    ),
-    FileExtension.pdf: ProcEntry(
-        cls_mod="quivr_core.processor.tika_processor.TikaProcessor",
-        err=None,
-    ),
+base_processors: ProcMapping = {
+    FileExtension.txt: [
+        ProcEntry(
+            cls_mod="quivr_core.processor.implementations.simple_txt_processor.SimpleTxtProcessor",
+            err=None,
+            priority=_LOWEST_PRIORITY,
+        )
+    ],
+    FileExtension.pdf: [
+        ProcEntry(
+            cls_mod="quivr_core.processor.implementations.tika_processor.TikaProcessor",
+            err=None,
+            priority=_LOWEST_PRIORITY,
+        )
+    ],
 }
+
+
+def _append_proc_mapping(
+    mapping: ProcMapping,
+    file_ext: FileExtension | str,
+    cls_mod: str,
+    errtxt: str,
+    priority: int | None,
+):
+    if file_ext in mapping:
+        prev_proc = heappop(mapping[file_ext])
+        proc_entry = ProcEntry(
+            priority=priority if priority is not None else prev_proc.priority - 1,
+            cls_mod=cls_mod,
+            err=errtxt,
+        )
+        # Repush the previous processor
+        heappush(mapping[file_ext], prev_proc)
+        heappush(mapping[file_ext], proc_entry)
+    else:
+        proc_entry = ProcEntry(
+            priority=priority if priority is not None else _LOWEST_PRIORITY,
+            cls_mod=cls_mod,
+            err=errtxt,
+        )
+        mapping[file_ext] = [proc_entry]
+
+
+def defaults_to_proc_entries(
+    base_processors: ProcMapping,
+) -> ProcMapping:
+    # TODO(@aminediro) : how can a user change the order of the processor ?
+    # NOTE: order of this list is important as resolution of `get_processor_class` depends on it
+    for processor in [
+        CSVProcessor,
+        TikTokenTxtProcessor,
+        DOCXProcessor,
+        XLSXProcessor,
+        PPTProcessor,
+        MarkdownProcessor,
+        EpubProcessor,
+        BibTexProcessor,
+        ODTProcessor,
+        HTMLProcessor,
+        PythonProcessor,
+        NotebookProcessor,
+    ]:
+        for ext in processor.supported_extensions:
+            ext_str = ext.value if isinstance(ext, FileExtension) else ext
+            _append_proc_mapping(
+                mapping=base_processors,
+                file_ext=ext,
+                cls_mod=f"quivr_core.processor.implementations.default.{processor.__name__}",
+                errtxt=f"can't import {processor.__name__}. Please install quivr-core[{ext_str}] to access {processor.__name__}",
+                priority=None,
+            )
+
+    return base_processors
+
+
+known_processors = defaults_to_proc_entries(base_processors)
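+# Example (illustrative): with the defaults above, known_processors[FileExtension.txt]
+# holds TikTokenTxtProcessor at priority 99 ahead of SimpleTxtProcessor at priority
+# 100, so get_processor_class(".txt") imports TikTokenTxtProcessor first and only
+# falls back to SimpleTxtProcessor if langchain-community is not installed.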
+
+
 def get_processor_class(file_extension: FileExtension | str) -> Type[ProcessorBase]:
     """Fetch processor class from registry
@@ -43,43 +135,64 @@ def get_processor_class(file_extension: FileExtension | str) -> Type[ProcessorBa
     """
     if file_extension not in registry:
+        # Either you registered it from module or it's in the known processors
         if file_extension not in known_processors:
             raise ValueError(f"Extension not known: {file_extension}")
-        entry = known_processors[file_extension]
-        try:
-            register_processor(file_extension, _import_class(entry["cls_mod"]))
-        except ImportError as e:
-            raise ImportError(entry["err"]) from e
+        entries = known_processors[file_extension]
+        while entries:
+            proc_entry = heappop(entries)
+            try:
+                register_processor(file_extension, _import_class(proc_entry.cls_mod))
+                break
+            except ImportError:
+                logger.warning(
+                    f"{proc_entry.err}. Falling back to the next available processor for {file_extension}"
+                )
+        if len(entries) == 0 and file_extension not in registry:
+            raise ImportError(f"can't find any processor for {file_extension}")
 
     cls = registry[file_extension]
     return cls
 
 
 def register_processor(
-    file_type: FileExtension | str,
+    file_ext: FileExtension | str,
     proc_cls: str | Type[ProcessorBase],
+    append: bool = True,
     override: bool = False,
-    errtxt=None,
+    errtxt: str | None = None,
+    priority: int | None = None,
 ):
     if isinstance(proc_cls, str):
-        if file_type in known_processors and override is False:
-            if proc_cls != known_processors[file_type]["cls_mod"]:
+        if file_ext in known_processors and append is False:
+            if all(proc_cls != proc.cls_mod for proc in known_processors[file_ext]):
                 raise ValueError(
-                    f"Processor for ({file_type}) already in the registry and override is False"
+                    f"Processor for ({file_ext}) already in the registry and append is False"
                 )
         else:
-            known_processors[file_type] = ProcEntry(
-                cls_mod=proc_cls,
-                err=errtxt or f"{proc_cls} import failed for processor of {file_type}",
-            )
+            if all(proc_cls != proc.cls_mod for proc in known_processors[file_ext]):
+                _append_proc_mapping(
+                    known_processors,
+                    file_ext=file_ext,
+                    cls_mod=proc_cls,
+                    errtxt=errtxt
+                    or f"{proc_cls} import failed for processor of {file_ext}",
+                    priority=priority,
+                )
+            else:
+                logger.info(f"{proc_cls} already in registry...")
+
     else:
-        if file_type in registry and override is False:
-            if _registry[file_type] is not proc_cls:
+        assert issubclass(
+            proc_cls, ProcessorBase
+        ), f"{proc_cls} should be a subclass of quivr_core.processor.ProcessorBase"
+        if file_ext in registry and override is False:
+            if _registry[file_ext] is not proc_cls:
                 raise ValueError(
-                    f"Processor for ({file_type}) already in the registry and override is False"
+                    f"Processor for ({file_ext}) already in the registry and override is False"
                 )
         else:
-            _registry[file_type] = proc_cls
+            _registry[file_ext] = proc_cls
 
 
 def _import_class(full_mod_path: str):
diff --git a/backend/core/quivr_core/processor/txt_processor.py b/backend/core/quivr_core/processor/txt_processor.py
deleted file mode 100644
index 5d2266c20..000000000
--- a/backend/core/quivr_core/processor/txt_processor.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from importlib.metadata import version
-from uuid import uuid4
-
-from langchain_community.document_loaders.text import TextLoader
-from langchain_core.documents import Document
-from langchain_text_splitters import
RecursiveCharacterTextSplitter, TextSplitter - -from quivr_core.processor.processor_base import ProcessorBase -from quivr_core.processor.registry import FileExtension -from quivr_core.processor.splitter import SplitterConfig -from quivr_core.storage.file import QuivrFile - - -class TikTokenTxtProcessor(ProcessorBase): - supported_extensions = [FileExtension.txt] - - def __init__( - self, - splitter: TextSplitter | None = None, - splitter_config: SplitterConfig = SplitterConfig(), - ) -> None: - self.loader_cls = TextLoader - - self.splitter_config = splitter_config - - if splitter: - self.text_splitter = splitter - else: - self.text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( - chunk_size=splitter_config.chunk_size, - chunk_overlap=splitter_config.chunk_overlap, - ) - - async def process_file(self, file: QuivrFile) -> list[Document]: - self.check_supported(file) - - loader = self.loader_cls(file.path) - documents = await loader.aload() - docs = self.text_splitter.split_documents(documents) - - file_metadata = file.metadata - - for doc in docs: - doc.metadata = { - "id": uuid4(), - "chunk_size": len(doc.page_content), - "chunk_overlap": self.splitter_config.chunk_overlap, - "parser_name": self.__class__.__name__, - "quivr_core_version": version("quivr-core"), - **file_metadata, - } - - return docs diff --git a/backend/core/quivr_core/storage/local_storage.py b/backend/core/quivr_core/storage/local_storage.py index 5c32e5663..c7ebe8d52 100644 --- a/backend/core/quivr_core/storage/local_storage.py +++ b/backend/core/quivr_core/storage/local_storage.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Set from uuid import UUID -from quivr_core.storage.file import QuivrFile +from quivr_core.files.file import QuivrFile from quivr_core.storage.storage_base import StorageBase diff --git a/backend/core/quivr_core/storage/storage_base.py b/backend/core/quivr_core/storage/storage_base.py index 746501501..9fa385355 100644 --- a/backend/core/quivr_core/storage/storage_base.py +++ b/backend/core/quivr_core/storage/storage_base.py @@ -16,6 +16,9 @@ class StorageBase(ABC): ) return super().__init_subclass__(**kwargs) + def __repr__(self) -> str: + return f"storage_type: {self.name}" + @abstractmethod def nb_files(self) -> int: raise Exception("Unimplemented nb_files method") diff --git a/backend/core/tests/conftest.py b/backend/core/tests/conftest.py index d44aa7783..b362762b4 100644 --- a/backend/core/tests/conftest.py +++ b/backend/core/tests/conftest.py @@ -11,8 +11,8 @@ from langchain_core.runnables.utils import AddableDict from langchain_core.vectorstores import InMemoryVectorStore from quivr_core.config import LLMEndpointConfig +from quivr_core.files.file import FileExtension, QuivrFile from quivr_core.llm import LLMEndpoint -from quivr_core.storage.file import FileExtension, QuivrFile @pytest.fixture(scope="function") diff --git a/backend/core/tests/processor/test_default_implementations.py b/backend/core/tests/processor/test_default_implementations.py new file mode 100644 index 000000000..32d5dc513 --- /dev/null +++ b/backend/core/tests/processor/test_default_implementations.py @@ -0,0 +1,18 @@ +from langchain_community.document_loaders.base import BaseLoader + +from quivr_core.files.file import FileExtension +from quivr_core.processor.implementations.default import _build_processor +from quivr_core.processor.processor_base import ProcessorBase + + +def test___build_processor(): + cls = _build_processor("TestCLS", BaseLoader, [FileExtension.txt]) + + assert 
cls.__name__ == "TestCLS" + assert issubclass(cls, ProcessorBase) + assert "__init__" in cls.__dict__ + assert cls.supported_extensions == [FileExtension.txt] + proc = cls() + assert hasattr(proc, "loader_cls") + # FIXME: proper mypy typing + assert proc.loader_cls == BaseLoader # type: ignore diff --git a/backend/core/tests/processor/test_registry.py b/backend/core/tests/processor/test_registry.py index bcf49855f..86342f5df 100644 --- a/backend/core/tests/processor/test_registry.py +++ b/backend/core/tests/processor/test_registry.py @@ -1,27 +1,118 @@ +import logging +from heapq import heappop + import pytest from langchain_core.documents import Document from quivr_core import registry +from quivr_core.files.file import FileExtension, QuivrFile +from quivr_core.processor.implementations.default import TikTokenTxtProcessor +from quivr_core.processor.implementations.tika_processor import TikaProcessor from quivr_core.processor.processor_base import ProcessorBase from quivr_core.processor.registry import ( + _LOWEST_PRIORITY, + ProcEntry, + ProcMapping, + _append_proc_mapping, _import_class, + available_processors, get_processor_class, + known_processors, register_processor, ) -from quivr_core.processor.simple_txt_processor import SimpleTxtProcessor -from quivr_core.processor.tika_processor import TikaProcessor -from quivr_core.storage.file import FileExtension, QuivrFile -def test_get_processor_cls(): +# TODO : check what are the defaults without the extra [base] +@pytest.mark.base +def test_get_default_processors_cls(): cls = get_processor_class(FileExtension.txt) - assert cls == SimpleTxtProcessor + assert cls == TikTokenTxtProcessor + cls = get_processor_class(FileExtension.pdf) assert cls == TikaProcessor +def test_append_proc_mapping_empty(): + proc_mapping = {} + + _append_proc_mapping( + proc_mapping, + file_ext=FileExtension.txt, + cls_mod="test.test", + errtxt="error", + priority=None, + ) + assert len(proc_mapping) == 1 + assert len(proc_mapping[FileExtension.txt]) == 1 + assert proc_mapping[FileExtension.txt][0] == ProcEntry( + priority=_LOWEST_PRIORITY, cls_mod="test.test", err="error" + ) + + +def test_append_proc_mapping_priority(): + proc_mapping: ProcMapping = { + FileExtension.txt: [ + ProcEntry( + cls_mod="quivr_core.processor.implementations.simple_txt_processor.SimpleTxtProcessor", + err=None, + priority=_LOWEST_PRIORITY, + ) + ], + } + _append_proc_mapping( + proc_mapping, + file_ext=FileExtension.txt, + cls_mod="test.test", + errtxt="error", + priority=0, + ) + + assert len(proc_mapping[FileExtension.txt]) == 2 + # Procs are appended in order + assert heappop(proc_mapping[FileExtension.txt]) == ProcEntry( + priority=0, cls_mod="test.test", err="error" + ) + + +def test_append_proc_mapping(): + proc_mapping: ProcMapping = { + FileExtension.txt: [ + ProcEntry( + cls_mod="quivr_core.processor.implementations.simple_txt_processor.SimpleTxtProcessor", + err=None, + priority=_LOWEST_PRIORITY, + ) + ], + } + _append_proc_mapping( + proc_mapping, + file_ext=FileExtension.txt, + cls_mod="test.test", + errtxt="error", + priority=None, + ) + + assert len(proc_mapping[FileExtension.txt]) == 2 + # Procs are appended in order + assert heappop(proc_mapping[FileExtension.txt]) == ProcEntry( + priority=_LOWEST_PRIORITY - 1, cls_mod="test.test", err="error" + ) + assert heappop(proc_mapping[FileExtension.txt]) == ProcEntry( + cls_mod="quivr_core.processor.implementations.simple_txt_processor.SimpleTxtProcessor", + err=None, + priority=_LOWEST_PRIORITY, + ) + + +def 
test_known_processors(): + assert len(known_processors) == 15 + assert all( + ext in known_processors for ext in list(FileExtension) + ), "Some file extensions don't have a default processor" + + def test__import_class(): - mod_path = "quivr_core.processor.tika_processor.TikaProcessor" + mod_path = "quivr_core.processor.implementations.tika_processor.TikaProcessor" mod = _import_class(mod_path) assert mod == TikaProcessor @@ -34,9 +125,34 @@ def test__import_class(): _import_class(mod_path) +@pytest.mark.skip +def test_get_processor_cls_import_error(caplog): + # TODO: run this in a env without necessary processors of some type + # use caplog to get logging.warnings + with pytest.raises(ImportError): + get_processor_class(".pdf") + + def test_get_processor_cls_error(): with pytest.raises(ValueError): - get_processor_class(".docx") + get_processor_class(".sdfkj") + + +def test_register_new_proc_noappend(): + with pytest.raises(ValueError): + register_processor(FileExtension.txt, "test.", append=False) + + +def test_register_new_proc_append(caplog): + n = len(known_processors[FileExtension.txt]) + register_processor(FileExtension.txt, "test.", append=True) + assert len(known_processors[FileExtension.txt]) == n + 1 + + with caplog.at_level(logging.INFO, logger="quivr_core"): + register_processor(FileExtension.txt, "test.", append=True) + assert caplog.record_tuples == [ + ("quivr_core", logging.INFO, "test. already in registry...") + ] def test_register_new_proc(): @@ -55,13 +171,47 @@ def test_register_new_proc(): assert cls == TestProcessor +def test_register_non_processor(): + class NOTPROC: + supported_extensions = [".pdf"] + + with pytest.raises(AssertionError): + register_processor(".pdf", NOTPROC) # type: ignore + + def test_register_override_proc(): class TestProcessor(ProcessorBase): supported_extensions = [".pdf"] - async def process_file(self, file: QuivrFile) -> list[Document]: + @property + def processor_metadata(self): + return {} + + async def process_file_inner(self, file: QuivrFile) -> list[Document]: return [] register_processor(".pdf", TestProcessor, override=True) cls = get_processor_class(FileExtension.pdf) assert cls == TestProcessor + + +def test_register_override_error(): + # Register class to pdf + _ = get_processor_class(FileExtension.pdf) + + class TestProcessor(ProcessorBase): + supported_extensions = [FileExtension.pdf] + + @property + def processor_metadata(self): + return {} + + async def process_file_inner(self, file: QuivrFile) -> list[Document]: + return [] + + with pytest.raises(ValueError): + register_processor(".pdf", TestProcessor, override=False) + + +def test_available_processors(): + assert 15 == len(available_processors()) diff --git a/backend/core/tests/processor/test_simple_txt_processor.py b/backend/core/tests/processor/test_simple_txt_processor.py index 0af9cee98..126ee1cac 100644 --- a/backend/core/tests/processor/test_simple_txt_processor.py +++ b/backend/core/tests/processor/test_simple_txt_processor.py @@ -1,12 +1,12 @@ import pytest from langchain_core.documents import Document -from quivr_core.processor.simple_txt_processor import ( +from quivr_core.files.file import FileExtension +from quivr_core.processor.implementations.simple_txt_processor import ( SimpleTxtProcessor, recursive_character_splitter, ) from quivr_core.processor.splitter import SplitterConfig -from quivr_core.storage.file import FileExtension def test_recursive_character_splitter(): diff --git a/backend/core/tests/processor/test_tika_processor.py 
b/backend/core/tests/processor/test_tika_processor.py index e71146687..ad98d67d3 100644 --- a/backend/core/tests/processor/test_tika_processor.py +++ b/backend/core/tests/processor/test_tika_processor.py @@ -1,6 +1,6 @@ import pytest -from quivr_core.processor.tika_processor import TikaProcessor +from quivr_core.processor.implementations.tika_processor import TikaProcessor # TODO: TIKA server should be set diff --git a/backend/core/tests/processor/test_txt_processor.py b/backend/core/tests/processor/test_txt_processor.py index 8b892d32f..ca96563ed 100644 --- a/backend/core/tests/processor/test_txt_processor.py +++ b/backend/core/tests/processor/test_txt_processor.py @@ -1,11 +1,10 @@ -from importlib.metadata import version from uuid import uuid4 import pytest +from quivr_core.files.file import FileExtension, QuivrFile +from quivr_core.processor.implementations.default import TikTokenTxtProcessor from quivr_core.processor.splitter import SplitterConfig -from quivr_core.processor.txt_processor import TikTokenTxtProcessor -from quivr_core.storage.file import FileExtension, QuivrFile # TODO: TIKA server should be set @@ -25,21 +24,21 @@ def txt_qfile(temp_data_file): @pytest.mark.base @pytest.mark.asyncio async def test_process_txt(txt_qfile): - tparser = TikTokenTxtProcessor( - splitter_config=SplitterConfig(chunk_size=20, chunk_overlap=0) - ) + splitter_config = SplitterConfig(chunk_size=20, chunk_overlap=0) + tparser = TikTokenTxtProcessor(splitter_config=splitter_config) doc = await tparser.process_file(txt_qfile) assert len(doc) > 0 assert doc[0].page_content == "This is some test data." - # assert dict1.items() <= dict2.items() + print(doc[0].metadata) assert ( doc[0].metadata.items() >= { - "chunk_size": len(doc[0].page_content), - "chunk_overlap": 0, - "parser_name": tparser.__class__.__name__, - "quivr_core_version": version("quivr-core"), + "chunk_index": 0, + "original_file_name": "data.txt", + "chunk_size": 6, + "processor_cls": "TextLoader", + "splitter": {"chunk_size": 20, "chunk_overlap": 0}, **txt_qfile.metadata, }.items() ) diff --git a/backend/core/tests/test_quivr_rag.py b/backend/core/tests/test_quivr_rag.py index 31ed142a9..822191466 100644 --- a/backend/core/tests/test_quivr_rag.py +++ b/backend/core/tests/test_quivr_rag.py @@ -9,7 +9,7 @@ from quivr_core.models import ParsedRAGChunkResponse, RAGResponseMetadata from quivr_core.quivr_rag import QuivrQARAG -@pytest.fixture +@pytest.fixture(scope="function") def mock_chain_qa_stream(monkeypatch, chunks_stream_answer): class MockQAChain: async def astream(self, *args, **kwargs): diff --git a/backend/poetry.lock b/backend/poetry.lock index 7d8922044..c28d81dcc 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -413,17 +413,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.34.144" +version = "1.34.145" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.144-py3-none-any.whl", hash = "sha256:b8433d481d50b68a0162c0379c0dd4aabfc3d1ad901800beb5b87815997511c1"}, - {file = "boto3-1.34.144.tar.gz", hash = "sha256:2f3e88b10b8fcc5f6100a9d74cd28230edc9d4fa226d99dd40a3ab38ac213673"}, + {file = "boto3-1.34.145-py3-none-any.whl", hash = "sha256:69d5afb7a017d07dd6bdfb680d2912d5d369b3fafa0a45161207d9f393b14d7e"}, + {file = "boto3-1.34.145.tar.gz", hash = "sha256:ac770fb53dde1743aec56bd8e56b7ee2e2f5ad42a37825968ec4ff8428822640"}, ] [package.dependencies] -botocore = ">=1.34.144,<1.35.0" +botocore = ">=1.34.145,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -432,13 +432,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.144" +version = "1.34.145" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.144-py3-none-any.whl", hash = "sha256:a2cf26e1bf10d5917a2285e50257bc44e94a1d16574f282f3274f7a5d8d1f08b"}, - {file = "botocore-1.34.144.tar.gz", hash = "sha256:4215db28d25309d59c99507f1f77df9089e5bebbad35f6e19c7c44ec5383a3e8"}, + {file = "botocore-1.34.145-py3-none-any.whl", hash = "sha256:2e72e262de02adcb0264ac2bac159a28f55dbba8d9e52aa0308773a42950dff5"}, + {file = "botocore-1.34.145.tar.gz", hash = "sha256:edf0fb4c02186ae29b76263ac5fda18b0a085d334a310551c9984407cf1079e6"}, ] [package.dependencies] @@ -795,13 +795,13 @@ files = [ [[package]] name = "cohere" -version = "5.5.8" +version = "5.6.1" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "cohere-5.5.8-py3-none-any.whl", hash = "sha256:e1ed84b90eadd13c6a68ee28e378a0bb955f8945eadc6eb7ee126b3399cafd54"}, - {file = "cohere-5.5.8.tar.gz", hash = "sha256:84ce7666ff8fbdf4f41fb5f6ca452ab2639a514bc88967a2854a9b1b820d6ea0"}, + {file = "cohere-5.6.1-py3-none-any.whl", hash = "sha256:1c8bcd39a54622d64b83cafb865f102cd2565ce091b0856fd5ce11bf7169109a"}, + {file = "cohere-5.6.1.tar.gz", hash = "sha256:5d7efda64f0e512d4cc35aa04b17a6f74b3d8c175a99f2797991a7f31dfac349"}, ] [package.dependencies] @@ -1301,69 +1301,68 @@ stone = ">=2,<3.3.3" [[package]] name = "duckdb" -version = "0.10.3" +version = "1.0.0" description = "DuckDB in-process database" optional = false python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"}, - {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"}, - {file = "duckdb-0.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b48f5f1542f1e4b184e6b4fc188f497be8b9c48127867e7d9a5f4a3e334f88b0"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e327f7a3951ea154bb56e3fef7da889e790bd9a67ca3c36afc1beb17d3feb6d6"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d8b20ed67da004b4481973f4254fd79a0e5af957d2382eac8624b5c527ec48c"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d37680b8d7be04e4709db3a66c8b3eb7ceba2a5276574903528632f2b2cc2e60"}, - {file = 
"duckdb-0.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d34b86d6a2a6dfe8bb757f90bfe7101a3bd9e3022bf19dbddfa4b32680d26a9"}, - {file = "duckdb-0.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:73b1cb283ca0f6576dc18183fd315b4e487a545667ffebbf50b08eb4e8cdc143"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d917dde19fcec8cadcbef1f23946e85dee626ddc133e1e3f6551f15a61a03c61"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46757e0cf5f44b4cb820c48a34f339a9ccf83b43d525d44947273a585a4ed822"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:338c14d8ac53ac4aa9ec03b6f1325ecfe609ceeb72565124d489cb07f8a1e4eb"}, - {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:651fcb429602b79a3cf76b662a39e93e9c3e6650f7018258f4af344c816dab72"}, - {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3ae3c73b98b6215dab93cc9bc936b94aed55b53c34ba01dec863c5cab9f8e25"}, - {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56429b2cfe70e367fb818c2be19f59ce2f6b080c8382c4d10b4f90ba81f774e9"}, - {file = "duckdb-0.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b46c02c2e39e3676b1bb0dc7720b8aa953734de4fd1b762e6d7375fbeb1b63af"}, - {file = "duckdb-0.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:bcd460feef56575af2c2443d7394d405a164c409e9794a4d94cb5fdaa24a0ba4"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e229a7c6361afbb0d0ab29b1b398c10921263c52957aefe3ace99b0426fdb91e"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:732b1d3b6b17bf2f32ea696b9afc9e033493c5a3b783c292ca4b0ee7cc7b0e66"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5380d4db11fec5021389fb85d614680dc12757ef7c5881262742250e0b58c75"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:468a4e0c0b13c55f84972b1110060d1b0f854ffeb5900a178a775259ec1562db"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa1e7ff8d18d71defa84e79f5c86aa25d3be80d7cb7bc259a322de6d7cc72da"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed1063ed97c02e9cf2e7fd1d280de2d1e243d72268330f45344c69c7ce438a01"}, - {file = "duckdb-0.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:22f2aad5bb49c007f3bfcd3e81fdedbc16a2ae41f2915fc278724ca494128b0c"}, - {file = "duckdb-0.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:8f9e2bb00a048eb70b73a494bdc868ce7549b342f7ffec88192a78e5a4e164bd"}, - {file = "duckdb-0.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6c2fc49875b4b54e882d68703083ca6f84b27536d57d623fc872e2f502b1078"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a66c125d0c30af210f7ee599e7821c3d1a7e09208196dafbf997d4e0cfcb81ab"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99dd7a1d901149c7a276440d6e737b2777e17d2046f5efb0c06ad3b8cb066a6"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ec3bbdb209e6095d202202893763e26c17c88293b88ef986b619e6c8b6715bd"}, - {file = "duckdb-0.10.3-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:2b3dec4ef8ed355d7b7230b40950b30d0def2c387a2e8cd7efc80b9d14134ecf"}, - {file = 
"duckdb-0.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:04129f94fb49bba5eea22f941f0fb30337f069a04993048b59e2811f52d564bc"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d75d67024fc22c8edfd47747c8550fb3c34fb1cbcbfd567e94939ffd9c9e3ca7"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3796e9507c02d0ddbba2e84c994fae131da567ce3d9cbb4cbcd32fadc5fbb26"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78e539d85ebd84e3e87ec44d28ad912ca4ca444fe705794e0de9be3dd5550c11"}, - {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a99b67ac674b4de32073e9bc604b9c2273d399325181ff50b436c6da17bf00a"}, - {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1209a354a763758c4017a1f6a9f9b154a83bed4458287af9f71d84664ddb86b6"}, - {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b735cea64aab39b67c136ab3a571dbf834067f8472ba2f8bf0341bc91bea820"}, - {file = "duckdb-0.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:816ffb9f758ed98eb02199d9321d592d7a32a6cb6aa31930f4337eb22cfc64e2"}, - {file = "duckdb-0.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:1631184b94c3dc38b13bce4045bf3ae7e1b0ecbfbb8771eb8d751d8ffe1b59b3"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb98c35fc8dd65043bc08a2414dd9f59c680d7e8656295b8969f3f2061f26c52"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e75c9f5b6a92b2a6816605c001d30790f6d67ce627a2b848d4d6040686efdf9"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae786eddf1c2fd003466e13393b9348a44b6061af6fe7bcb380a64cac24e7df7"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9387da7b7973707b0dea2588749660dd5dd724273222680e985a2dd36787668"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:538f943bf9fa8a3a7c4fafa05f21a69539d2c8a68e557233cbe9d989ae232899"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6930608f35025a73eb94252964f9f19dd68cf2aaa471da3982cf6694866cfa63"}, - {file = "duckdb-0.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:03bc54a9cde5490918aad82d7d2a34290e3dfb78d5b889c6626625c0f141272a"}, - {file = "duckdb-0.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:372b6e3901d85108cafe5df03c872dfb6f0dbff66165a0cf46c47246c1957aa0"}, - {file = "duckdb-0.10.3.tar.gz", hash = "sha256:c5bd84a92bc708d3a6adffe1f554b94c6e76c795826daaaf482afc3d9c636971"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4a8ce2d1f9e1c23b9bab3ae4ca7997e9822e21563ff8f646992663f66d050211"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:19797670f20f430196e48d25d082a264b66150c264c1e8eae8e22c64c2c5f3f5"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b71c342090fe117b35d866a91ad6bffce61cd6ff3e0cff4003f93fc1506da0d8"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dd69f44ad212c35ae2ea736b0e643ea2b70f204b8dff483af1491b0e2a4cec"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8da5f293ecb4f99daa9a9352c5fd1312a6ab02b464653a0c3a25ab7065c45d4d"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:3207936da9967ddbb60644ec291eb934d5819b08169bc35d08b2dedbe7068c60"}, + {file = "duckdb-1.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1128d6c9c33e883b1f5df6b57c1eb46b7ab1baf2650912d77ee769aaa05111f9"}, + {file = "duckdb-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:02310d263474d0ac238646677feff47190ffb82544c018b2ff732a4cb462c6ef"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:75586791ab2702719c284157b65ecefe12d0cca9041da474391896ddd9aa71a4"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:83bb415fc7994e641344f3489e40430ce083b78963cb1057bf714ac3a58da3ba"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:bee2e0b415074e84c5a2cefd91f6b5ebeb4283e7196ba4ef65175a7cef298b57"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa5a4110d2a499312609544ad0be61e85a5cdad90e5b6d75ad16b300bf075b90"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa389e6a382d4707b5f3d1bc2087895925ebb92b77e9fe3bfb23c9b98372fdc"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ede6f5277dd851f1a4586b0c78dc93f6c26da45e12b23ee0e88c76519cbdbe0"}, + {file = "duckdb-1.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0b88cdbc0d5c3e3d7545a341784dc6cafd90fc035f17b2f04bf1e870c68456e5"}, + {file = "duckdb-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd1693cdd15375156f7fff4745debc14e5c54928589f67b87fb8eace9880c370"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c65a7fe8a8ce21b985356ee3ec0c3d3b3b2234e288e64b4cfb03356dbe6e5583"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:e5a8eda554379b3a43b07bad00968acc14dd3e518c9fbe8f128b484cf95e3d16"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:a1b6acdd54c4a7b43bd7cb584975a1b2ff88ea1a31607a2b734b17960e7d3088"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a677bb1b6a8e7cab4a19874249d8144296e6e39dae38fce66a80f26d15e670df"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:752e9d412b0a2871bf615a2ede54be494c6dc289d076974eefbf3af28129c759"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aadb99d098c5e32d00dc09421bc63a47134a6a0de9d7cd6abf21780b678663c"}, + {file = "duckdb-1.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83b7091d4da3e9301c4f9378833f5ffe934fb1ad2b387b439ee067b2c10c8bb0"}, + {file = "duckdb-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:6a8058d0148b544694cb5ea331db44f6c2a00a7b03776cc4dd1470735c3d5ff7"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e40cb20e5ee19d44bc66ec99969af791702a049079dc5f248c33b1c56af055f4"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7bce1bc0de9af9f47328e24e6e7e39da30093179b1c031897c042dd94a59c8e"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8355507f7a04bc0a3666958f4414a58e06141d603e91c0fa5a7c50e49867fb6d"}, + {file = "duckdb-1.0.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:39f1a46f5a45ad2886dc9b02ce5b484f437f90de66c327f86606d9ba4479d475"}, + {file = "duckdb-1.0.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:a6d29ba477b27ae41676b62c8fae8d04ee7cbe458127a44f6049888231ca58fa"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:1bea713c1925918714328da76e79a1f7651b2b503511498ccf5e007a7e67d49e"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_universal2.whl", hash = "sha256:bfe67f3bcf181edbf6f918b8c963eb060e6aa26697d86590da4edc5707205450"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:dbc6093a75242f002be1d96a6ace3fdf1d002c813e67baff52112e899de9292f"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba1881a2b11c507cee18f8fd9ef10100be066fddaa2c20fba1f9a664245cd6d8"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:445d0bb35087c522705c724a75f9f1c13f1eb017305b694d2686218d653c8142"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:224553432e84432ffb9684f33206572477049b371ce68cc313a01e214f2fbdda"}, + {file = "duckdb-1.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d3914032e47c4e76636ad986d466b63fdea65e37be8a6dfc484ed3f462c4fde4"}, + {file = "duckdb-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:af9128a2eb7e1bb50cd2c2020d825fb2946fdad0a2558920cd5411d998999334"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dd2659a5dbc0df0de68f617a605bf12fe4da85ba24f67c08730984a0892087e8"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_universal2.whl", hash = "sha256:ac5a4afb0bc20725e734e0b2c17e99a274de4801aff0d4e765d276b99dad6d90"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c5a53bee3668d6e84c0536164589d5127b23d298e4c443d83f55e4150fafe61"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b980713244d7708b25ee0a73de0c65f0e5521c47a0e907f5e1b933d79d972ef6"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbd4f9fe7b7a56eff96c3f4d6778770dd370469ca2212eddbae5dd63749db5"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed228167c5d49888c5ef36f6f9cbf65011c2daf9dcb53ea8aa7a041ce567b3e4"}, + {file = "duckdb-1.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46d8395fbcea7231fd5032a250b673cc99352fef349b718a23dea2c0dd2b8dec"}, + {file = "duckdb-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:6ad1fc1a4d57e7616944166a5f9417bdbca1ea65c490797e3786e3a42e162d8a"}, + {file = "duckdb-1.0.0.tar.gz", hash = "sha256:a2a059b77bc7d5b76ae9d88e267372deff19c291048d59450c431e166233d453"}, ] [[package]] name = "duckduckgo-search" -version = "6.2.0" +version = "6.2.1" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." 
optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.2.0-py3-none-any.whl", hash = "sha256:54d8e2a745630fde9fa016835dcc98f551a4f56b7d851bde1706d2f221cba006"}, - {file = "duckduckgo_search-6.2.0.tar.gz", hash = "sha256:df2d31996122675a775a4327b1fffff1b14bd0541c683fcc4134fa167541c746"}, + {file = "duckduckgo_search-6.2.1-py3-none-any.whl", hash = "sha256:1a03f799b85fdfa08d5e6478624683f373b9dc35e6f145544b9cab72a4f575fa"}, + {file = "duckduckgo_search-6.2.1.tar.gz", hash = "sha256:d664ec096193e3fb43bdfae4b0ad9c04e44094b58f41998adcdd20a86ee1ed74"}, ] [package.dependencies] @@ -2097,13 +2096,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "gotrue" -version = "2.5.5" +version = "2.6.0" description = "Python Client Library for Supabase Auth" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "gotrue-2.5.5-py3-none-any.whl", hash = "sha256:081ee6ff53fddaad71b3ee17258a117aeca402ac43bc0b174c185483a0f4b5d6"}, - {file = "gotrue-2.5.5.tar.gz", hash = "sha256:2eb2bc63121a7775716bfb4dbc85ea928c23ebfc4481fa758aeccb955138b155"}, + {file = "gotrue-2.6.0-py3-none-any.whl", hash = "sha256:950df07fd9492fcbbda61f7230a26d76dcf9a361c362f29f6348c09a5931c6df"}, + {file = "gotrue-2.6.0.tar.gz", hash = "sha256:a01a9e7156ee9493f351b35c70663b4ba99e3e8b241730ca88e1da477ff88d11"}, ] [package.dependencies] @@ -2360,13 +2359,13 @@ files = [ [[package]] name = "huggingface-hub" -version = "0.23.4" +version = "0.24.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, - {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, + {file = "huggingface_hub-0.24.0-py3-none-any.whl", hash = "sha256:7ad92edefb93d8145c061f6df8d99df2ff85f8379ba5fac8a95aca0642afa5d7"}, + {file = "huggingface_hub-0.24.0.tar.gz", hash = "sha256:6c7092736b577d89d57b3cdfea026f1b0dc2234ae783fa0d59caf1bf7d52dfa7"}, ] [package.dependencies] @@ -2379,17 +2378,17 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", 
"types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] inference = ["aiohttp", "minijinja (>=1.0)"] -quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] +quality = ["mypy (==1.5.1)", "ruff (>=0.5.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] -torch = ["safetensors", "torch"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] [[package]] @@ -3012,18 +3011,18 @@ zookeeper = ["kazoo (>=2.8.0)"] [[package]] name = "langchain" -version = "0.2.8" +version = "0.2.9" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain-0.2.8-py3-none-any.whl", hash = "sha256:53e7dfe50294a14200f33bec22b4e14cb63857ccf0a5500b0d18b0fd51285d58"}, - {file = "langchain-0.2.8.tar.gz", hash = "sha256:7fecb309e3558cde4e5cf7e9ffb7c1ab3f07121c40a7ff3b0c27135f8120c296"}, + {file = "langchain-0.2.9-py3-none-any.whl", hash = "sha256:be23fcb29adbd5059944f1fed08fa575f0739d420b1c4127531e0fbf5663fcca"}, + {file = "langchain-0.2.9.tar.gz", hash = "sha256:cc326a7f6347787a19882928c324433b1a79df629bba45604b2d26495ee5d69c"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" -langchain-core = ">=0.2.19,<0.3.0" +langchain-core = ">=0.2.20,<0.3.0" langchain-text-splitters = ">=0.2.0,<0.3.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ @@ -3085,13 +3084,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-core" -version = "0.2.19" +version = "0.2.21" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_core-0.2.19-py3-none-any.whl", hash = "sha256:5b3cd34395be274c89e822c84f0e03c4da14168c177a83921c5b9414ac7a0651"}, - {file = "langchain_core-0.2.19.tar.gz", hash = "sha256:13043a83e5c9ab58b9f5ce2a56896e7e88b752e8891b2958960a98e71801471e"}, + {file = "langchain_core-0.2.21-py3-none-any.whl", hash = "sha256:805b1f53e0e2424b83e3673cba1c9354105c5a5e4a1d0d768b1e70d8ac0d604d"}, + {file = "langchain_core-0.2.21.tar.gz", hash = "sha256:3d1e28179a5d496b900ebef45e1471eaae9fb63fc570f89ded78b026fd08ba84"}, ] [package.dependencies] @@ -3122,17 +3121,17 @@ langchain-core = ">=0.2.10,<0.3.0" [[package]] name = "langchain-openai" 
-version = "0.1.16" +version = "0.1.17" description = "An integration package connecting OpenAI and LangChain" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_openai-0.1.16-py3-none-any.whl", hash = "sha256:bff90e9d0be786495920a7851ae4d55247fb084d3a11d1b15bfe91904ce1cb0f"}, - {file = "langchain_openai-0.1.16.tar.gz", hash = "sha256:3dedcc785a64a488ad6a313cc59b0dccea025d27413b55cec73a20dcf3ded4da"}, + {file = "langchain_openai-0.1.17-py3-none-any.whl", hash = "sha256:30bef5574ecbbbb91b8025b2dc5a1bd81fd62157d3ad1a35d820141f31c5b443"}, + {file = "langchain_openai-0.1.17.tar.gz", hash = "sha256:c5d70ddecdcb93e146f376bdbadbb6ec69de9ac0f402cd5b83de50b655ba85ee"}, ] [package.dependencies] -langchain-core = ">=0.2.17,<0.3.0" +langchain-core = ">=0.2.20,<0.3.0" openai = ">=1.32.0,<2.0.0" tiktoken = ">=0.7,<1" @@ -3166,13 +3165,13 @@ six = "*" [[package]] name = "langfuse" -version = "2.39.1" +version = "2.39.2" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langfuse-2.39.1-py3-none-any.whl", hash = "sha256:a8f517150e60605cf5e52556f550aeaf8143774f7d1d6dc55e4fa9e8742c55e3"}, - {file = "langfuse-2.39.1.tar.gz", hash = "sha256:39f5ed2baa06bfe3ef4c4d102efd748931d5f17217ccc17d1b98a02e0dd72992"}, + {file = "langfuse-2.39.2-py3-none-any.whl", hash = "sha256:ef2016704c0366026b93c6613e5ea8e0a2a0bfdbee1a887aa43fb08c6a255e10"}, + {file = "langfuse-2.39.2.tar.gz", hash = "sha256:bb1264722d4968309f3608af01186e0395bcd398a30792579a5f51d5fe1f4902"}, ] [package.dependencies] @@ -3191,27 +3190,27 @@ openai = ["openai (>=0.27.8)"] [[package]] name = "langgraph" -version = "0.1.8" +version = "0.1.9" description = "Building stateful, multi-actor applications with LLMs" optional = false python-versions = "<4.0,>=3.9.0" files = [ - {file = "langgraph-0.1.8-py3-none-any.whl", hash = "sha256:cc70cc04c5cdfa0d76d2d10421554496e8827b5ad02f16ecd8b2d96ece101b47"}, - {file = "langgraph-0.1.8.tar.gz", hash = "sha256:2612e39b049ca31f29a9dc16e9cf29351881d6d1afe139b6364f53513a3a51cf"}, + {file = "langgraph-0.1.9-py3-none-any.whl", hash = "sha256:b3b5698686ae71fbf0cb2439f34d8a840f061c0e5ddc76d618674c3611ed787a"}, + {file = "langgraph-0.1.9.tar.gz", hash = "sha256:9ab6150d4b46089f8ea484fc68b1b28e0dd3adb7e383f0b8520ec04b7f6d5938"}, ] [package.dependencies] -langchain-core = ">=0.2.15,<0.3" +langchain-core = ">=0.2.19,<0.3" [[package]] name = "langsmith" -version = "0.1.86" +version = "0.1.92" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.86-py3-none-any.whl", hash = "sha256:55ed80cc6e98f9761f9b3ec3c49e01f6745d13e40bef80d9f831acabfd9a8a1e"}, - {file = "langsmith-0.1.86.tar.gz", hash = "sha256:2e66577817253327b99b727588c3173fbba217fe0ca07ac6b7cdd23fc4894104"}, + {file = "langsmith-0.1.92-py3-none-any.whl", hash = "sha256:8acb27844ff5263bde14b23425f83ee63996f4d5a8e9998cdeef07fd913137ff"}, + {file = "langsmith-0.1.92.tar.gz", hash = "sha256:681a613a4dc8c8e57c8961c347a39ffcb64d6c697e8ddde1fd8458fcfaef6c13"}, ] [package.dependencies] @@ -3258,13 +3257,13 @@ tesseract = ["pytesseract"] [[package]] name = "litellm" -version = "1.41.23" +version = "1.41.24" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.41.23-py3-none-any.whl", hash = "sha256:ccfe7763e694ae43b50229fc78bb999a18507b654ec2046c55c6e2a2ea48bf96"}, - {file = "litellm-1.41.23.tar.gz", hash = "sha256:320afcd172fb936f1297ce135075e3397141cf245cdb936f01871c9d6ed56516"}, + {file = "litellm-1.41.24-py3-none-any.whl", hash = "sha256:e20047ee2789cddb704e7f27b2c123ce95093287a3c32a4e3d95daf751d71014"}, + {file = "litellm-1.41.24.tar.gz", hash = "sha256:fba8668d17830b6444c74a7bc2e22c2dcfbb1283a847e86c430a52d9ee94a8ec"}, ] [package.dependencies] @@ -3393,13 +3392,13 @@ wrapt = "*" [[package]] name = "llama-index-embeddings-openai" -version = "0.1.10" +version = "0.1.11" description = "llama-index embeddings openai integration" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_embeddings_openai-0.1.10-py3-none-any.whl", hash = "sha256:c3cfa83b537ded34d035fc172a945dd444c87fb58a89b02dfbf785b675f9f681"}, - {file = "llama_index_embeddings_openai-0.1.10.tar.gz", hash = "sha256:1bc1fc9b46773a12870c5d3097d3735d7ca33805f12462a8e35ae8a6e5ce1cf6"}, + {file = "llama_index_embeddings_openai-0.1.11-py3-none-any.whl", hash = "sha256:e20806fc4baff6b8f5274decf2c1ca7c5c737648e01865475ffada164e32e173"}, + {file = "llama_index_embeddings_openai-0.1.11.tar.gz", hash = "sha256:6025e229e375201788a9b14d6ebe470329907576cba5f6b7b832c3d68f39db30"}, ] [package.dependencies] @@ -3461,13 +3460,13 @@ query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "l [[package]] name = "llama-index-llms-openai" -version = "0.1.25" +version = "0.1.26" description = "llama-index llms openai integration" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_llms_openai-0.1.25-py3-none-any.whl", hash = "sha256:d1922ad2f2bb4697a6ee2f61793aeb2f5c5606302639559dd9bb0a1d6ab9e73f"}, - {file = "llama_index_llms_openai-0.1.25.tar.gz", hash = "sha256:49750f71d58e762a597ce639a2ccb119195c47aefa8a48c55c77be8a5cec4bc5"}, + {file = "llama_index_llms_openai-0.1.26-py3-none-any.whl", hash = "sha256:1ad8e4eb02f9410c2091749d4d9aa9db4452646b595eb5eb937edbc496fb65fe"}, + {file = "llama_index_llms_openai-0.1.26.tar.gz", hash = "sha256:08a408cd53af4cd4623dd5807be4cbbd5e5b3ca01272128cd678d667343e4d5d"}, ] [package.dependencies] @@ -3475,12 +3474,13 @@ llama-index-core = ">=0.10.24,<0.11.0" [[package]] name = "llama-index-multi-modal-llms-openai" -version = "0.1.7" +version = "0.1.8" description = "llama-index multi-modal-llms openai integration" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_multi_modal_llms_openai-0.1.7-py3-none-any.whl", hash = 
"sha256:a86fa4a8f8372da31b978cc28d14da75ce6a39f42b1eea90cd3ac93017644766"}, + {file = "llama_index_multi_modal_llms_openai-0.1.8-py3-none-any.whl", hash = "sha256:16ae72ac3c5201ebd1d4b62203930c1768149ec85c3e477e5e51ed2ef8db1067"}, + {file = "llama_index_multi_modal_llms_openai-0.1.8.tar.gz", hash = "sha256:5e2c94a6415a2509cad035ccea34461959ae327a5900d3e820417e9ebb9a13ec"}, ] [package.dependencies] @@ -3556,13 +3556,13 @@ llama-parse = ">=0.4.0" [[package]] name = "llama-parse" -version = "0.4.7" +version = "0.4.9" description = "Parse files into RAG-Optimized formats." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_parse-0.4.7-py3-none-any.whl", hash = "sha256:666041aa55f1af1e2ed4e86b482b623defcb4b12a2a874acf1812ec59eefdb80"}, - {file = "llama_parse-0.4.7.tar.gz", hash = "sha256:d02220ba6ed55b1b654ab8b0570f00062222911bd16c93a58f71f7b128b7481e"}, + {file = "llama_parse-0.4.9-py3-none-any.whl", hash = "sha256:71974a57a73d642608cc406942bee4e7fc1a713fa410f51df67da509479ba544"}, + {file = "llama_parse-0.4.9.tar.gz", hash = "sha256:657f8fa5f7d399f14c0454fc05cae6034da0373f191df6cfca17a1b4a704ef87"}, ] [package.dependencies] @@ -4039,13 +4039,13 @@ tests = ["pytest (>=4.6)"] [[package]] name = "msal" -version = "1.29.0" +version = "1.30.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = false python-versions = ">=3.7" files = [ - {file = "msal-1.29.0-py3-none-any.whl", hash = "sha256:6b301e63f967481f0cc1a3a3bac0cf322b276855bc1b0955468d9deb3f33d511"}, - {file = "msal-1.29.0.tar.gz", hash = "sha256:8f6725f099752553f9b2fe84125e2a5ebe47b49f92eacca33ebedd3a9ebaae25"}, + {file = "msal-1.30.0-py3-none-any.whl", hash = "sha256:423872177410cb61683566dc3932db7a76f661a5d2f6f52f02a047f101e1c1de"}, + {file = "msal-1.30.0.tar.gz", hash = "sha256:b4bf00850092e465157d814efa24a18f788284c9a479491024d62903085ea2fb"}, ] [package.dependencies] @@ -4661,13 +4661,13 @@ sympy = "*" [[package]] name = "openai" -version = "1.35.14" +version = "1.36.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.35.14-py3-none-any.whl", hash = "sha256:adadf8c176e0b8c47ad782ed45dc20ef46438ee1f02c7103c4155cff79c8f68b"}, - {file = "openai-1.35.14.tar.gz", hash = "sha256:394ba1dfd12ecec1d634c50e512d24ff1858bbc2674ffcce309b822785a058de"}, + {file = "openai-1.36.0-py3-none-any.whl", hash = "sha256:82b74ded1fe2ea94abb19a007178bc143675f1b6903cebd63e2968d654bb0a6f"}, + {file = "openai-1.36.0.tar.gz", hash = "sha256:a124baf0e1657d6156e12248642f88489cd030be8655b69bc1c13eb50e71a93d"}, ] [package.dependencies] @@ -4871,18 +4871,18 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] [[package]] name = "pandasai" -version = "2.2.10" +version = "2.2.12" description = "Chat with your database (SQL, CSV, pandas, polars, mongodb, noSQL, etc). PandasAI makes data analysis conversational using LLMs (GPT 3.5 / 4, Anthropic, VertexAI) and RAG." 
optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,!=3.8.*,>=3.9" files = [ - {file = "pandasai-2.2.10-py3-none-any.whl", hash = "sha256:72ce173e9265892d6424a6540b4d2a75cbbf0b0170435c541ab5f18f4724afef"}, - {file = "pandasai-2.2.10.tar.gz", hash = "sha256:022a6ed3c39558b03d61eac1fdf0de77c2dca7357dd1d36f251ba30460095252"}, + {file = "pandasai-2.2.12-py3-none-any.whl", hash = "sha256:4d84c441347f440cd0077bbd19ccaf2cdbf98b1ea03d521aea15cf19de454670"}, + {file = "pandasai-2.2.12.tar.gz", hash = "sha256:d4c4c32bf853f2198c58d697cb3e4ff183544c9051dd12e0edc147105729daa7"}, ] [package.dependencies] astor = ">=0.8.1,<0.9.0" -duckdb = "<1" +duckdb = ">=1.0.0,<2.0.0" faker = ">=19.12.0,<20.0.0" jinja2 = ">=3.1.3,<4.0.0" matplotlib = ">=3.7.1,<4.0.0" @@ -5286,18 +5286,18 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p [[package]] name = "postgrest" -version = "0.16.8" +version = "0.16.9" description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "postgrest-0.16.8-py3-none-any.whl", hash = "sha256:c353a24452f51ab9760cf2b884c4b7457a2653ff36444e66b12615bc4cc8e23e"}, - {file = "postgrest-0.16.8.tar.gz", hash = "sha256:7b3802a514dc1e0fc8b5bbdeb2c99af35a0bd910e4ddb17855ca4e3422350c84"}, + {file = "postgrest-0.16.9-py3-none-any.whl", hash = "sha256:8a20a256e86c4181575d271ddd77152b305313890ecc7d2df5b25aeb330bd9a4"}, + {file = "postgrest-0.16.9.tar.gz", hash = "sha256:fee42e89d265e904e823d9602803980016128ff7dde0ce1e869014cf1fd2c19d"}, ] [package.dependencies] deprecation = ">=2.1.0,<3.0.0" -httpx = ">=0.24,<0.28" +httpx = {version = ">=0.24,<0.28", extras = ["http2"]} pydantic = ">=1.9,<3.0" strenum = ">=0.4.9,<0.5.0" @@ -5542,65 +5542,57 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] - [[package]] name = "pyarrow" -version = "16.1.0" +version = "17.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, - {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"}, - {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"}, - {file = "pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c"}, - {file = "pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b"}, - {file = "pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b"}, - {file = "pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f"}, - {file = "pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7"}, - {file = "pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444"}, - {file = "pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f"}, - {file = "pyarrow-16.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed"}, - {file = "pyarrow-16.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710"}, - {file = "pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55"}, - {file = "pyarrow-16.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3"}, - {file = "pyarrow-16.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a"}, - {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"}, + {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"}, + {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"}, + {file = "pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"}, + {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"}, + {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"}, + {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"}, + {file = "pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"}, + {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"}, + {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, + {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, + {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, + {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, + {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, + {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, ] [package.dependencies] numpy = ">=1.16.6" +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] + [[package]] name = "pyarrow-hotfix" version = "0.6" @@ -6083,7 +6075,6 @@ python-versions = ">=3.8" files = [ {file = "PyMuPDFb-1.24.6-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:21e3ed890f736def68b9a031122ae1fb854d5cb9a53aa144b6e2ca3092416a6b"}, {file = "PyMuPDFb-1.24.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8704d2dfadc9448ce184597d8b0f9c30143e379ac948a517f9c4db7c0c71ed51"}, - {file = "PyMuPDFb-1.24.6-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:01662584d5cfa7a91f77585f13fc23a12291cfd76a57e0a28dd5a56bf521cb2c"}, {file = "PyMuPDFb-1.24.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1f7657353529ae3f88575c83ee49eac9adea311a034b9c97248a65cee7df0e5"}, {file = "PyMuPDFb-1.24.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cebc2cedb870d1e1168e2f502eb06f05938f6df69103b0853a2b329611ec19a7"}, {file = "PyMuPDFb-1.24.6-py3-none-win32.whl", hash = "sha256:ac4b865cd1e239db04674f85e02844a0e405f8255ee7a74dfee0d86aad0d3576"}, @@ -6225,13 +6216,13 @@ dev = ["pytest (>=8.1.1)"] [[package]] name = "pyright" -version = "1.1.371" +version = "1.1.372" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" files = [ - {file = "pyright-1.1.371-py3-none-any.whl", hash = "sha256:cce52e42ff73943243e7e5e24f2a59dee81b97d99f4e3cf97370b27e8a1858cd"}, - {file = "pyright-1.1.371.tar.gz", hash = "sha256:777b508b92dda2db476214c400ce043aad8d8f3dd0e10d284c96e79f298308b5"}, + {file = "pyright-1.1.372-py3-none-any.whl", hash = "sha256:25b15fb8967740f0949fd35b963777187f0a0404c0bd753cc966ec139f3eaa0b"}, + {file = "pyright-1.1.372.tar.gz", hash = "sha256:a9f5e0daa955daaa17e3d1ef76d3623e75f8afd5e37b437d3ff84d5b38c15420"}, ] [package.dependencies] @@ -6288,13 +6279,13 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments [[package]] name = "pytest-asyncio" -version = "0.23.7" +version = "0.23.8" description = "Pytest support for asyncio" optional = false python-versions = ">=3.8" files = [ - {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, - {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, + {file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"}, + {file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"}, ] [package.dependencies] @@ -6306,23 +6297,23 @@ testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] [[package]] name = "pytest-celery" -version = "1.0.0" +version = "1.0.1" description = "Pytest plugin for Celery" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "pytest_celery-1.0.0-py3-none-any.whl", hash = "sha256:c10bc7d16daa3ae4a5784efcbd1855d610c0e087c21d185e52fa018b3a6c4249"}, - {file = "pytest_celery-1.0.0.tar.gz", hash = "sha256:17a066b1554d4fa8797d4928e8b8cda1bfb441dae4688ca29fdbde28ffa49ff7"}, + {file = "pytest_celery-1.0.1-py3-none-any.whl", hash = "sha256:8f0068f0b5deb3123c76ae56327d40ece488c622daee54b3c5ff968c503df841"}, + {file = "pytest_celery-1.0.1.tar.gz", hash = "sha256:8ab12f2f16946e131c315efce2d71fa3b74a05269077fde04f96a6048b249377"}, ] [package.dependencies] celery = "*" debugpy = ">=1.8.1,<2.0.0" -docker = ">=7.0.0,<8.0.0" +docker = ">=7.1.0,<8.0.0" psutil = ">=5.9.7" pytest-docker-tools = ">=3.1.3" -retry = ">=0.9.2" setuptools = ">=69.1.0" +tenacity = ">=8.5.0" [package.extras] all = ["python-memcached", "redis"] @@ -6777,7 +6768,7 @@ url = "api" [[package]] name = "quivr-core" -version = "0.0.9" +version = "0.0.10" description = "Quivr core RAG package" optional = false python-versions = "^3.11" @@ -6792,6 +6783,8 @@ langchain-community = {version = "^0.2.6", optional = true} langchain-core = "^0.2.10" langchain-openai = {version = "^0.1.14", optional = true} pydantic = "^2.7.4" +rich = "^13.7.1" +tiktoken = "^0.7.0" 
[package.extras] base = ["faiss-cpu (>=1.8.0.post1,<2.0.0)", "langchain-community (>=0.2.6,<0.3.0)", "langchain-openai (>=0.1.14,<0.2.0)"] @@ -7152,21 +7145,6 @@ files = [ requests = ">=2.31.0" typing-extensions = "*" -[[package]] -name = "retry" -version = "0.9.2" -description = "Easy to use retry decorator." -optional = false -python-versions = "*" -files = [ - {file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"}, - {file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"}, -] - -[package.dependencies] -decorator = ">=3.4.2" -py = ">=1.4.26,<2.0.0" - [[package]] name = "rich" version = "13.7.1" @@ -7567,18 +7545,19 @@ tornado = ["tornado (>=6)"] [[package]] name = "setuptools" -version = "70.3.0" +version = "71.0.3" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, - {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, + {file = "setuptools-71.0.3-py3-none-any.whl", hash = "sha256:f501b6e6db709818dc76882582d9c516bf3b67b948864c5fa1d1624c09a49207"}, + {file = "setuptools-71.0.3.tar.gz", hash = "sha256:3d8531791a27056f4a38cd3e54084d8b1c4228ff9cf3f2d7dd075ec99f9fd70d"}, ] [package.extras] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (<7.4)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w 
(>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "sgmllib3k" @@ -7734,13 +7713,13 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlglot" -version = "25.5.1" +version = "25.6.1" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-25.5.1-py3-none-any.whl", hash = "sha256:80019318158069edc11e6892f74c696e5579a5588da2a0ce11dd7e215a3cb318"}, - {file = "sqlglot-25.5.1.tar.gz", hash = "sha256:c167eac4536dd6ed202fee5107b76b8cb05db85550a47e8448bf6452c4780158"}, + {file = "sqlglot-25.6.1-py3-none-any.whl", hash = "sha256:c1fcbaa00429979f16fb8cea20279a8b3f5312e76d97abb8f8c6a9b21be450d7"}, + {file = "sqlglot-25.6.1.tar.gz", hash = "sha256:ea40f3bf8452e2c1a696fe120163190bd67e49b346336e7db6d34400b57b7601"}, ] [package.dependencies] @@ -7932,13 +7911,13 @@ files = [ [[package]] name = "supabase" -version = "2.5.1" +version = "2.5.3" description = "Supabase client for Python." optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "supabase-2.5.1-py3-none-any.whl", hash = "sha256:74a1f24f04fede1967ef084b50dea688228f7b10eb2f9d73350fe2251a865188"}, - {file = "supabase-2.5.1.tar.gz", hash = "sha256:c50e0eba5b03de3abd5ac0f887957ca43558ba44c4d17bb44e73ec454b41734c"}, + {file = "supabase-2.5.3-py3-none-any.whl", hash = "sha256:b5e9711f5cc0f5c5f96c4e2b4f043641ccfb63125b3d33eae9c26c9d37bf6e29"}, + {file = "supabase-2.5.3.tar.gz", hash = "sha256:71570226d0399d7f7203a08c44b734d6b873c7c3cc0dbe74fd9cdc8b4513c2bb"}, ] [package.dependencies] @@ -7965,13 +7944,13 @@ httpx = {version = ">=0.24,<0.28", extras = ["http2"]} [[package]] name = "sympy" -version = "1.13.0" +version = "1.13.1" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.8" files = [ - {file = "sympy-1.13.0-py3-none-any.whl", hash = "sha256:6b0b32a4673fb91bd3cac3b55406c8e01d53ae22780be467301cc452f6680c92"}, - {file = "sympy-1.13.0.tar.gz", hash = "sha256:3b6af8f4d008b9a1a6a4268b335b984b23835f26d1d60b0526ebc71d48a25f57"}, + {file = "sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8"}, + {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"}, ] [package.dependencies] @@ -7996,13 +7975,13 @@ widechars = ["wcwidth"] [[package]] name = "tavily-python" -version = "0.3.4" +version = "0.3.5" description = "Python wrapper for the Tavily API" optional = false python-versions = ">=3.6" files = [ - {file = "tavily_python-0.3.4-py3-none-any.whl", hash = "sha256:7fd41ce5422ccfeade8dab564f5c524b0cc683cc02cbc7bc9625a3275c31355a"}, - {file = "tavily_python-0.3.4.tar.gz", hash = "sha256:acc3bb08668d05ee2decf94dba17811bc434a18c78acb7a1ca7c483b3a316f38"}, + {file = "tavily_python-0.3.5-py3-none-any.whl", hash = "sha256:5ee7a7a5eb06674dfb68606608b5c6feea411755e0ea4ec14aa6c4a2cbcaa696"}, + {file = "tavily_python-0.3.5.tar.gz", hash = "sha256:415db6e98d9f03f0879c33bd3d0d3d766f2c1333fc7559fdce82cc8514a7127b"}, ] [package.dependencies] @@ -9535,5 +9514,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "4f318e2f8561fac7ad1f4aa603e5264accf857d6e7e1bfd9fcf770279f6a2e9a" - +content-hash = "963b9ff228f2478505802a0e2915d2802e9d92396e33cde549f33bdb07f2a30d"