mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-15 09:32:22 +03:00
ef90e8e672
# Description
Major PR which, among other things, introduces the possibility of easily
customizing the retrieval workflows. Workflows are based on LangGraph,
and can be customized using a [yaml configuration
file](core/tests/test_llm_endpoint.py), and adding the implementation of
the nodes logic into
[quivr_rag_langgraph.py](1a0c98437a/backend/core/quivr_core/quivr_rag_langgraph.py)
This is a first, simple implementation that will significantly evolve in
the coming weeks to enable more complex workflows (for instance, with
conditional nodes). We also plan to adopt a similar approach for the
ingestion part, i.e. to enable users to easily customize the ingestion
pipeline.
Closes CORE-195, CORE-203, CORE-204
## Checklist before requesting a review
Please delete options that are not relevant.
- [X] My code follows the style guidelines of this project
- [X] I have performed a self-review of my code
- [X] I have commented hard-to-understand areas
- [X] I have ideally added tests that prove my fix is effective or that
my feature works
- [X] New and existing unit tests pass locally with my changes
- [X] Any dependent changes have been merged
## Screenshots (if appropriate):
595 lines
14 KiB
Plaintext
595 lines
14 KiB
Plaintext
# generated by rye
|
|
# use `rye lock` or `rye sync` to update this lockfile
|
|
#
|
|
# last locked with the following flags:
|
|
# pre: false
|
|
# features: []
|
|
# all-features: true
|
|
# with-sources: false
|
|
# generate-hashes: false
|
|
# universal: true
|
|
|
|
-e file:.
|
|
aiohappyeyeballs==2.4.0
|
|
# via aiohttp
|
|
aiohttp==3.10.5
|
|
# via langchain
|
|
# via langchain-community
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
aiosignal==1.3.1
|
|
# via aiohttp
|
|
annotated-types==0.7.0
|
|
# via pydantic
|
|
antlr4-python3-runtime==4.9.3
|
|
# via omegaconf
|
|
anyio==4.4.0
|
|
# via httpx
|
|
# via openai
|
|
attrs==24.2.0
|
|
# via aiohttp
|
|
backoff==2.2.1
|
|
# via unstructured
|
|
beautifulsoup4==4.12.3
|
|
# via llama-index-readers-file
|
|
# via unstructured
|
|
cachetools==5.5.0
|
|
# via google-auth
|
|
certifi==2024.7.4
|
|
# via httpcore
|
|
# via httpx
|
|
# via requests
|
|
# via unstructured-client
|
|
cffi==1.17.0 ; platform_python_implementation != 'PyPy'
|
|
# via cryptography
|
|
chardet==5.2.0
|
|
# via unstructured
|
|
charset-normalizer==3.3.2
|
|
# via pdfminer-six
|
|
# via requests
|
|
# via unstructured-client
|
|
click==8.1.7
|
|
# via nltk
|
|
cobble==0.1.4
|
|
# via mammoth
|
|
colorama==0.4.6 ; platform_system == 'Windows'
|
|
# via click
|
|
# via tqdm
|
|
coloredlogs==15.0.1
|
|
# via onnxruntime
|
|
contourpy==1.2.1
|
|
# via matplotlib
|
|
cryptography==43.0.0
|
|
# via pdfminer-six
|
|
cycler==0.12.1
|
|
# via matplotlib
|
|
dataclasses-json==0.6.7
|
|
# via langchain-community
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via unstructured
|
|
# via unstructured-client
|
|
deepdiff==7.0.1
|
|
# via unstructured-client
|
|
deprecated==1.2.14
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via pikepdf
|
|
dirtyjson==1.0.8
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
distro==1.9.0
|
|
# via openai
|
|
effdet==0.4.1
|
|
# via unstructured
|
|
emoji==2.12.1
|
|
# via unstructured
|
|
filelock==3.15.4
|
|
# via huggingface-hub
|
|
# via torch
|
|
# via transformers
|
|
# via triton
|
|
filetype==1.2.0
|
|
# via unstructured
|
|
fire==0.6.0
|
|
# via pdf2docx
|
|
flatbuffers==24.3.25
|
|
# via onnxruntime
|
|
fonttools==4.53.1
|
|
# via matplotlib
|
|
# via pdf2docx
|
|
frozenlist==1.4.1
|
|
# via aiohttp
|
|
# via aiosignal
|
|
fsspec==2024.6.1
|
|
# via huggingface-hub
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via torch
|
|
google-api-core==2.19.1
|
|
# via google-cloud-vision
|
|
google-auth==2.34.0
|
|
# via google-api-core
|
|
# via google-cloud-vision
|
|
google-cloud-vision==3.7.4
|
|
# via unstructured
|
|
googleapis-common-protos==1.63.2
|
|
# via google-api-core
|
|
# via grpcio-status
|
|
greenlet==3.0.3
|
|
# via sqlalchemy
|
|
grpcio==1.65.5
|
|
# via google-api-core
|
|
# via grpcio-status
|
|
grpcio-status==1.65.5
|
|
# via google-api-core
|
|
h11==0.14.0
|
|
# via httpcore
|
|
httpcore==1.0.5
|
|
# via httpx
|
|
httpx==0.27.0
|
|
# via llama-cloud
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via openai
|
|
# via unstructured-client
|
|
huggingface-hub==0.24.6
|
|
# via timm
|
|
# via tokenizers
|
|
# via transformers
|
|
# via unstructured-inference
|
|
humanfriendly==10.0
|
|
# via coloredlogs
|
|
idna==3.7
|
|
# via anyio
|
|
# via httpx
|
|
# via requests
|
|
# via unstructured-client
|
|
# via yarl
|
|
iopath==0.1.10
|
|
# via layoutparser
|
|
jinja2==3.1.4
|
|
# via torch
|
|
jiter==0.5.0
|
|
# via openai
|
|
joblib==1.4.2
|
|
# via nltk
|
|
jsonpatch==1.33
|
|
# via langchain-core
|
|
jsonpath-python==1.0.6
|
|
# via unstructured-client
|
|
jsonpointer==3.0.0
|
|
# via jsonpatch
|
|
kiwisolver==1.4.5
|
|
# via matplotlib
|
|
langchain==0.2.14
|
|
# via langchain-community
|
|
# via megaparse
|
|
langchain-community==0.2.12
|
|
# via megaparse
|
|
langchain-core==0.2.33
|
|
# via langchain
|
|
# via langchain-community
|
|
# via langchain-openai
|
|
# via langchain-text-splitters
|
|
# via megaparse
|
|
langchain-openai==0.1.22
|
|
# via megaparse
|
|
langchain-text-splitters==0.2.2
|
|
# via langchain
|
|
langdetect==1.0.9
|
|
# via unstructured
|
|
langsmith==0.1.99
|
|
# via langchain
|
|
# via langchain-community
|
|
# via langchain-core
|
|
layoutparser==0.3.4
|
|
# via unstructured-inference
|
|
llama-cloud==0.0.13
|
|
# via llama-index-indices-managed-llama-cloud
|
|
llama-index==0.10.67.post1
|
|
# via megaparse
|
|
llama-index-agent-openai==0.2.9
|
|
# via llama-index
|
|
# via llama-index-program-openai
|
|
llama-index-cli==0.1.13
|
|
# via llama-index
|
|
llama-index-core==0.10.67
|
|
# via llama-index
|
|
# via llama-index-agent-openai
|
|
# via llama-index-cli
|
|
# via llama-index-embeddings-openai
|
|
# via llama-index-indices-managed-llama-cloud
|
|
# via llama-index-llms-openai
|
|
# via llama-index-multi-modal-llms-openai
|
|
# via llama-index-program-openai
|
|
# via llama-index-question-gen-openai
|
|
# via llama-index-readers-file
|
|
# via llama-index-readers-llama-parse
|
|
# via llama-parse
|
|
llama-index-embeddings-openai==0.1.11
|
|
# via llama-index
|
|
# via llama-index-cli
|
|
llama-index-indices-managed-llama-cloud==0.2.7
|
|
# via llama-index
|
|
llama-index-legacy==0.9.48.post3
|
|
# via llama-index
|
|
llama-index-llms-openai==0.1.29
|
|
# via llama-index
|
|
# via llama-index-agent-openai
|
|
# via llama-index-cli
|
|
# via llama-index-multi-modal-llms-openai
|
|
# via llama-index-program-openai
|
|
# via llama-index-question-gen-openai
|
|
llama-index-multi-modal-llms-openai==0.1.9
|
|
# via llama-index
|
|
llama-index-program-openai==0.1.7
|
|
# via llama-index
|
|
# via llama-index-question-gen-openai
|
|
llama-index-question-gen-openai==0.1.3
|
|
# via llama-index
|
|
llama-index-readers-file==0.1.33
|
|
# via llama-index
|
|
llama-index-readers-llama-parse==0.1.6
|
|
# via llama-index
|
|
llama-parse==0.4.9
|
|
# via llama-index-readers-llama-parse
|
|
# via megaparse
|
|
lxml==5.3.0
|
|
# via pikepdf
|
|
# via python-docx
|
|
# via python-pptx
|
|
# via unstructured
|
|
mammoth==1.8.0
|
|
# via megaparse
|
|
markupsafe==2.1.5
|
|
# via jinja2
|
|
marshmallow==3.21.3
|
|
# via dataclasses-json
|
|
# via unstructured-client
|
|
matplotlib==3.9.2
|
|
# via pycocotools
|
|
# via unstructured-inference
|
|
mpmath==1.3.0
|
|
# via sympy
|
|
multidict==6.0.5
|
|
# via aiohttp
|
|
# via yarl
|
|
mypy-extensions==1.0.0
|
|
# via typing-inspect
|
|
# via unstructured-client
|
|
nest-asyncio==1.6.0
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via unstructured-client
|
|
networkx==3.3
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via torch
|
|
nltk==3.9.1
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via unstructured
|
|
numpy==1.26.4
|
|
# via contourpy
|
|
# via langchain
|
|
# via langchain-community
|
|
# via layoutparser
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via matplotlib
|
|
# via onnx
|
|
# via onnxruntime
|
|
# via opencv-python
|
|
# via opencv-python-headless
|
|
# via pandas
|
|
# via pdf2docx
|
|
# via pycocotools
|
|
# via scipy
|
|
# via torchvision
|
|
# via transformers
|
|
# via unstructured
|
|
nvidia-cublas-cu12==12.1.3.1 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via nvidia-cudnn-cu12
|
|
# via nvidia-cusolver-cu12
|
|
# via torch
|
|
nvidia-cuda-cupti-cu12==12.1.105 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cuda-runtime-cu12==12.1.105 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cudnn-cu12==9.1.0.70 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cufft-cu12==11.0.2.54 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-curand-cu12==10.3.2.106 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cusolver-cu12==11.4.5.107 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-cusparse-cu12==12.1.0.106 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via nvidia-cusolver-cu12
|
|
# via torch
|
|
nvidia-nccl-cu12==2.20.5 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
nvidia-nvjitlink-cu12==12.6.20 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via nvidia-cusolver-cu12
|
|
# via nvidia-cusparse-cu12
|
|
nvidia-nvtx-cu12==12.1.105 ; platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
omegaconf==2.3.0
|
|
# via effdet
|
|
onnx==1.16.2
|
|
# via unstructured
|
|
# via unstructured-inference
|
|
onnxruntime==1.19.0
|
|
# via unstructured-inference
|
|
openai==1.41.1
|
|
# via langchain-openai
|
|
# via llama-index-agent-openai
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via llama-index-llms-openai
|
|
opencv-python==4.10.0.84
|
|
# via layoutparser
|
|
# via unstructured-inference
|
|
opencv-python-headless==4.10.0.84
|
|
# via pdf2docx
|
|
ordered-set==4.1.0
|
|
# via deepdiff
|
|
orjson==3.10.7
|
|
# via langsmith
|
|
packaging==24.1
|
|
# via huggingface-hub
|
|
# via langchain-core
|
|
# via marshmallow
|
|
# via matplotlib
|
|
# via onnxruntime
|
|
# via pikepdf
|
|
# via transformers
|
|
# via unstructured-client
|
|
# via unstructured-pytesseract
|
|
pandas==2.2.2
|
|
# via layoutparser
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
pdf2docx==0.5.8
|
|
# via megaparse
|
|
pdf2image==1.17.0
|
|
# via layoutparser
|
|
# via unstructured
|
|
pdfminer-six==20231228
|
|
# via pdfplumber
|
|
# via unstructured
|
|
pdfplumber==0.11.4
|
|
# via layoutparser
|
|
# via megaparse
|
|
pikepdf==9.1.1
|
|
# via unstructured
|
|
pillow==10.4.0
|
|
# via layoutparser
|
|
# via llama-index-core
|
|
# via matplotlib
|
|
# via pdf2image
|
|
# via pdfplumber
|
|
# via pikepdf
|
|
# via pillow-heif
|
|
# via python-pptx
|
|
# via torchvision
|
|
# via unstructured-pytesseract
|
|
pillow-heif==0.18.0
|
|
# via unstructured
|
|
portalocker==2.10.1
|
|
# via iopath
|
|
proto-plus==1.24.0
|
|
# via google-api-core
|
|
# via google-cloud-vision
|
|
protobuf==5.27.3
|
|
# via google-api-core
|
|
# via google-cloud-vision
|
|
# via googleapis-common-protos
|
|
# via grpcio-status
|
|
# via onnx
|
|
# via onnxruntime
|
|
# via proto-plus
|
|
psutil==6.0.0
|
|
# via unstructured
|
|
pyasn1==0.6.0
|
|
# via pyasn1-modules
|
|
# via rsa
|
|
pyasn1-modules==0.4.0
|
|
# via google-auth
|
|
pycocotools==2.0.8
|
|
# via effdet
|
|
pycparser==2.22 ; platform_python_implementation != 'PyPy'
|
|
# via cffi
|
|
pycryptodome==3.20.0
|
|
# via megaparse
|
|
pydantic==2.8.2
|
|
# via langchain
|
|
# via langchain-core
|
|
# via langsmith
|
|
# via llama-cloud
|
|
# via openai
|
|
pydantic-core==2.20.1
|
|
# via pydantic
|
|
pymupdf==1.24.9
|
|
# via pdf2docx
|
|
pymupdfb==1.24.9
|
|
# via pymupdf
|
|
pyparsing==3.1.2
|
|
# via matplotlib
|
|
pypdf==4.3.1
|
|
# via llama-index-readers-file
|
|
# via unstructured
|
|
# via unstructured-client
|
|
pypdfium2==4.30.0
|
|
# via pdfplumber
|
|
pyreadline3==3.4.1 ; sys_platform == 'win32'
|
|
# via humanfriendly
|
|
python-dateutil==2.9.0.post0
|
|
# via matplotlib
|
|
# via pandas
|
|
# via unstructured-client
|
|
python-docx==1.1.2
|
|
# via megaparse
|
|
# via pdf2docx
|
|
python-dotenv==1.0.1
|
|
# via megaparse
|
|
python-iso639==2024.4.27
|
|
# via unstructured
|
|
python-magic==0.4.27
|
|
# via unstructured
|
|
python-multipart==0.0.9
|
|
# via unstructured-inference
|
|
python-pptx==1.0.2
|
|
# via megaparse
|
|
pytz==2024.1
|
|
# via pandas
|
|
pywin32==306 ; platform_system == 'Windows'
|
|
# via portalocker
|
|
pyyaml==6.0.2
|
|
# via huggingface-hub
|
|
# via langchain
|
|
# via langchain-community
|
|
# via langchain-core
|
|
# via layoutparser
|
|
# via llama-index-core
|
|
# via omegaconf
|
|
# via timm
|
|
# via transformers
|
|
rapidfuzz==3.9.6
|
|
# via unstructured
|
|
# via unstructured-inference
|
|
regex==2024.7.24
|
|
# via nltk
|
|
# via tiktoken
|
|
# via transformers
|
|
requests==2.32.3
|
|
# via google-api-core
|
|
# via huggingface-hub
|
|
# via langchain
|
|
# via langchain-community
|
|
# via langsmith
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via requests-toolbelt
|
|
# via tiktoken
|
|
# via transformers
|
|
# via unstructured
|
|
# via unstructured-client
|
|
requests-toolbelt==1.0.0
|
|
# via unstructured-client
|
|
rsa==4.9
|
|
# via google-auth
|
|
safetensors==0.4.4
|
|
# via timm
|
|
# via transformers
|
|
scipy==1.14.0
|
|
# via layoutparser
|
|
setuptools==73.0.0
|
|
# via torch
|
|
six==1.16.0
|
|
# via fire
|
|
# via langdetect
|
|
# via python-dateutil
|
|
# via unstructured-client
|
|
sniffio==1.3.1
|
|
# via anyio
|
|
# via httpx
|
|
# via openai
|
|
soupsieve==2.6
|
|
# via beautifulsoup4
|
|
sqlalchemy==2.0.32
|
|
# via langchain
|
|
# via langchain-community
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
striprtf==0.0.26
|
|
# via llama-index-readers-file
|
|
sympy==1.13.2
|
|
# via onnxruntime
|
|
# via torch
|
|
tabulate==0.9.0
|
|
# via unstructured
|
|
tenacity==8.5.0
|
|
# via langchain
|
|
# via langchain-community
|
|
# via langchain-core
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
termcolor==2.4.0
|
|
# via fire
|
|
tiktoken==0.7.0
|
|
# via langchain-openai
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
timm==1.0.8
|
|
# via effdet
|
|
# via unstructured-inference
|
|
tokenizers==0.19.1
|
|
# via transformers
|
|
torch==2.4.0
|
|
# via effdet
|
|
# via timm
|
|
# via torchvision
|
|
# via unstructured-inference
|
|
torchvision==0.19.0
|
|
# via effdet
|
|
# via timm
|
|
tqdm==4.66.5
|
|
# via huggingface-hub
|
|
# via iopath
|
|
# via llama-index-core
|
|
# via nltk
|
|
# via openai
|
|
# via transformers
|
|
# via unstructured
|
|
transformers==4.44.0
|
|
# via unstructured-inference
|
|
triton==3.0.0 ; python_version < '3.13' and platform_machine == 'x86_64' and platform_system == 'Linux'
|
|
# via torch
|
|
typing-extensions==4.12.2
|
|
# via emoji
|
|
# via huggingface-hub
|
|
# via iopath
|
|
# via langchain-core
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via openai
|
|
# via pydantic
|
|
# via pydantic-core
|
|
# via python-docx
|
|
# via python-pptx
|
|
# via sqlalchemy
|
|
# via torch
|
|
# via typing-inspect
|
|
# via unstructured
|
|
# via unstructured-client
|
|
typing-inspect==0.9.0
|
|
# via dataclasses-json
|
|
# via llama-index-core
|
|
# via llama-index-legacy
|
|
# via unstructured-client
|
|
tzdata==2024.1
|
|
# via pandas
|
|
unstructured==0.15.5
|
|
# via megaparse
|
|
unstructured-client==0.25.5
|
|
# via unstructured
|
|
unstructured-inference==0.7.36
|
|
# via unstructured
|
|
unstructured-pytesseract==0.3.13
|
|
# via unstructured
|
|
urllib3==2.2.2
|
|
# via requests
|
|
# via unstructured-client
|
|
wrapt==1.16.0
|
|
# via deprecated
|
|
# via llama-index-core
|
|
# via unstructured
|
|
xlsxwriter==3.2.0
|
|
# via python-pptx
|
|
yarl==1.9.4
|
|
# via aiohttp
|