mirror of
https://github.com/QuivrHQ/quivr.git
synced 2025-01-05 23:03:53 +03:00
ef90e8e672
# Description
Major PR which, among other things, introduces the possibility of easily
customizing the retrieval workflows. Workflows are based on LangGraph,
and can be customized using a [yaml configuration
file](core/tests/test_llm_endpoint.py), and by adding the implementation of
the node logic to
[quivr_rag_langgraph.py](1a0c98437a/backend/core/quivr_core/quivr_rag_langgraph.py).
This is a first, simple implementation that will significantly evolve in
the coming weeks to enable more complex workflows (for instance, with
conditional nodes). We also plan to adopt a similar approach for the
ingestion part, i.e. to enable users to easily customize the ingestion
pipeline.
Closes CORE-195, CORE-203, CORE-204
## Checklist before requesting a review
Please delete options that are not relevant.
- [X] My code follows the style guidelines of this project
- [X] I have performed a self-review of my code
- [X] I have commented hard-to-understand areas
- [X] I have ideally added tests that prove my fix is effective or that
my feature works
- [X] New and existing unit tests pass locally with my changes
- [X] Any dependent changes have been merged
## Screenshots (if appropriate):
48 lines
1.5 KiB
Docker
48 lines
1.5 KiB
Docker
# Base image: the slim Debian bullseye variant of the official Python 3.11.6
# image, chosen to keep the resulting image small.
FROM python:3.11.6-slim-bullseye
|
|
|
|
# All following relative paths (COPY destinations, RUN commands) resolve
# against /app; WORKDIR creates the directory if it does not exist.
WORKDIR /app
|
|
# Install the OS-level packages in a single layer: the GEOS, curl, OpenSSL,
# libmagic and PostgreSQL client development libraries, a C/C++ build
# toolchain (autotools, build-essential, gcc), and the document-handling tools
# (poppler, tesseract, LibreOffice, pandoc).
# The apt package lists are removed in the same RUN so they never persist in
# the layer.
# NOTE(review): on bullseye `python-dev` presumably resolves to the Python 2
# headers, which a Python 3.11 image should not need — confirm and drop it if
# nothing depends on it.
RUN apt-get clean && apt-get update && apt-get install -y \
    libgeos-dev \
    libcurl4-openssl-dev \
    libssl-dev \
    binutils \
    curl \
    git \
    autoconf \
    automake \
    build-essential \
    libtool \
    python-dev \
    # Additional dependencies for document handling
    libmagic-dev \
    poppler-utils \
    tesseract-ocr \
    libreoffice \
    libpq-dev \
    gcc \
    pandoc && \
    rm -rf /var/lib/apt/lists/* && apt-get clean
|
|
|
|
# Copy only the dependency lock file plus each sub-project's packaging
# metadata (pyproject.toml, README.md) and package __init__.py. Because the
# full source tree is copied later, edits to application code do not
# invalidate the dependency-install layer that follows.
# NOTE(review): presumably requirements.lock references these local packages
# (api, core, worker, core/MegaParse), which is why their skeletons must exist
# before pip runs — confirm against requirements.lock.
COPY requirements.lock pyproject.toml README.md ./
COPY api/pyproject.toml api/README.md ./api/
COPY api/quivr_api/__init__.py ./api/quivr_api/__init__.py
COPY core/pyproject.toml core/README.md ./core/
COPY core/quivr_core/__init__.py ./core/quivr_core/__init__.py
COPY worker/pyproject.toml worker/README.md ./worker/
COPY worker/quivr_worker/__init__.py ./worker/quivr_worker/__init__.py
COPY core/MegaParse/pyproject.toml core/MegaParse/README.md ./core/MegaParse/
COPY core/MegaParse/megaparse/__init__.py ./core/MegaParse/megaparse/__init__.py
|
|
|
|
# Install the locked Python dependencies.
# PYTHONDONTWRITEBYTECODE=1 is set for this command only, so Python writes no
# .pyc files during the install; --no-cache-dir keeps pip's download cache out
# of the image layer.
RUN PYTHONDONTWRITEBYTECODE=1 \
    pip install \
        --no-cache-dir \
        --requirement requirements.lock
|
|
|
|
# Fetch runtime assets at build time so the container does not need to
# download them on first use:
#   1. Playwright browsers together with their OS-level dependencies.
#   2. The NLTK packages that `unstructured` downloads via its own helper.
#   3. The `punkt_tab` and `averaged_perceptron_tagger_eng` NLTK resources.
# `playwright install --with-deps` installs system packages through apt, so
# the apt package lists are removed in this same layer to keep them out of the
# image.
RUN playwright install --with-deps && \
    python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" && \
    python -c "import nltk;nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')" && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
# Copy the rest of the build context into the working directory. This comes
# after the dependency installation so source changes only rebuild from here.
COPY . .
|
|
|
|
# Declare port 5050 as the port the container is expected to serve on.
# EXPOSE is metadata only; the port must still be published at run time.
EXPOSE 5050
|