chore: update Dockerfile dependencies and copy files (#3277)

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
Stan Girard 2024-09-30 12:26:50 +02:00 committed by GitHub
parent 11f5448008
commit f334dc0f49
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 34 additions and 7 deletions

View File

@@ -3,22 +3,28 @@ FROM python:3.11.6-slim-bullseye
WORKDIR /app WORKDIR /app
# Install runtime dependencies # Install runtime dependencies
RUN apt-get update && apt-get install -y \ RUN apt-get clean && apt-get update && apt-get install -y \
libgeos-dev \ libgeos-dev \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
libssl-dev \ libssl-dev \
binutils \ binutils \
pandoc \
curl \ curl \
git \ git \
autoconf \
automake \
build-essential \
libtool \
python-dev \
build-essential \
# Additional dependencies for document handling
libmagic-dev \
poppler-utils \ poppler-utils \
tesseract-ocr \ tesseract-ocr \
libmagic-dev \
libreoffice \ libreoffice \
libpq-dev \ libpq-dev \
gcc \ gcc \
wget \ pandoc && \
&& rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/* && apt-get clean
# Install Supabase CLI # Install Supabase CLI
RUN ARCHITECTURE=$(uname -m) && \ RUN ARCHITECTURE=$(uname -m) && \
@@ -32,7 +38,15 @@ RUN ARCHITECTURE=$(uname -m) && \
rm supabase_1.163.6_linux_arm64.deb; \ rm supabase_1.163.6_linux_arm64.deb; \
fi fi
COPY . . COPY requirements.lock pyproject.toml README.md ./
COPY api/pyproject.toml api/README.md ./api/
COPY api/quivr_api/__init__.py ./api/quivr_api/__init__.py
COPY core/pyproject.toml core/README.md ./core/
COPY core/quivr_core/__init__.py ./core/quivr_core/__init__.py
COPY worker/pyproject.toml worker/README.md ./worker/
COPY worker/quivr_worker/__init__.py ./worker/quivr_worker/__init__.py
COPY core/MegaParse/pyproject.toml core/MegaParse/README.md ./core/MegaParse/
COPY core/MegaParse/megaparse/__init__.py ./core/MegaParse/megaparse/__init__.py
RUN PYTHONDONTWRITEBYTECODE=1 pip install --no-cache-dir -r requirements.lock RUN PYTHONDONTWRITEBYTECODE=1 pip install --no-cache-dir -r requirements.lock
@@ -43,4 +57,5 @@ RUN playwright install --with-deps && \
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
COPY . .
EXPOSE 5050 EXPOSE 5050

View File

@@ -6,6 +6,7 @@ from abc import ABC, abstractmethod
from datetime import datetime from datetime import datetime
from io import BytesIO from io import BytesIO
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse
import dropbox import dropbox
import markdownify import markdownify
@@ -25,7 +26,18 @@ from quivr_api.modules.sync.service.sync_notion import SyncNotionService
from quivr_api.modules.sync.utils.normalize import remove_special_characters from quivr_api.modules.sync.utils.normalize import remove_special_characters
logger = get_logger(__name__) logger = get_logger(__name__)
redis_client = redis.Redis(host="redis", port=int(os.getenv("REDIS_PORT", 6379)), db=0)
# Parse the CELERY_BROKER_URL
broker_url = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0")
parsed_url = urlparse(broker_url)
# Create the Redis client using the parsed URL
redis_client = redis.Redis(
host=parsed_url.hostname,
port=parsed_url.port,
password=parsed_url.password,
db=int(parsed_url.path.lstrip('/'))
)
class BaseSync(ABC): class BaseSync(ABC):