quivr/backend/Dockerfile.dev
Stan Girard e33d497598
feat(crawler): Add Playwright for web crawling (#2562)
This pull request adds the Playwright library for web crawling. It
includes the necessary dependencies and updates the code to use
Playwright for crawling websites.
2024-05-08 07:20:35 -07:00

54 lines
1.3 KiB
Docker

# Slim Debian (bullseye) Python base keeps the image small; the sha256 digest
# pins the exact base image so rebuilds start from identical contents.
FROM python:3.11.6-slim-bullseye@sha256:0c1fbb294096d842ad795ee232d783cab436c90b034210fe894f2bb2f2be7626

# DEV_MODE is supplied at build time (--build-arg DEV_MODE=...) and re-exported
# as an environment variable so it is also visible inside the running container.
# No default is declared, so it is empty when the build arg is omitted.
ARG DEV_MODE
ENV DEV_MODE=${DEV_MODE}
# Install the OS packages required to build Python wheels and to parse
# documents, then remove the apt caches in the same layer so they never reach
# the image. Every package is listed exactly once, sorted alphabetically:
#   build toolchain  : autoconf automake binutils build-essential gcc libtool python-dev
#   native libraries : libcurl4-openssl-dev libgeos-dev libmagic-dev libpq-dev libssl-dev
#   document tooling : libreoffice pandoc poppler-utils tesseract-ocr
#   general tools    : curl git
# NOTE(review): python-dev is the legacy (Python 2) headers package name while
# the base image ships Python 3.11 - confirm whether it is still needed.
RUN apt-get update && apt-get install -y \
    autoconf \
    automake \
    binutils \
    build-essential \
    curl \
    gcc \
    git \
    libcurl4-openssl-dev \
    libgeos-dev \
    libmagic-dev \
    libpq-dev \
    libreoffice \
    libssl-dev \
    libtool \
    pandoc \
    poppler-utils \
    python-dev \
    tesseract-ocr \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Put Cargo's bin directory (root user's home) at the front of PATH so Rust
# tooling is found first when present.
# NOTE(review): nothing visible in this file installs Rust - confirm this entry
# is still required.
ENV PATH="/root/.cargo/bin:$PATH"
# Set the working directory before copying anything so the relative COPY
# destination below resolves to /code instead of the filesystem root.
WORKDIR /code

# Copy only the dependency manifest first: the install layers below stay cached
# until requirements.txt itself changes.
COPY ./requirements.txt .

# Upgrade pip without leaving its download cache in the layer
RUN pip install --no-cache-dir --upgrade pip

# Install the Python dependencies (generous network timeout for slow indexes),
# then download the Playwright browsers together with the OS packages they
# need. Any apt package lists fetched by that step are removed in the same
# layer so they do not persist in the image.
RUN pip install --no-cache-dir --timeout 200 -r requirements.txt && \
    playwright install --with-deps && \
    rm -rf /var/lib/apt/lists/*
# Application code lives in /code; this is also the directory uvicorn starts in
WORKDIR /code

# Copy the rest of the application
COPY . .

# Port the development server listens on (documentation only; publish it with -p)
EXPOSE 5050

# Development server with auto-reload, listening on all interfaces.
# The former "--workers 6" was dropped: uvicorn ignores the workers setting
# whenever --reload is enabled (it only logs a warning), so the flag had no effect.
CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]