quivr/backend/api/tests/file_process/test_pdf_process.py
AmineDiro 2e75de4039
feat(backend): quivr-monorepo and quivr-core package (#2765)
# Description

closes #2722.

- Creates `quivr-monorepo` 
- Separates `quivr-core`
- Updates Dockerfiles and docker-compose

---------

Co-authored-by: aminediro <aminediro@github.com>
2024-06-27 03:51:01 -07:00

34 lines
962 B
Python

import os
from tempfile import NamedTemporaryFile
import pytest
from langchain_community.document_loaders import UnstructuredPDFLoader
from quivr_api.models.files import File
@pytest.fixture
def pdf_file():
    """Yield a ``File`` backed by a temporary copy of the ``dummy.pdf`` sample.

    The sample PDF is read fully into memory, written to a named temporary
    file and flushed, so that the content is on disk and reachable through
    ``tmp_file.name`` while the test runs. The temporary file is closed (and
    therefore deleted) when the fixture is torn down.

    Yields:
        File: built with ``file_name="dummy"``, ``file_extension="pdf"``, the
        temporary file's path, the raw PDF bytes and their length.
    """
    # Prefer the sample that sits next to this test module so the fixture does
    # not depend on the directory pytest was launched from. Fall back to the
    # historical working-directory-relative path to stay backward-compatible.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(module_dir, "dummy.pdf")
    if not os.path.isfile(file_path):
        file_path = "tests/file_process/dummy.pdf"
    file_name = os.path.basename(file_path)

    # Read the sample up front so its handle is closed before the test body
    # runs instead of staying open for the fixture's whole lifetime.
    with open(file_path, "rb") as f:
        content = f.read()

    with NamedTemporaryFile(suffix="_" + file_name) as tmp_file:
        tmp_file.write(content)
        # Flush so readers opening the file by name see the full content.
        tmp_file.flush()
        yield File(
            file_name="dummy",
            tmp_file_path=tmp_file.name,
            file_extension="pdf",
            bytes_content=content,
            file_size=len(content),
        )
def test_pdf_process(pdf_file):
    """Check that the sample PDF is parsed into at least one document.

    Runs ``compute_documents`` with ``UnstructuredPDFLoader`` on the fixture
    file and verifies the text of the first resulting document.
    """
    pdf_file.compute_documents(UnstructuredPDFLoader)

    documents = pdf_file.documents
    assert len(documents) > 0

    first_document = documents[0]
    assert first_document.page_content == "Dummy PDF download"