From e0cf37791b07e17302acaa4bfc0d28ead32e018f Mon Sep 17 00:00:00 2001
From: Stan Girard
Date: Tue, 6 Jun 2023 11:18:33 +0200
Subject: [PATCH] feat(pdf): added new pdf miner that works

---
 backend/parsers/pdf.py   | 4 ++--
 backend/requirements.txt | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/backend/parsers/pdf.py b/backend/parsers/pdf.py
index 1415a8d2b..c1614142e 100644
--- a/backend/parsers/pdf.py
+++ b/backend/parsers/pdf.py
@@ -1,8 +1,8 @@
 from fastapi import UploadFile
-from langchain.document_loaders import PyPDFLoader
+from langchain.document_loaders import PyMuPDFLoader
 
 from .common import process_file
 
 
 def process_pdf(file: UploadFile, enable_summarization, user):
-    return process_file(file, PyPDFLoader, ".pdf", enable_summarization, user)
+    return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user)
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 19fb0f577..b3cf685c1 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,3 +1,4 @@
+pymupdf==1.22.3
 langchain==0.0.187
 Markdown==3.4.3
 openai==0.27.6
@@ -16,4 +17,4 @@ pypandoc==1.11
 docx2txt==0.8
 guidance==0.0.53
 python-jose==3.3.0
-google_cloud_aiplatform==1.25.0
\ No newline at end of file
+google_cloud_aiplatform==1.25.0