feat(pdf): switch from PyPDFLoader to PyMuPDFLoader, a new PDF loader that works

This commit is contained in:
Stan Girard 2023-06-06 11:18:33 +02:00
parent 963fb05682
commit e0cf37791b
2 changed files with 4 additions and 3 deletions

View File

@ -1,8 +1,8 @@
from fastapi import UploadFile from fastapi import UploadFile
from langchain.document_loaders import PyPDFLoader from langchain.document_loaders import PyMuPDFLoader
from .common import process_file from .common import process_file
def process_pdf(file: UploadFile, enable_summarization, user): def process_pdf(file: UploadFile, enable_summarization, user):
return process_file(file, PyPDFLoader, ".pdf", enable_summarization, user) return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user)

View File

@ -1,3 +1,4 @@
pymupdf==1.22.3
langchain==0.0.187 langchain==0.0.187
Markdown==3.4.3 Markdown==3.4.3
openai==0.27.6 openai==0.27.6
@ -16,4 +17,4 @@ pypandoc==1.11
docx2txt==0.8 docx2txt==0.8
guidance==0.0.53 guidance==0.0.53
python-jose==3.3.0 python-jose==3.3.0
google_cloud_aiplatform==1.25.0 google_cloud_aiplatform==1.25.0