From bc7e84b1f97cc38fb0cc5bfb817164965414688d Mon Sep 17 00:00:00 2001
From: Stan Girard
Date: Sat, 13 May 2023 00:25:12 +0200
Subject: [PATCH] feat(pdf): added pdf loader

---
 files.py       | 5 +++++
 loaders/pdf.py | 6 ++++++
 main.py        | 2 ++
 3 files changed, 13 insertions(+)
 create mode 100644 loaders/pdf.py

diff --git a/files.py b/files.py
index 9018d40b2..c3e22ca1c 100644
--- a/files.py
+++ b/files.py
@@ -5,12 +5,14 @@ from loaders.txt import process_txt
 from loaders.csv import process_csv
 from loaders.markdown import process_markdown
 from utils import compute_sha1_from_content
+from loaders.pdf import process_pdf
 
 def file_uploader(supabase, openai_key, vector_store):
     file_processors = {
         ".txt": process_txt,
         ".csv": process_csv,
         ".md": process_markdown,
+        ".markdown": process_markdown,
         ".m4a": process_audio,
         ".mp3": process_audio,
         ".webm": process_audio,
@@ -18,6 +20,7 @@ def file_uploader(supabase, openai_key, vector_store):
         ".mpga": process_audio,
         ".wav": process_audio,
         ".mpeg": process_audio,
+        ".pdf": process_pdf,
     }
 
     files = st.file_uploader("Upload a file", accept_multiple_files=True, type=list(file_processors.keys()))
@@ -26,6 +29,8 @@ def file_uploader(supabase, openai_key, vector_store):
             for file in files:
                 if file_already_exists(supabase, file):
                     st.write(f"😎 {file.name} is already in the database.")
+                elif file.size < 1:
+                    st.write(f"💨 {file.name} is empty.")
                 else:
                     file_extension = os.path.splitext(file.name)[-1]
                     if file_extension in file_processors:
diff --git a/loaders/pdf.py b/loaders/pdf.py
new file mode 100644
index 000000000..846168a2e
--- /dev/null
+++ b/loaders/pdf.py
@@ -0,0 +1,6 @@
+from .common import process_file
+from langchain.document_loaders import PyPDFLoader
+
+
+def process_pdf(vector_store, file):
+    return process_file(vector_store, file, PyPDFLoader, ".pdf")
diff --git a/main.py b/main.py
index 1cfc0a37a..d134858b8 100644
--- a/main.py
+++ b/main.py
@@ -43,6 +43,8 @@ if 'chunk_overlap' not in st.session_state:
 # Create a radio button for user to choose between adding knowledge or asking a question
 user_choice = st.radio("Choose an action", ('Add Knowledge to the Brain', 'Ask a Question to the Brain'))
 
+st.markdown("---\n\n")
+
 if user_choice == 'Add Knowledge to the Brain':
     # Display chunk size and overlap selection only when adding knowledge
     st.sidebar.title("Configuration")