feat(pdf): add PDF loader

This commit is contained in:
Stan Girard 2023-05-13 00:25:12 +02:00
parent 5d66cd5223
commit bc7e84b1f9
3 changed files with 13 additions and 0 deletions

View File

@@ -5,12 +5,14 @@ from loaders.txt import process_txt
from loaders.csv import process_csv
from loaders.markdown import process_markdown
from utils import compute_sha1_from_content
from loaders.pdf import process_pdf
def file_uploader(supabase, openai_key, vector_store):
file_processors = {
".txt": process_txt,
".csv": process_csv,
".md": process_markdown,
".markdown": process_markdown,
".m4a": process_audio,
".mp3": process_audio,
".webm": process_audio,
@@ -18,6 +20,7 @@ def file_uploader(supabase, openai_key, vector_store):
".mpga": process_audio,
".wav": process_audio,
".mpeg": process_audio,
".pdf": process_pdf,
}
files = st.file_uploader("Upload a file", accept_multiple_files=True, type=list(file_processors.keys()))
@@ -26,6 +29,8 @@ def file_uploader(supabase, openai_key, vector_store):
for file in files:
if file_already_exists(supabase, file):
st.write(f"😎 {file.name} is already in the database.")
elif file.size < 1:
st.write(f"💨 {file.name} is empty.")
else:
file_extension = os.path.splitext(file.name)[-1]
if file_extension in file_processors:

6
loaders/pdf.py Normal file
View File

@@ -0,0 +1,6 @@
from .common import process_file
from langchain.document_loaders import PyPDFLoader
def process_pdf(vector_store, file):
    """Process an uploaded PDF by delegating to the shared ``process_file`` helper.

    ``vector_store`` and ``file`` are forwarded unchanged, together with
    ``PyPDFLoader`` as the loader class and ``".pdf"`` as the file suffix.
    Whatever ``process_file`` returns is returned as-is.
    """
    pdf_suffix = ".pdf"
    return process_file(vector_store, file, PyPDFLoader, pdf_suffix)

View File

@@ -43,6 +43,8 @@ if 'chunk_overlap' not in st.session_state:
# Create a radio button for user to choose between adding knowledge or asking a question
user_choice = st.radio("Choose an action", ('Add Knowledge to the Brain', 'Ask a Question to the Brain'))
st.markdown("---\n\n")
if user_choice == 'Add Knowledge to the Brain':
# Display chunk size and overlap selection only when adding knowledge
st.sidebar.title("Configuration")