mirror of
https://github.com/QuivrHQ/quivr.git
synced 2025-01-07 08:07:44 +03:00
feat(pdf): added pdf loader
This commit is contained in:
parent
5d66cd5223
commit
bc7e84b1f9
5
files.py
5
files.py
@ -5,12 +5,14 @@ from loaders.txt import process_txt
|
|||||||
from loaders.csv import process_csv
|
from loaders.csv import process_csv
|
||||||
from loaders.markdown import process_markdown
|
from loaders.markdown import process_markdown
|
||||||
from utils import compute_sha1_from_content
|
from utils import compute_sha1_from_content
|
||||||
|
from loaders.pdf import process_pdf
|
||||||
|
|
||||||
def file_uploader(supabase, openai_key, vector_store):
|
def file_uploader(supabase, openai_key, vector_store):
|
||||||
file_processors = {
|
file_processors = {
|
||||||
".txt": process_txt,
|
".txt": process_txt,
|
||||||
".csv": process_csv,
|
".csv": process_csv,
|
||||||
".md": process_markdown,
|
".md": process_markdown,
|
||||||
|
".markdown": process_markdown,
|
||||||
".m4a": process_audio,
|
".m4a": process_audio,
|
||||||
".mp3": process_audio,
|
".mp3": process_audio,
|
||||||
".webm": process_audio,
|
".webm": process_audio,
|
||||||
@ -18,6 +20,7 @@ def file_uploader(supabase, openai_key, vector_store):
|
|||||||
".mpga": process_audio,
|
".mpga": process_audio,
|
||||||
".wav": process_audio,
|
".wav": process_audio,
|
||||||
".mpeg": process_audio,
|
".mpeg": process_audio,
|
||||||
|
".pdf": process_pdf,
|
||||||
}
|
}
|
||||||
|
|
||||||
files = st.file_uploader("Upload a file", accept_multiple_files=True, type=list(file_processors.keys()))
|
files = st.file_uploader("Upload a file", accept_multiple_files=True, type=list(file_processors.keys()))
|
||||||
@ -26,6 +29,8 @@ def file_uploader(supabase, openai_key, vector_store):
|
|||||||
for file in files:
|
for file in files:
|
||||||
if file_already_exists(supabase, file):
|
if file_already_exists(supabase, file):
|
||||||
st.write(f"😎 {file.name} is already in the database.")
|
st.write(f"😎 {file.name} is already in the database.")
|
||||||
|
elif file.size < 1:
|
||||||
|
st.write(f"💨 {file.name} is empty.")
|
||||||
else:
|
else:
|
||||||
file_extension = os.path.splitext(file.name)[-1]
|
file_extension = os.path.splitext(file.name)[-1]
|
||||||
if file_extension in file_processors:
|
if file_extension in file_processors:
|
||||||
|
6
loaders/pdf.py
Normal file
6
loaders/pdf.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from .common import process_file
|
||||||
|
from langchain.document_loaders import PyPDFLoader
|
||||||
|
|
||||||
|
|
||||||
|
def process_pdf(vector_store, file):
|
||||||
|
return process_file(vector_store, file, PyPDFLoader, ".pdf")
|
2
main.py
2
main.py
@ -43,6 +43,8 @@ if 'chunk_overlap' not in st.session_state:
|
|||||||
# Create a radio button for user to choose between adding knowledge or asking a question
|
# Create a radio button for user to choose between adding knowledge or asking a question
|
||||||
user_choice = st.radio("Choose an action", ('Add Knowledge to the Brain', 'Ask a Question to the Brain'))
|
user_choice = st.radio("Choose an action", ('Add Knowledge to the Brain', 'Ask a Question to the Brain'))
|
||||||
|
|
||||||
|
st.markdown("---\n\n")
|
||||||
|
|
||||||
if user_choice == 'Add Knowledge to the Brain':
|
if user_choice == 'Add Knowledge to the Brain':
|
||||||
# Display chunk size and overlap selection only when adding knowledge
|
# Display chunk size and overlap selection only when adding knowledge
|
||||||
st.sidebar.title("Configuration")
|
st.sidebar.title("Configuration")
|
||||||
|
Loading…
Reference in New Issue
Block a user