From 514f3b352a24386fe34721cbe26cef5b226d7c1d Mon Sep 17 00:00:00 2001
From: Murtaza
Date: Mon, 22 May 2023 00:42:41 +0530
Subject: [PATCH] Add support for jupyter notebooks.

---
Review notes (free-form text in this position is not part of the commit):

* What the patch does: backend/api.py imports `process_ipnyb` and adds a
  ".ipynb" entry for it to the `file_processors` dict. The new module
  backend/parsers/notebook.py defines `process_ipnyb(vector_store, file,
  stats_db)`, which returns the result of
  `process_file(vector_store, file, NotebookLoader, "ipynb", stats_db=stats_db)`
  with `NotebookLoader` imported from `langchain.document_loaders`.

* NOTE(review): the handler is spelled `process_ipnyb` while the extension
  is `ipynb`. The spelling is the same at all three sites in this patch
  (import, dict entry, definition), so it resolves; consider renaming to
  `process_ipynb` in a follow-up.

* NOTE(review): the suffix argument is passed as "ipynb" without a leading
  dot, whereas every `file_processors` key visible here is dotted
  (".pptx", ".docx", ".epub", ".ipynb"). `process_file` is not shown in
  this patch - TODO confirm which form it expects.

* NOTE(review): the new function has no docstring; adding one would change
  the `+1,7` hunk count and the diffstat below, so it is left for a
  follow-up commit.

 backend/api.py              | 2 ++
 backend/parsers/notebook.py | 7 +++++++
 2 files changed, 9 insertions(+)
 create mode 100644 backend/parsers/notebook.py

diff --git a/backend/api.py b/backend/api.py
index 0c2dbadff..04464bbae 100644
--- a/backend/api.py
+++ b/backend/api.py
@@ -19,6 +19,7 @@ from parsers.txt import process_txt
 from parsers.csv import process_csv
 from parsers.docx import process_docx
 from parsers.pdf import process_pdf
+from parsers.notebook import process_ipnyb
 from parsers.markdown import process_markdown
 from parsers.powerpoint import process_powerpoint
 from parsers.html import process_html
@@ -91,6 +92,7 @@ file_processors = {
     ".pptx": process_powerpoint,
     ".docx": process_docx,
     ".epub": process_epub,
+    ".ipynb": process_ipnyb,
 }
 
 async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
diff --git a/backend/parsers/notebook.py b/backend/parsers/notebook.py
new file mode 100644
index 000000000..cf05811f5
--- /dev/null
+++ b/backend/parsers/notebook.py
@@ -0,0 +1,7 @@
+from .common import process_file
+from langchain.document_loaders import NotebookLoader
+from fastapi import UploadFile
+
+
+def process_ipnyb(vector_store, file: UploadFile, stats_db):
+    return process_file(vector_store, file, NotebookLoader, "ipynb", stats_db=stats_db)