feat(loaders): added powerpoint (.pptx) and docx loaders

This commit is contained in:
Stan Girard 2023-05-16 16:24:06 +02:00
parent 29eac401b5
commit 6959cd3aba
3 changed files with 14 additions and 0 deletions

View File

@ -9,6 +9,8 @@ from loaders.html import process_html
from utils import compute_sha1_from_content from utils import compute_sha1_from_content
from loaders.pdf import process_pdf from loaders.pdf import process_pdf
from loaders.html import get_html, create_html_file, delete_tempfile from loaders.html import get_html, create_html_file, delete_tempfile
from loaders.powerpoint import process_powerpoint
from loaders.docx import process_docx
import requests import requests
import re import re
import unicodedata import unicodedata
@ -28,6 +30,8 @@ file_processors = {
".mpeg": process_audio, ".mpeg": process_audio,
".pdf": process_pdf, ".pdf": process_pdf,
".html": process_html, ".html": process_html,
".pptx": process_powerpoint,
".docx": process_docx
} }
def file_uploader(supabase, openai_key, vector_store): def file_uploader(supabase, openai_key, vector_store):

5
loaders/docx.py Normal file
View File

@ -0,0 +1,5 @@
from .common import process_file
from langchain.document_loaders import Docx2txtLoader
def process_docx(vector_store, file):
    """Handle an uploaded ``.docx`` file by delegating to ``process_file``.

    Args:
        vector_store: Forwarded unchanged as the first argument of
            ``process_file``.
        file: Forwarded unchanged as the second argument of
            ``process_file``.

    Returns:
        Whatever ``process_file`` returns for this call.
    """
    # NOTE(review): the fourth argument is presumably the file suffix that
    # process_file uses for this format -- confirm in loaders/common.py.
    loader_class = Docx2txtLoader
    suffix = ".docx"
    return process_file(vector_store, file, loader_class, suffix)

5
loaders/powerpoint.py Normal file
View File

@ -0,0 +1,5 @@
from .common import process_file
from langchain.document_loaders import UnstructuredPowerPointLoader
def process_powerpoint(vector_store, file):
    """Handle an uploaded ``.pptx`` file by delegating to ``process_file``.

    Args:
        vector_store: Forwarded unchanged as the first argument of
            ``process_file``.
        file: Forwarded unchanged as the second argument of
            ``process_file``.

    Returns:
        Whatever ``process_file`` returns for this call.
    """
    # NOTE(review): ".pptx" is presumably the file suffix process_file expects
    # for this format -- confirm in loaders/common.py.
    result = process_file(
        vector_store,
        file,
        UnstructuredPowerPointLoader,
        ".pptx",
    )
    return result