feat(loaders): added powerpoint and docx

This commit is contained in:
Stan Girard 2023-05-16 16:24:06 +02:00
parent 29eac401b5
commit 6959cd3aba
3 changed files with 14 additions and 0 deletions

View File

@ -9,6 +9,8 @@ from loaders.html import process_html
from utils import compute_sha1_from_content
from loaders.pdf import process_pdf
from loaders.html import get_html, create_html_file, delete_tempfile
from loaders.powerpoint import process_powerpoint
from loaders.docx import process_docx
import requests
import re
import unicodedata
@ -28,6 +30,8 @@ file_processors = {
".mpeg": process_audio,
".pdf": process_pdf,
".html": process_html,
".pptx": process_powerpoint,
".docx": process_docx
}
def file_uploader(supabase, openai_key, vector_store):

5
loaders/docx.py Normal file
View File

@ -0,0 +1,5 @@
from .common import process_file
from langchain.document_loaders import Docx2txtLoader
def process_docx(vector_store, file):
    """Process a ``.docx`` file by delegating to the shared ``process_file``.

    ``vector_store`` and ``file`` are forwarded unchanged, followed by the
    ``Docx2txtLoader`` class and the ``".docx"`` suffix. The result of
    ``process_file`` is returned as-is.
    """
    # NOTE(review): presumably process_file instantiates the loader class to
    # parse the document and uses the suffix for a temporary file -- confirm
    # against loaders/common.py.
    loader_class = Docx2txtLoader
    file_suffix = ".docx"
    return process_file(vector_store, file, loader_class, file_suffix)

5
loaders/powerpoint.py Normal file
View File

@ -0,0 +1,5 @@
from .common import process_file
from langchain.document_loaders import UnstructuredPowerPointLoader
def process_powerpoint(vector_store, file):
    """Process a ``.pptx`` file by delegating to the shared ``process_file``.

    ``vector_store`` and ``file`` are forwarded unchanged, followed by the
    ``UnstructuredPowerPointLoader`` class and the ``".pptx"`` suffix. The
    result of ``process_file`` is returned as-is.
    """
    # NOTE(review): presumably process_file instantiates the loader class to
    # parse the presentation and uses the suffix for a temporary file --
    # confirm against loaders/common.py.
    loader_class = UnstructuredPowerPointLoader
    file_suffix = ".pptx"
    return process_file(vector_store, file, loader_class, file_suffix)