fix: linting errors

This commit is contained in:
mamadoudicko 2023-07-02 15:19:09 +02:00
parent 44c6a2ac93
commit 255826f121
7 changed files with 186 additions and 102 deletions

View File

@ -1,6 +1,6 @@
from uuid import UUID
from logger import get_logger
from logger import get_logger # pyright: ignore reportPrivateUsage=none
from models.settings import common_dependencies
from pydantic import BaseModel
@ -10,16 +10,16 @@ logger = get_logger(__name__)
class User(BaseModel):
id: UUID
email: str
user_openai_api_key: str = None
user_openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
requests_count: int = 0
# [TODO] Rename the user table and its references to 'user_usage'
def create_user(self, date):
def create_user(self, date): # pyright: ignore reportPrivateUsage=none
commons = common_dependencies()
logger.info(f"New user entry in db document for user {self.email}")
return (
commons.supabase.table("users")
commons.supabase.table("users") # pyright: ignore reportPrivateUsage=none
.insert(
{
"user_id": self.id,
@ -31,36 +31,50 @@ class User(BaseModel):
.execute()
)
def get_user_request_stats(self):  # pyright: ignore reportPrivateUsage=none
    """Return the rows of the ``users`` table recorded for this user.

    Selects every column of every row whose ``user_id`` equals ``self.id``.

    Returns:
        The ``data`` attribute of the executed query response.
    """
    commons = common_dependencies()
    requests_stats = (  # pyright: ignore reportPrivateUsage=none
        commons["supabase"]  # pyright: ignore reportPrivateUsage=none
        .from_("users")
        .select("*")
        .filter("user_id", "eq", self.id)
        .execute()
    )
    return requests_stats.data  # pyright: ignore reportPrivateUsage=none
def fetch_user_requests_count(
    self, date  # pyright: ignore reportPrivateUsage=none
):  # pyright: ignore reportPrivateUsage=none
    """Return the stored ``requests_count`` for this user on ``date``.

    Args:
        date: Value matched against the ``date`` column of the ``users`` table.

    Returns:
        The ``requests_count`` of the first matching row, or ``0`` when the
        query returns no rows.
    """
    commons = common_dependencies()
    response = (  # pyright: ignore reportPrivateUsage=none
        commons["supabase"]  # pyright: ignore reportPrivateUsage=none
        .from_("users")
        .select("*")
        .filter("user_id", "eq", self.id)
        .filter("date", "eq", date)
        .execute()
    )
    # Fall back to a zero-count placeholder when no row matched.
    userItem = next(  # pyright: ignore reportPrivateUsage=none
        iter(response.data or []),  # pyright: ignore reportPrivateUsage=none
        {"requests_count": 0},
    )

    return userItem["requests_count"]
def increment_user_request_count(
    self, date  # pyright: ignore reportPrivateUsage=none
):  # pyright: ignore reportPrivateUsage=none
    """Add one to this user's request count for ``date`` and store it.

    The current count is read with ``fetch_user_requests_count``, incremented,
    written to the ``users`` row matching ``user_id`` and ``date``, and mirrored
    on ``self.requests_count``.

    Args:
        date: Value matched against the ``date`` column of the ``users`` table.
    """
    commons = common_dependencies()
    # NOTE(review): the count is read and then written back in two separate
    # queries, so concurrent calls may overwrite each other -- confirm whether
    # that matters for usage accounting.
    requests_count = (
        self.fetch_user_requests_count(  # pyright: ignore reportPrivateUsage=none
            date  # pyright: ignore reportPrivateUsage=none
        )
        + 1
    )
    logger.info(f"User {self.email} request count updated to {requests_count}")
    commons["supabase"].table(  # pyright: ignore reportPrivateUsage=none
        "users"
    ).update({"requests_count": requests_count}).match(
        {"user_id": self.id, "date": date}
    ).execute()
    self.requests_count = requests_count

View File

@ -8,15 +8,17 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from models.files import File
from models.settings import CommonsDep
from utils.file import compute_sha1_from_content
from utils.file import (
compute_sha1_from_content, # pyright: ignore reportPrivateUsage=none
)
async def process_audio(
commons: CommonsDep,
file: File,
enable_summarization: bool,
user,
user_openai_api_key,
user, # pyright: ignore reportPrivateUsage=none
user_openai_api_key, # pyright: ignore reportPrivateUsage=none
):
temp_filename = None
file_sha = ""
@ -26,10 +28,13 @@ async def process_audio(
try:
upload_file = file.file
with tempfile.NamedTemporaryFile(
delete=False, suffix=upload_file.filename
delete=False,
suffix=upload_file.filename, # pyright: ignore reportPrivateUsage=none
) as tmp_file:
await upload_file.seek(0)
content = await upload_file.read()
await upload_file.seek(0) # pyright: ignore reportPrivateUsage=none
content = (
await upload_file.read()
) # pyright: ignore reportPrivateUsage=none
tmp_file.write(content)
tmp_file.flush()
tmp_file.close()
@ -37,7 +42,12 @@ async def process_audio(
temp_filename = tmp_file.name
with open(tmp_file.name, "rb") as audio_file:
transcript = openai.Audio.transcribe("whisper-1", audio_file)
transcript = ( # pyright: ignore reportPrivateUsage=none
openai.Audio.transcribe( # pyright: ignore reportPrivateUsage=none
"whisper-1",
audio_file, # pyright: ignore reportPrivateUsage=none
)
)
file_sha = compute_sha1_from_content(transcript.text.encode("utf-8"))
file_size = len(transcript.text.encode("utf-8"))

View File

@ -5,26 +5,31 @@ from uuid import UUID
from fastapi import APIRouter, Depends, Query, Request, UploadFile
from auth.auth_bearer import AuthBearer, get_current_user
from auth.auth_bearer import (
AuthBearer,
get_current_user, # pyright: ignore reportPrivateUsage=none,,
)
from crawl.crawler import CrawlWebsite
from models.brains import Brain
from models.files import File
from models.settings import common_dependencies
from models.users import User
from parsers.github import process_github
from utils.file import convert_bytes
from utils.processors import filter_file
from parsers.github import process_github # pyright: ignore reportPrivateUsage=none,
from utils.file import convert_bytes # pyright: ignore reportPrivateUsage=none,
from utils.processors import filter_file # pyright: ignore reportPrivateUsage=none,
crawl_router = APIRouter()
@crawl_router.post("/crawl", dependencies=[Depends(AuthBearer())], tags=["Crawl"])
async def crawl_endpoint(
async def crawl_endpoint( # pyright: ignore reportPrivateUsage=none,
request: Request,
crawl_website: CrawlWebsite,
brain_id: UUID = Query(..., description="The ID of the brain"),
enable_summarization: bool = False,
current_user: User = Depends(get_current_user),
current_user: User = Depends(
get_current_user # pyright: ignore reportPrivateUsage=none,
), # pyright: ignore reportPrivateUsage=none,
):
"""
Crawl a website and process the crawled data.
@ -36,7 +41,9 @@ async def crawl_endpoint(
commons = common_dependencies()
if request.headers.get("Openai-Api-Key"):
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
brain.max_brain_size = os.getenv(
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
) # pyright: ignore reportPrivateUsage=none,
file_size = 1000000
remaining_free_space = brain.remaining_brain_size
@ -48,24 +55,30 @@ async def crawl_endpoint(
}
else:
if not crawl_website.checkGithub():
file_path, file_name = crawl_website.process()
(
file_path, # pyright: ignore reportPrivateUsage=none,
file_name, # pyright: ignore reportPrivateUsage=none,
) = crawl_website.process() # pyright: ignore reportPrivateUsage=none,
# Create a SpooledTemporaryFile from the file_path
spooled_file = SpooledTemporaryFile()
with open(file_path, "rb") as f:
with open(file_path, "rb") as f: # pyright: ignore reportPrivateUsage=none,
shutil.copyfileobj(f, spooled_file)
# Pass the SpooledTemporaryFile to UploadFile
uploadFile = UploadFile(file=spooled_file, filename=file_name)
uploadFile = UploadFile(
file=spooled_file, # pyright: ignore reportPrivateUsage=none,
filename=file_name, # pyright: ignore reportPrivateUsage=none,
) # pyright: ignore reportPrivateUsage=none,
file = File(file=uploadFile)
# check remaining free space here !!
message = await filter_file(
message = await filter_file( # pyright: ignore reportPrivateUsage=none,
commons,
file,
enable_summarization,
brain.id,
openai_api_key=request.headers.get("Openai-Api-Key", None),
)
return message
return message # pyright: ignore reportPrivateUsage=none,
else:
# check remaining free space here !!
message = await process_github(

View File

@ -2,7 +2,10 @@ from uuid import UUID
from fastapi import APIRouter, Depends, Query
from auth.auth_bearer import AuthBearer, get_current_user
from auth.auth_bearer import (
AuthBearer,
get_current_user, # pyright: ignore reportPrivateUsage=none,
)
from models.brains import Brain
from models.settings import common_dependencies
from models.users import User
@ -11,18 +14,25 @@ explore_router = APIRouter()
@explore_router.get("/explore/", dependencies=[Depends(AuthBearer())], tags=["Explore"])
async def explore_endpoint(  # pyright: ignore reportPrivateUsage=none
    brain_id: UUID = Query(..., description="The ID of the brain"),
    current_user: User = Depends(
        get_current_user,  # pyright: ignore reportPrivateUsage=none
    ),
):
    """
    Retrieve and explore unique user data vectors.

    Returns the brain's unique files under the ``documents`` key, ordered by
    their ``size`` entry (converted with ``int``) from largest to smallest.
    """
    brain = Brain(id=brain_id)
    unique_data = (  # pyright: ignore reportPrivateUsage=none
        brain.get_unique_brain_files()
    )  # pyright: ignore reportPrivateUsage=none

    # Largest files first.
    unique_data.sort(  # pyright: ignore reportPrivateUsage=none
        key=lambda x: int(x["size"]),  # pyright: ignore reportPrivateUsage=none
        reverse=True,  # pyright: ignore reportPrivateUsage=none
    )
    return {"documents": unique_data}  # pyright: ignore reportPrivateUsage=none
@explore_router.delete(
@ -30,7 +40,9 @@ async def explore_endpoint(
)
async def delete_endpoint(
file_name: str,
current_user: User = Depends(get_current_user),
current_user: User = Depends(
get_current_user # pyright: ignore reportPrivateUsage=none
),
brain_id: UUID = Query(..., description="The ID of the brain"), # noqa: B008
):
"""
@ -47,8 +59,11 @@ async def delete_endpoint(
@explore_router.get(
"/explore/{file_name}/", dependencies=[Depends(AuthBearer())], tags=["Explore"]
)
async def download_endpoint(
file_name: str, current_user: User = Depends(get_current_user)
async def download_endpoint( # pyright: ignore reportPrivateUsage=none
file_name: str,
current_user: User = Depends(
get_current_user # pyright: ignore reportPrivateUsage=none
),
):
"""
Download a specific user file by file name.
@ -56,8 +71,8 @@ async def download_endpoint(
# check if user has the right to get the file: add brain_id to the query
commons = common_dependencies()
response = (
commons["supabase"]
response = ( # pyright: ignore reportPrivateUsage=none
commons["supabase"] # pyright: ignore reportPrivateUsage=none
.table("vectors")
.select(
"metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url",
@ -66,5 +81,5 @@ async def download_endpoint(
.match({"metadata->>file_name": file_name})
.execute()
)
documents = response.data
return {"documents": documents}
documents = response.data # pyright: ignore reportPrivateUsage=none
return {"documents": documents} # pyright: ignore reportPrivateUsage=none

View File

@ -3,24 +3,32 @@ from uuid import UUID
from fastapi import APIRouter, Depends, Query, Request, UploadFile
from auth.auth_bearer import AuthBearer, get_current_user
from auth.auth_bearer import (
AuthBearer,
get_current_user, # pyright: ignore reportPrivateUsage=none,
)
from models.brains import Brain
from models.files import File
from models.settings import common_dependencies
from models.users import User
from utils.file import convert_bytes, get_file_size
from utils.processors import filter_file
from utils.file import (
convert_bytes, # pyright: ignore reportPrivateUsage=none,
get_file_size, # pyright: ignore reportPrivateUsage=none,
)
from utils.processors import filter_file # pyright: ignore reportPrivateUsage=none
upload_router = APIRouter()
@upload_router.post("/upload", dependencies=[Depends(AuthBearer())], tags=["Upload"])
async def upload_file(
async def upload_file( # pyright: ignore reportPrivateUsage=none
request: Request,
uploadFile: UploadFile,
brain_id: UUID = Query(..., description="The ID of the brain"),
enable_summarization: bool = False,
current_user: User = Depends(get_current_user),
current_user: User = Depends(
get_current_user # pyright: ignore reportPrivateUsage=none
),
):
"""
Upload a file to the user's storage.
@ -43,10 +51,12 @@ async def upload_file(
commons = common_dependencies()
if request.headers.get("Openai-Api-Key"):
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
brain.max_brain_size = os.getenv(
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
) # pyright: ignore reportPrivateUsage=none
remaining_free_space = brain.remaining_brain_size
file_size = get_file_size(uploadFile)
file_size = get_file_size(uploadFile) # pyright: ignore reportPrivateUsage=none
file = File(file=uploadFile)
if remaining_free_space - file_size < 0:
@ -55,7 +65,7 @@ async def upload_file(
"type": "error",
}
else:
message = await filter_file(
message = await filter_file( # pyright: ignore reportPrivateUsage=none
commons,
file,
enable_summarization,
@ -63,4 +73,4 @@ async def upload_file(
openai_api_key=request.headers.get("Openai-Api-Key", None),
)
return message
return message # pyright: ignore reportPrivateUsage=none

View File

@ -3,35 +3,40 @@ import hashlib
from fastapi import UploadFile
def convert_bytes(  # pyright: ignore reportPrivateUsage=none
    bytes, precision=2, abbreviations=("B", "KB", "MB")
):
    """Converts bytes into a human-friendly format.

    Args:
        bytes: Size to format. Values less than or equal to zero give "0 B".
        precision: Number of decimal places in the formatted number.
        abbreviations: Unit labels in ascending order, each 1024 times the
            previous one. The value is never scaled past the last label, so
            with the default labels anything of 1 GiB or more is still
            reported in MB.

    Returns:
        A string such as "1.50 KB".
    """
    if bytes <= 0:
        return "0 B"
    size = bytes  # pyright: ignore reportPrivateUsage=none
    index = 0
    # Divide down until the value fits the unit or the labels run out.
    while size >= 1024 and index < len(abbreviations) - 1:
        size /= 1024  # pyright: ignore reportPrivateUsage=none
        index += 1
    return f"{size:.{precision}f} {abbreviations[index]}"
def get_file_size(file: "UploadFile"):
    """Return the size in bytes of the file wrapped by ``file``.

    The size is measured by seeking to the end of ``file.file`` and reading the
    resulting offset. Only the public file-object interface is used, so no
    private-attribute access (and no lint suppression) is needed.

    Args:
        file: Upload wrapper whose ``file`` attribute is a seekable binary file
            object.

    Returns:
        The end-of-file offset reported by ``tell()``.

    Note:
        The cursor is left at the beginning of the file, not at the position it
        had before the call.
    """
    stream = file.file
    # move the cursor to the end of the file
    stream.seek(0, 2)
    file_size = stream.tell()  # Getting the size of the file
    # move the cursor back to the beginning of the file
    stream.seek(0)

    return file_size
def compute_sha1_from_file(file_path):  # pyright: ignore reportPrivateUsage=none
    """Return the hexadecimal SHA-1 digest of the file at ``file_path``.

    The file is read in fixed-size chunks, so large files are not loaded into
    memory in one piece. The digest is the same as hashing the whole content
    at once.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.

    Returns:
        The digest as a 40-character hexadecimal string.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as file:  # pyright: ignore reportPrivateUsage=none
        # read() returns b"" at end of file, which stops the iteration.
        for chunk in iter(lambda: file.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def compute_sha1_from_content(content):  # pyright: ignore reportPrivateUsage=none
    """Return the hexadecimal SHA-1 digest of ``content``.

    Args:
        content: Bytes-like data to hash.

    Returns:
        The digest as a 40-character hexadecimal string.
    """
    readable_hash = hashlib.sha1(
        content  # pyright: ignore reportPrivateUsage=none
    ).hexdigest()  # pyright: ignore reportPrivateUsage=none
    return readable_hash

View File

@ -1,19 +1,21 @@
from models.brains import Brain
from models.files import File
from models.settings import CommonsDep
from parsers.audio import process_audio
from parsers.csv import process_csv
from parsers.docx import process_docx
from parsers.epub import process_epub
from parsers.html import process_html
from parsers.markdown import process_markdown
from parsers.notebook import process_ipnyb
from parsers.odt import process_odt
from parsers.pdf import process_pdf
from parsers.powerpoint import process_powerpoint
from parsers.txt import process_txt
from parsers.audio import process_audio # pyright: ignore reportPrivateUsage=none
from parsers.csv import process_csv # pyright: ignore reportPrivateUsage=none
from parsers.docx import process_docx # pyright: ignore reportPrivateUsage=none
from parsers.epub import process_epub # pyright: ignore reportPrivateUsage=none
from parsers.html import process_html # pyright: ignore reportPrivateUsage=none
from parsers.markdown import process_markdown # pyright: ignore reportPrivateUsage=none
from parsers.notebook import process_ipnyb # pyright: ignore reportPrivateUsage=none
from parsers.odt import process_odt # pyright: ignore reportPrivateUsage=none
from parsers.pdf import process_pdf # pyright: ignore reportPrivateUsage=none
from parsers.powerpoint import (
process_powerpoint, # pyright: ignore reportPrivateUsage=none
)
from parsers.txt import process_txt # pyright: ignore reportPrivateUsage=none
file_processors = {
file_processors = { # pyright: ignore reportPrivateUsage=none
".txt": process_txt,
".csv": process_csv,
".md": process_markdown,
@ -35,49 +37,64 @@ file_processors = {
}
def create_response(message, type):  # pyright: ignore reportPrivateUsage=none
    """Build the response payload returned for a processed file.

    Args:
        message: Text describing the outcome.
        type: Outcome label; callers in this module pass "success",
            "warning" or "error".

    Returns:
        A dict with the keys ``"message"`` and ``"type"``.
    """
    return {"message": message, "type": type}  # pyright: ignore reportPrivateUsage=none
async def filter_file(  # pyright: ignore reportPrivateUsage=none
    commons: CommonsDep,
    file: File,
    enable_summarization: bool,
    brain_id,  # pyright: ignore reportPrivateUsage=none
    openai_api_key,  # pyright: ignore reportPrivateUsage=none
):
    """Check an uploaded file and hand it to the processor for its extension.

    Checks run in this order, and the first one that applies decides the
    response:

    1. the file already exists in the brain -> "warning"
    2. the file is empty -> "error"
    3. the file already exists elsewhere -> it is linked to the brain,
       "success"
    4. the extension has an entry in ``file_processors`` -> that processor is
       awaited; "success", or "error" if it raises
    5. otherwise -> "error" (extension not supported)

    Args:
        commons: Shared dependencies forwarded to the processor.
        file: The file to check and process.
        enable_summarization: Forwarded to the processor.
        brain_id: Identifier of the target brain.
        openai_api_key: Forwarded to the processor.

    Returns:
        The dict built by ``create_response``.
    """
    await file.compute_file_sha1()

    print("file sha1", file.file_sha1)

    file_exists = file.file_already_exists()
    file_exists_in_brain = (
        file.file_already_exists_in_brain(  # pyright: ignore reportPrivateUsage=none
            brain_id,  # pyright: ignore reportPrivateUsage=none
        )
    )

    # Each comma separating the message from the status must come before the
    # trailing comment; otherwise the two string literals are concatenated
    # into a single argument.
    if file_exists_in_brain:
        return create_response(
            f"🤔 {file.file.filename} already exists in brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
            "warning",
        )
    elif file.file_is_empty():
        return create_response(
            f"{file.file.filename} is empty.",  # pyright: ignore reportPrivateUsage=none
            "error",
        )
    elif file_exists:
        file.link_file_to_brain(
            brain=Brain(id=brain_id)  # pyright: ignore reportPrivateUsage=none
        )  # pyright: ignore reportPrivateUsage=none
        return create_response(
            f"{file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
            "success",
        )

    if file.file_extension in file_processors:
        try:
            await file_processors[file.file_extension](  # pyright: ignore reportPrivateUsage=none
                commons,
                file,
                enable_summarization,
                brain_id,  # pyright: ignore reportPrivateUsage=none
                openai_api_key,  # pyright: ignore reportPrivateUsage=none
            )
            return create_response(
                f"{file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
                "success",
            )
        except Exception as e:
            # Add more specific exceptions as needed.
            print(f"Error processing file: {e}")
            return create_response(
                f"⚠️ An error occurred while processing {file.file.filename}.",  # pyright: ignore reportPrivateUsage=none
                "error",
            )

    return create_response(f"{file.file.filename} is not supported.", "error")  # pyright: ignore reportPrivateUsage=none