fix(retrieval): reduce default similarity-search k from 16 to 8 because retrieval takes too long

This commit is contained in:
Stan Girard 2023-06-19 11:23:58 +02:00
parent f21630c70d
commit 225280f2f5
3 changed files with 3 additions and 3 deletions

View File

@ -32,7 +32,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
query: str,
user_id: str = "none",
table: str = "match_vectors",
k: int = 16,
k: int = 8,
threshold: float = 0.5,
**kwargs: Any
) -> List[Document]:

View File

@ -29,7 +29,7 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, user,
print(documents[:1])
for doc in documents:
if doc.metadata["file_type"] in [".pyc", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git"]:
if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]:
continue
metadata = {
"file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),

View File

@ -57,4 +57,4 @@ async def crawl_endpoint(request: Request,commons: CommonsDep, crawl_website: Cr
message = await filter_file(commons, file, enable_summarization, user=current_user, openai_api_key=request.headers.get('Openai-Api-Key', None))
return message
else:
message = await process_github(crawl_website.url, "false", user=current_user, supabase=commons['supabase'], user_openai_api_key=request.headers.get('Openai-Api-Key', None))
message = await process_github(commons,crawl_website.url, "false", user=current_user, supabase=commons['supabase'], user_openai_api_key=request.headers.get('Openai-Api-Key', None))