From 225280f2f5d7450b3ae9ea8f044b6ecc2fc4151b Mon Sep 17 00:00:00 2001
From: Stan Girard
Date: Mon, 19 Jun 2023 11:23:58 +0200
Subject: [PATCH] fix(doc): retrieval to 8 because it takes a long time

---
 backend/llm/qa.py              | 2 +-
 backend/parsers/github.py      | 2 +-
 backend/routes/crawl_routes.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/llm/qa.py b/backend/llm/qa.py
index d7fa03716..ccab97eb6 100644
--- a/backend/llm/qa.py
+++ b/backend/llm/qa.py
@@ -32,7 +32,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
         query: str,
         user_id: str = "none",
         table: str = "match_vectors",
-        k: int = 16,
+        k: int = 8,
         threshold: float = 0.5,
         **kwargs: Any
     ) -> List[Document]:
diff --git a/backend/parsers/github.py b/backend/parsers/github.py
index 3c156819f..e1d0a4dc1 100644
--- a/backend/parsers/github.py
+++ b/backend/parsers/github.py
@@ -29,7 +29,7 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, user,
     print(documents[:1])

     for doc in documents:
-        if doc.metadata["file_type"] in [".pyc", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git"]:
+        if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]:
             continue
         metadata = {
             "file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),
diff --git a/backend/routes/crawl_routes.py b/backend/routes/crawl_routes.py
index 314d1b5f2..c288c4f60 100644
--- a/backend/routes/crawl_routes.py
+++ b/backend/routes/crawl_routes.py
@@ -57,4 +57,4 @@ async def crawl_endpoint(request: Request,commons: CommonsDep, crawl_website: Cr
         message = await filter_file(commons, file, enable_summarization, user=current_user, openai_api_key=request.headers.get('Openai-Api-Key', None))
         return message
     else:
-        message = await process_github(crawl_website.url, "false", user=current_user, supabase=commons['supabase'], user_openai_api_key=request.headers.get('Openai-Api-Key', None))
+        message = await process_github(commons,crawl_website.url, "false", user=current_user, supabase=commons['supabase'], user_openai_api_key=request.headers.get('Openai-Api-Key', None))
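
For context on the first hunk, a minimal sketch of what the new default implies, not the Quivr code itself: the real CustomSupabaseVectorStore.similarity_search queries Supabase, so the mock Document class, the scores, and the similarity_search_mock function below are illustrative assumptions, kept only to show that the result cap drops from 16 to 8 matches unless a caller overrides k.

# Illustrative only -- a self-contained mock, not the Quivr CustomSupabaseVectorStore.
# It mirrors the patched default (k reduced from 16 to 8) to show that fewer
# matches come back unless a caller passes k explicitly.
from dataclasses import dataclass
from typing import List


@dataclass
class Document:
    page_content: str
    score: float


def similarity_search_mock(
    query: str,
    candidates: List[Document],
    k: int = 8,            # patched default; was 16 before this change
    threshold: float = 0.5,
) -> List[Document]:
    """Return at most k candidates whose score clears the threshold."""
    passing = [doc for doc in candidates if doc.score >= threshold]
    passing.sort(key=lambda doc: doc.score, reverse=True)
    return passing[:k]


if __name__ == "__main__":
    docs = [Document(f"chunk {i}", score=0.4 + i * 0.05) for i in range(12)]
    print(len(similarity_search_mock("what is quivr?", docs)))        # 8: capped by the new default
    print(len(similarity_search_mock("what is quivr?", docs, k=16)))  # 10: explicit k restores the old cap

Returning fewer chunks shrinks the context passed to the model on each question, which is presumably the latency concern named in the commit subject.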