mirror of
https://github.com/StanGirard/quivr.git
synced 2025-01-06 03:04:42 +03:00
35f5fe0958
fixed a few bugs
33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
from uuid import UUID
|
|
|
|
from logger import get_logger
|
|
from models.settings import get_embeddings, get_supabase_client
|
|
from vectorstore.supabase import CustomSupabaseVectorStore
|
|
|
|
# Logger for this module, obtained from the project's get_logger helper using the module name.
logger = get_logger(__name__)
|
|
|
|
|
|
def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
    """Build a context string for *question* from the documents of one brain.

    Runs a similarity search for ``question`` through a
    ``CustomSupabaseVectorStore`` on the ``vectors`` table, scoped to
    ``brain_id``, then concatenates the ``page_content`` of the leading
    results that fit inside a fixed size budget.

    Args:
        brain_id: Identifier of the brain to search; handed to the vector
            store as a string.
        question: Text used as the similarity-search query.

    Returns:
        The ``page_content`` of the retained documents joined with newlines,
        in the order the search returned them. An empty string when no
        document is retained.
    """
    # The returned context has to stay well below ~2500 tokens, so results
    # are cut off once the running estimate would pass the budget.
    # NOTE(review): the original comment assumed "1 token = 1.5 characters",
    # yet the estimate multiplies the character count by 1.5; with a budget
    # of 3000 this caps the context at roughly 2000 characters. The
    # arithmetic is kept unchanged so the effective limit stays the same --
    # confirm the intended budget.
    estimate_per_char = 1.5
    budget = 3000

    supabase_client = get_supabase_client()
    embeddings = get_embeddings()
    vector_store = CustomSupabaseVectorStore(
        supabase_client,
        embeddings,
        table_name="vectors",
        brain_id=str(brain_id),
    )
    documents = vector_store.similarity_search(question)

    # Collect into a new list rather than calling ``documents.remove`` inside
    # the loop: removing from a list while iterating over it skips the
    # element that follows each removal, which let over-budget documents
    # slip into the result.
    kept = []
    tokens = 0
    for doc in documents:
        cost = len(doc.page_content) * estimate_per_char
        if tokens + cost > budget:
            # The estimate is cumulative, so every later document would be
            # over budget as well; stop at the first one that does not fit.
            break
        tokens += cost
        kept.append(doc)

    # Lazy %-style arguments: passing extra positional args without a
    # placeholder in the message makes stdlib logging fail to format it.
    logger.info("documents: %s", kept)
    logger.info("tokens: %s", tokens)
    logger.info("🔥🔥🔥🔥🔥🔥")

    # Aggregate the retained documents into one string.
    return "\n".join(doc.page_content for doc in kept)
|