quivr/backend/repository/brain/get_question_context_from_brain.py

33 lines
1.1 KiB
Python
Raw Normal View History

from uuid import UUID
2023-11-02 00:33:47 +03:00
from logger import get_logger
from models.settings import get_embeddings, get_supabase_client
from vectorstore.supabase import CustomSupabaseVectorStore
2023-11-02 00:33:47 +03:00
# Module-level logger, obtained from the project's get_logger helper and
# keyed by this module's name.
logger = get_logger(__name__)
def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
    """Return the concatenated text of the documents most similar to *question*.

    Runs a similarity search against the ``vectors`` table restricted to the
    given brain, keeps the leading documents that fit within a fixed size
    budget, and joins their ``page_content`` with newlines.

    Args:
        brain_id: Identifier of the brain whose vectors are searched.
        question: The text used as the similarity-search query.

    Returns:
        The ``page_content`` of the retained documents joined by ``"\\n"``;
        an empty string when no document fits (or none is found).
    """
    supabase_client = get_supabase_client()
    embeddings = get_embeddings()
    vector_store = CustomSupabaseVectorStore(
        supabase_client,
        embeddings,
        table_name="vectors",
        brain_id=str(brain_id),
    )
    documents = vector_store.similarity_search(question)

    # The returned context must stay under roughly 2500 tokens, so documents
    # are dropped once the running estimate passes the budget below.
    # NOTE(review): the original comment states "a token equals 1.5
    # characters, so 2000 tokens is 3000 characters", yet the estimate
    # multiplies the character count by 1.5 before comparing against 3000
    # (i.e. an effective cap of 2000 characters). The arithmetic is kept
    # unchanged here -- confirm which of the two was intended.
    cost_per_char = 1.5
    max_budget = 3000

    # Build a new list instead of calling ``documents.remove`` inside the
    # loop: removing from a list while iterating it makes the iterator skip
    # the following element, which let over-budget documents slip through.
    kept = []
    tokens = 0
    for doc in documents:
        tokens += len(doc.page_content) * cost_per_char
        if tokens > max_budget:
            # The running total only grows, so every later document would
            # be over budget as well.
            break
        kept.append(doc)
    documents = kept

    # Use %s placeholders: passing extra positional args without a
    # placeholder makes the logging call fail to format the record.
    logger.info("documents %s", documents)
    logger.info("tokens %s", tokens)
    logger.info("🔥🔥🔥🔥🔥🔥")

    # Aggregate all the retained documents into one string.
    return "\n".join(doc.page_content for doc in documents)