2023-08-27 10:38:41 +03:00
from uuid import UUID
2023-11-02 00:33:47 +03:00
from logger import get_logger
2023-08-27 10:38:41 +03:00
from models . settings import get_embeddings , get_supabase_client
from vectorstore . supabase import CustomSupabaseVectorStore
2023-11-02 00:33:47 +03:00
logger = get_logger ( __name__ )
2023-08-27 10:38:41 +03:00
def get_question_context_from_brain ( brain_id : UUID , question : str ) - > str :
supabase_client = get_supabase_client ( )
embeddings = get_embeddings ( )
vector_store = CustomSupabaseVectorStore (
supabase_client ,
embeddings ,
table_name = " vectors " ,
2023-08-30 16:55:06 +03:00
brain_id = str ( brain_id ) ,
2023-08-27 10:38:41 +03:00
)
documents = vector_store . similarity_search ( question )
2023-11-02 00:09:04 +03:00
## I can't pass more than 2500 tokens to as return value in my array. So i need to remove the docs after i reach 2000 tokens. A token equals 1.5 characters. So 2000 tokens is 3000 characters.
tokens = 0
for doc in documents :
tokens + = len ( doc . page_content ) * 1.5
if tokens > 3000 :
documents . remove ( doc )
2023-11-02 00:33:47 +03:00
logger . info ( " documents " , documents )
logger . info ( " tokens " , tokens )
logger . info ( " 🔥🔥🔥🔥🔥🔥 " )
2023-08-27 10:38:41 +03:00
# aggregate all the documents into one string
return " \n " . join ( [ doc . page_content for doc in documents ] )