mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-13 11:02:33 +03:00
285fe5b960
# Description This PR includes far too many new features: - detection of user intent (closes CORE-211) - treating multiple questions in parallel (closes CORE-212) - using the chat history when answering a question (closes CORE-213) - filtering of retrieved chunks by relevance threshold (closes CORE-217) - dynamic retrieval of chunks (closes CORE-218) - enabling web search via Tavily (closes CORE-220) - enabling agent / assistant to activate tools when relevant to complete the user task (closes CORE-224) Also closes CORE-205 ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate): --------- Co-authored-by: Stan Girard <stan@quivr.app>
152 lines
4.1 KiB
Python
152 lines
4.1 KiB
Python
from dataclasses import asdict
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
from langchain_core.documents import Document
|
|
from langchain_core.embeddings import Embeddings
|
|
from quivr_core.brain import Brain
|
|
from quivr_core.rag.entities.chat import ChatHistory
|
|
from quivr_core.llm import LLMEndpoint
|
|
from quivr_core.storage.local_storage import TransparentStorage
|
|
|
|
|
|
@pytest.mark.base
def test_brain_empty_files_no_vectordb(fake_llm, embedder):
    """Creating a brain from an empty file list with no vector DB must raise ValueError."""
    with pytest.raises(ValueError):
        Brain.from_files(
            name="test_brain",
            llm=fake_llm,
            embedder=embedder,
            file_paths=[],
        )
def test_brain_empty_files(fake_llm, embedder, mem_vector_store):
    """An empty file list is accepted when an explicit vector DB is supplied."""
    built = Brain.from_files(
        name="test_brain",
        llm=fake_llm,
        embedder=embedder,
        file_paths=[],
        vector_db=mem_vector_store,
    )
    # Construction succeeded and produced a truthy brain instance.
    assert built
@pytest.mark.asyncio
async def test_brain_from_files_success(
    fake_llm: LLMEndpoint, embedder, temp_data_file, mem_vector_store
):
    """A brain built from one file exposes its name, llm, empty chat, and storage."""
    brain = await Brain.afrom_files(
        name="test_brain",
        file_paths=[temp_data_file],
        llm=fake_llm,
        embedder=embedder,
        vector_db=mem_vector_store,
    )

    # Basic identity and wiring.
    assert brain.name == "test_brain"
    assert brain.llm == fake_llm
    assert brain.vector_db.embeddings == embedder

    # Chat state starts empty, with a default ChatHistory in place.
    assert len(brain.chat_history) == 0
    assert isinstance(brain.default_chat, ChatHistory)
    assert len(brain.default_chat) == 0

    # Storage: the single input file was registered.
    assert isinstance(brain.storage, TransparentStorage)
    stored = await brain.storage.get_files()
    assert len(stored) == 1
@pytest.mark.asyncio
async def test_brain_from_langchain_docs(embedder, fake_llm, mem_vector_store):
    """Building from pre-chunked langchain documents registers no files and no chat."""
    doc = Document("content_1", metadata={"id": uuid4()})

    brain = await Brain.afrom_langchain_documents(
        name="test",
        llm=fake_llm,
        embedder=embedder,
        langchain_documents=[doc],
        vector_db=mem_vector_store,
    )

    # Documents go straight to the vector store; storage stays empty.
    assert len(await brain.storage.get_files()) == 0
    assert len(brain.chat_history) == 0
@pytest.mark.base
@pytest.mark.asyncio
async def test_brain_search(
    embedder: Embeddings,
):
    """asearch returns the k nearest chunks ordered by increasing distance."""
    first = Document("content_1", metadata={"id": uuid4()})
    second = Document("content_2", metadata={"id": uuid4()})

    brain = await Brain.afrom_langchain_documents(
        name="test", langchain_documents=[first, second], embedder=embedder
    )

    top_k = 2
    hits = await brain.asearch("content_1", n_results=top_k)

    assert len(hits) == top_k
    # Exact match comes first with zero distance; the other chunk follows.
    assert hits[0].chunk == first
    assert hits[0].distance == 0
    assert hits[1].chunk == second
    assert hits[1].distance > hits[0].distance
@pytest.mark.asyncio
async def test_brain_get_history(
    fake_llm: LLMEndpoint, embedder, temp_data_file, mem_vector_store
):
    """Each ask appends a question/answer pair to the default chat history."""
    brain = await Brain.afrom_files(
        name="test_brain",
        file_paths=[temp_data_file],
        llm=fake_llm,
        embedder=embedder,
        vector_db=mem_vector_store,
    )

    # Two questions -> two (user, assistant) pairs -> four history entries.
    for _ in range(2):
        await brain.aask("question")

    assert len(brain.default_chat) == 4
@pytest.mark.base
@pytest.mark.asyncio
async def test_brain_ask_streaming(
    fake_llm: LLMEndpoint, embedder, temp_data_file, answers
):
    """Streamed chunks, concatenated in order, reproduce the full fake answer."""
    brain = await Brain.afrom_files(
        name="test_brain", file_paths=[temp_data_file], embedder=embedder, llm=fake_llm
    )

    # Collect pieces then join, rather than building the string incrementally.
    pieces = []
    async for part in brain.ask_streaming("question"):
        pieces.append(part.answer)

    assert "".join(pieces) == answers[1]
def test_brain_info_empty(fake_llm: LLMEndpoint, embedder, mem_vector_store):
    """info() on a freshly-constructed brain reports empty storage and one default chat.

    Fix: the local variable was named ``id``, shadowing the ``id`` builtin;
    renamed to ``brain_id`` with no behavior change.
    """
    storage = TransparentStorage()
    brain_id = uuid4()
    brain = Brain(
        name="test",
        id=brain_id,
        llm=fake_llm,
        embedder=embedder,
        storage=storage,
        vector_db=mem_vector_store,
    )

    assert asdict(brain.info()) == {
        "brain_id": brain_id,
        "brain_name": "test",
        "files_info": asdict(storage.info()),
        "chats_info": {
            "nb_chats": 1,  # a brain always starts with one default chat
            "current_default_chat": brain.default_chat.id,
            "current_chat_history_length": 0,
        },
        "llm_info": asdict(fake_llm.info()),
    }
|