mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-14 17:03:29 +03:00
ef90e8e672
# Description
Major PR which, among other things, introduces the possibility of easily
customizing the retrieval workflows. Workflows are based on LangGraph,
and can be customized using a [yaml configuration
file](core/tests/test_llm_endpoint.py), and adding the implementation of
the nodes logic into
[quivr_rag_langgraph.py](1a0c98437a/backend/core/quivr_core/quivr_rag_langgraph.py
)
This is a first, simple implementation that will significantly evolve in
the coming weeks to enable more complex workflows (for instance, with
conditional nodes). We also plan to adopt a similar approach for the
ingestion part, i.e. to enable user to easily customize the ingestion
pipeline.
Closes CORE-195, CORE-203, CORE-204
## Checklist before requesting a review
Please delete options that are not relevant.
- [X] My code follows the style guidelines of this project
- [X] I have performed a self-review of my code
- [X] I have commented hard-to-understand areas
- [X] I have ideally added tests that prove my fix is effective or that
my feature works
- [X] New and existing unit tests pass locally with my changes
- [X] Any dependent changes have been merged
## Screenshots (if appropriate):
92 lines
3.0 KiB
Python
92 lines
3.0 KiB
Python
import json
|
||
import os
|
||
from pathlib import Path
|
||
from uuid import uuid4
|
||
|
||
import pytest
|
||
from langchain_core.embeddings import DeterministicFakeEmbedding
|
||
from langchain_core.language_models import FakeListChatModel
|
||
from langchain_core.messages.ai import AIMessageChunk
|
||
from langchain_core.runnables.utils import AddableDict
|
||
from langchain_core.vectorstores import InMemoryVectorStore
|
||
from quivr_core.config import LLMEndpointConfig
|
||
from quivr_core.files.file import FileExtension, QuivrFile
|
||
from quivr_core.llm import LLMEndpoint
|
||
|
||
|
||
@pytest.fixture(scope="function")
|
||
def temp_data_file(tmp_path):
|
||
data = "This is some test data."
|
||
temp_file = tmp_path / "data.txt"
|
||
temp_file.write_text(data)
|
||
return temp_file
|
||
|
||
|
||
@pytest.fixture(scope="function")
|
||
def quivr_txt(temp_data_file):
|
||
return QuivrFile(
|
||
id=uuid4(),
|
||
brain_id=uuid4(),
|
||
original_filename=temp_data_file.name,
|
||
path=temp_data_file,
|
||
file_extension=FileExtension.txt,
|
||
file_sha1="123",
|
||
)
|
||
|
||
|
||
@pytest.fixture
|
||
def quivr_pdf():
|
||
return QuivrFile(
|
||
id=uuid4(),
|
||
brain_id=uuid4(),
|
||
original_filename="dummy.pdf",
|
||
path=Path("./tests/processor/data/dummy.pdf"),
|
||
file_extension=FileExtension.pdf,
|
||
file_sha1="13bh234jh234",
|
||
)
|
||
|
||
|
||
@pytest.fixture
|
||
def full_response():
|
||
return "Natural Language Processing (NLP) is a field of artificial intelligence that focuses on the interaction between computers and humans through natural language. The ultimate objective of NLP is to enable computers to understand, interpret, and respond to human language in a way that is both valuable and meaningful. NLP combines computational linguistics—rule-based modeling of human language—with statistical, machine learning, and deep learning models. This combination allows computers to process human language in the form of text or voice data and to understand its full meaning, complete with the speaker or writer’s intent and sentiment. Key tasks in NLP include text and speech recognition, translation, sentiment analysis, and topic segmentation."
|
||
|
||
|
||
@pytest.fixture
|
||
def chunks_stream_answer():
|
||
with open("./tests/chunk_stream_fixture.jsonl", "r") as f:
|
||
raw_chunks = list(f)
|
||
|
||
chunks = []
|
||
for rc in raw_chunks:
|
||
chunk = AddableDict(**json.loads(rc))
|
||
if "answer" in chunk:
|
||
chunk["answer"] = AIMessageChunk(**chunk["answer"])
|
||
chunks.append(chunk)
|
||
return chunks
|
||
|
||
|
||
@pytest.fixture(autouse=True)
|
||
def openai_api_key():
|
||
os.environ["OPENAI_API_KEY"] = "this-is-a-test-key"
|
||
|
||
|
||
@pytest.fixture
|
||
def answers():
|
||
return [f"answer_{i}" for i in range(10)]
|
||
|
||
|
||
@pytest.fixture(scope="function")
|
||
def fake_llm(answers: list[str]):
|
||
llm = FakeListChatModel(responses=answers)
|
||
return LLMEndpoint(llm=llm, llm_config=LLMEndpointConfig(model="fake_model"))
|
||
|
||
|
||
@pytest.fixture(scope="function")
|
||
def embedder():
|
||
return DeterministicFakeEmbedding(size=20)
|
||
|
||
|
||
@pytest.fixture(scope="function")
|
||
def mem_vector_store(embedder):
|
||
return InMemoryVectorStore(embedder)
|