quivr/backend/core/tests/conftest.py
Stan Girard 380cf82706
feat: quivr core 0.1 (#2970)
# Description


# Testing backend 

## Docker setup
1. Copy `.env.example` to `.env`. Some env variables were added :
EMBEDDING_DIM
2. Apply Supabase migrations:
```sh
supabase stop
supabase db reset
supabase start
```
3. Start backend containers
```
make dev
```
## Local setup 
You can also run backend without docker.
1. Install [`rye`](https://rye.astral.sh/guide/installation/). Choose
the managed python version and set the version to 3.11
2. Run the following: 
```
cd quivr/backend
rye sync
```
3. Source `.venv` virtual env : `source .venv/bin/activate`
4. Run the backend. Make sure Redis and Supabase are running.
API: 
```
LOG_LEVEL=debug uvicorn quivr_api.main:app --log-level debug --reload --host 0.0.0.0 --port 5050 --workers 1
```
Worker: 
```
LOG_LEVEL=debug celery -A quivr_worker.celery_worker worker -l info -E --concurrency 1
```
Notifier: 
```
LOG_LEVEL=debug python worker/quivr_worker/celery_monitor.py
```

---------

Co-authored-by: chloedia <chloedaems0@gmail.com>
Co-authored-by: aminediro <aminedirhoussi1@gmail.com>
Co-authored-by: Antoine Dewez <44063631+Zewed@users.noreply.github.com>
Co-authored-by: Chloé Daems <73901882+chloedia@users.noreply.github.com>
Co-authored-by: Zewed <dewez.antoine2@gmail.com>
2024-09-02 10:20:53 +02:00

92 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
from pathlib import Path
from uuid import uuid4
import pytest
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.language_models import FakeListChatModel
from langchain_core.messages.ai import AIMessageChunk
from langchain_core.runnables.utils import AddableDict
from langchain_core.vectorstores import InMemoryVectorStore
from quivr_core.config import LLMEndpointConfig
from quivr_core.files.file import FileExtension, QuivrFile
from quivr_core.llm import LLMEndpoint
@pytest.fixture(scope="function")
def temp_data_file(tmp_path):
    """Create ``data.txt`` under ``tmp_path`` with a fixed sentence and return its path."""
    target = tmp_path / "data.txt"
    target.write_text("This is some test data.")
    return target
@pytest.fixture(scope="function")
def quivr_txt(temp_data_file):
    """Build a ``QuivrFile`` describing the text file produced by ``temp_data_file``.

    The file and brain ids are freshly generated UUIDs; the SHA-1 is the
    placeholder string ``"123"``.
    """
    file_id = uuid4()
    brain_id = uuid4()
    return QuivrFile(
        id=file_id,
        brain_id=brain_id,
        original_filename=temp_data_file.name,
        path=temp_data_file,
        file_extension=FileExtension.txt,
        file_sha1="123",
    )
@pytest.fixture
def quivr_pdf():
    """Build a ``QuivrFile`` pointing at the ``dummy.pdf`` test asset.

    The path is relative (``./tests/processor/data/dummy.pdf``), so it is
    resolved against the current working directory when the file is used.
    """
    pdf_path = Path("./tests/processor/data/dummy.pdf")
    file_id = uuid4()
    brain_id = uuid4()
    return QuivrFile(
        id=file_id,
        brain_id=brain_id,
        original_filename="dummy.pdf",
        path=pdf_path,
        file_extension=FileExtension.pdf,
        file_sha1="13bh234jh234",
    )
@pytest.fixture
def full_response():
    """Return a fixed multi-sentence paragraph about NLP as a single string."""
    # Adjacent literals are concatenated at compile time; each sentence but the
    # last carries its own trailing space so the result is one paragraph.
    return (
        "Natural Language Processing (NLP) is a field of artificial intelligence that focuses on the interaction between computers and humans through natural language. "
        "The ultimate objective of NLP is to enable computers to understand, interpret, and respond to human language in a way that is both valuable and meaningful. "
        "NLP combines computational linguistics—rule-based modeling of human language—with statistical, machine learning, and deep learning models. "
        "This combination allows computers to process human language in the form of text or voice data and to understand its full meaning, complete with the speaker or writers intent and sentiment. "
        "Key tasks in NLP include text and speech recognition, translation, sentiment analysis, and topic segmentation."
    )
@pytest.fixture
def chunks_stream_answer():
    """Load the recorded stream chunks from the JSON Lines fixture file.

    Each non-blank line of ``./tests/chunk_stream_fixture.jsonl`` (resolved
    against the current working directory) is parsed as JSON and wrapped in an
    ``AddableDict``. When a chunk has an ``"answer"`` key, its value is
    expanded into an ``AIMessageChunk``.

    Returns:
        The chunks as a list, in file order.
    """
    chunks = []
    # Explicit UTF-8 so decoding does not depend on the platform's locale
    # default encoding.
    with open("./tests/chunk_stream_fixture.jsonl", "r", encoding="utf-8") as f:
        for raw_line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not raw_line.strip():
                continue
            chunk = AddableDict(**json.loads(raw_line))
            if "answer" in chunk:
                chunk["answer"] = AIMessageChunk(**chunk["answer"])
            chunks.append(chunk)
    return chunks
@pytest.fixture(autouse=True)
def openai_api_key(monkeypatch):
    """Set a dummy ``OPENAI_API_KEY`` environment variable for every test.

    The fixture is autouse, so it applies to each test without being requested.
    ``monkeypatch`` undoes the change at teardown, restoring any previous value
    or removing the variable, so the placeholder does not leak into the process
    environment once the test finishes.
    """
    monkeypatch.setenv("OPENAI_API_KEY", "abcd")
@pytest.fixture
def answers():
    """Return the ten canned strings ``answer_0`` through ``answer_9``."""
    count = 10
    return [f"answer_{i}" for i in range(count)]
@pytest.fixture(scope="function")
def fake_llm(answers: list[str]):
    """Return an ``LLMEndpoint`` wrapping a ``FakeListChatModel``.

    The fake chat model is constructed with ``answers`` as its responses, and
    the endpoint config names the model ``"fake_model"``.
    """
    chat_model = FakeListChatModel(responses=answers)
    endpoint_config = LLMEndpointConfig(model="fake_model")
    return LLMEndpoint(llm=chat_model, llm_config=endpoint_config)
@pytest.fixture(scope="function")
def embedder():
    """Return a ``DeterministicFakeEmbedding`` constructed with ``size=20``."""
    fake_embedding = DeterministicFakeEmbedding(size=20)
    return fake_embedding
@pytest.fixture(scope="function")
def mem_vector_store(embedder):
    """Return an ``InMemoryVectorStore`` built from the ``embedder`` fixture."""
    store = InMemoryVectorStore(embedder)
    return store