mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-14 17:03:29 +03:00
ef90e8e672
# Description
Major PR which, among other things, introduces the possibility of easily
customizing the retrieval workflows. Workflows are based on LangGraph,
and can be customized using a [yaml configuration
file](core/tests/test_llm_endpoint.py) and by adding the implementation of
the node logic to
[quivr_rag_langgraph.py](1a0c98437a/backend/core/quivr_core/quivr_rag_langgraph.py).
This is a first, simple implementation that will significantly evolve in
the coming weeks to enable more complex workflows (for instance, with
conditional nodes). We also plan to adopt a similar approach for the
ingestion part, i.e. to enable users to easily customize the ingestion
pipeline.
Closes CORE-195, CORE-203, CORE-204
## Checklist before requesting a review
Please delete options that are not relevant.
- [X] My code follows the style guidelines of this project
- [X] I have performed a self-review of my code
- [X] I have commented hard-to-understand areas
- [X] I have ideally added tests that prove my fix is effective or that
my feature works
- [X] New and existing unit tests pass locally with my changes
- [X] Any dependent changes have been merged
## Screenshots (if appropriate):
114 lines
3.0 KiB
Python
114 lines
3.0 KiB
Python
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Dict, Optional
|
|
from uuid import UUID
|
|
|
|
from langchain_core.documents import Document
|
|
from langchain_core.messages import AIMessage, HumanMessage
|
|
from langchain_core.pydantic_v1 import BaseModel as BaseModelV1
|
|
from langchain_core.pydantic_v1 import Field as FieldV1
|
|
from pydantic import BaseModel
|
|
from typing_extensions import TypedDict
|
|
|
|
|
|
# Structured-answer schema declared on the pydantic v1 base class that
# langchain_core re-exports (BaseModelV1 / FieldV1).
# The class docstring and the field descriptions are runtime strings that
# pydantic carries into the generated schema, so they are kept verbatim.
# NOTE(review): the lowercase class name is presumably intentional because the
# name is exposed as the tool/function name — confirm before renaming.
class cited_answer(BaseModelV1):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # All three fields are required (`...` is passed as the default).
    answer: str = FieldV1(
        ...,
        description="The answer to the user question, which is based only on the given sources.",
    )
    citations: list[int] = FieldV1(
        ...,
        description="The integer IDs of the SPECIFIC sources which justify the answer.",
    )

    followup_questions: list[str] = FieldV1(
        ...,
        description="Generate up to 3 follow-up questions that could be asked based on the answer given or context provided.",
    )
|
|
|
|
# A single chat message together with its identifiers, timestamp and
# free-form metadata. Declared on the pydantic v1 base class (BaseModelV1).
class ChatMessage(BaseModelV1):
    chat_id: UUID
    message_id: UUID
    brain_id: UUID | None  # may be None; no explicit default is declared
    msg: AIMessage | HumanMessage  # the LangChain message object itself
    message_time: datetime
    metadata: dict[str, Any]
|
|
|
|
class KnowledgeStatus(str, Enum):
    """String-valued status enum; each member's value equals its name.

    Because the enum also subclasses ``str``, members compare equal to their
    plain string values (e.g. ``KnowledgeStatus.ERROR == "ERROR"``).
    """

    PROCESSING = "PROCESSING"
    UPLOADED = "UPLOADED"
    ERROR = "ERROR"
    RESERVED = "RESERVED"
|
|
|
|
# Description of a source; every field is a required string.
class Source(BaseModel):
    name: str
    source_url: str
    type: str
    original_file_name: str
    citation: str
|
|
|
|
class RawRAGChunkResponse(TypedDict):
    """Typed dict with two required keys, ``answer`` and ``docs``.

    Both values are ``dict[str, Any]``; their inner schema is not defined here.
    """

    answer: dict[str, Any]
    docs: dict[str, Any]
|
|
|
|
class RawRAGResponse(TypedDict):
    """Typed dict with two required keys, ``answer`` and ``docs``.

    Both values are ``dict[str, Any]``; their inner schema is not defined here.
    """

    answer: dict[str, Any]
    docs: dict[str, Any]
|
|
|
|
# Descriptive metadata record. Only `name` is required; every other field
# is an optional string that defaults to None.
class ChatLLMMetadata(BaseModel):
    name: str
    display_name: str | None = None
    description: str | None = None
    image_url: str | None = None
    brain_id: str | None = None
    brain_name: str | None = None
|
|
|
|
# Optional metadata attached to a RAG response; every field defaults to None.
class RAGResponseMetadata(BaseModel):
    citations: list[int] | None = None
    followup_questions: list[str] | None = None
    sources: list[Any] | None = None  # element type is left open (Any)
    metadata_model: ChatLLMMetadata | None = None
|
|
|
|
# Parsed RAG response: the answer text plus optional metadata
# (metadata defaults to None).
class ParsedRAGResponse(BaseModel):
    answer: str
    metadata: RAGResponseMetadata | None = None
|
|
|
|
# Parsed chunk of a RAG response. Unlike ParsedRAGResponse, `metadata` is
# required here; `last_chunk` defaults to False.
class ParsedRAGChunkResponse(BaseModel):
    answer: str
    metadata: RAGResponseMetadata
    last_chunk: bool = False
|
|
|
|
# Record describing a knowledge item. Only `id` and `file_name` are required;
# every other field carries a default.
# Optional fields are spelled `X | None` throughout, matching the other
# models in this module (pydantic treats `Optional[X]` and `X | None` alike).
class QuivrKnowledge(BaseModel):
    id: UUID
    file_name: str
    brain_ids: list[UUID] | None = None
    url: str | None = None
    extension: str = ".txt"
    # NOTE(review): "txt" is not a registered MIME type ("text/plain" would
    # be) — left unchanged because callers may rely on it; confirm.
    mime_type: str = "txt"
    status: KnowledgeStatus = KnowledgeStatus.PROCESSING
    source: str | None = None
    source_link: str | None = None
    file_size: int | None = None  # FIXME: Should not be optional @chloedia
    file_sha1: str | None = None  # FIXME: Should not be optional @chloedia
    updated_at: datetime | None = None
    created_at: datetime | None = None
    metadata: dict[str, str] | None = None
|
|
|
|
# NOTE: for compatibility issues with langchain <-> PydanticV1
# Pairs a LangChain Document with a float distance value; both are required.
class SearchResult(BaseModelV1):
    chunk: Document
    distance: float