Mirror of https://github.com/QuivrHQ/quivr.git (synced 2024-12-15 01:21:48 +03:00)
feat(assistant): cdp (#3305)
# Description

Please include a summary of the changes and the related issue. Please also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

---------

Co-authored-by: Zewed <dewez.antoine2@gmail.com>
parent c39913956d
commit b767f19f28
@@ -24,6 +24,7 @@ RUN apt-get clean && apt-get update && apt-get install -y \
     libreoffice \
     libpq-dev \
     gcc \
+    libhdf5-serial-dev \
     pandoc && \
     rm -rf /var/lib/apt/lists/* && apt-get clean

@@ -46,6 +47,8 @@ COPY core/pyproject.toml core/README.md ./core/
 COPY core/quivr_core/__init__.py ./core/quivr_core/__init__.py
 COPY worker/pyproject.toml worker/README.md ./worker/
 COPY worker/quivr_worker/__init__.py ./worker/quivr_worker/__init__.py
+COPY worker/diff-assistant/pyproject.toml worker/diff-assistant/README.md ./worker/diff-assistant/
+COPY worker/diff-assistant/quivr_diff_assistant/__init__.py ./worker/diff-assistant/quivr_diff_assistant/__init__.py
 COPY core/MegaParse/pyproject.toml core/MegaParse/README.md ./core/MegaParse/
 COPY core/MegaParse/megaparse/__init__.py ./core/MegaParse/megaparse/__init__.py
@@ -23,6 +23,7 @@ RUN apt-get clean && apt-get update && apt-get install -y \
     libreoffice \
     libpq-dev \
     gcc \
+    libhdf5-serial-dev \
     pandoc && \
     rm -rf /var/lib/apt/lists/* && apt-get clean

@@ -33,6 +34,8 @@ COPY core/pyproject.toml core/README.md ./core/
 COPY core/quivr_core/__init__.py ./core/quivr_core/__init__.py
 COPY worker/pyproject.toml worker/README.md ./worker/
 COPY worker/quivr_worker/__init__.py ./worker/quivr_worker/__init__.py
+COPY worker/diff-assistant/pyproject.toml worker/diff-assistant/README.md ./worker/diff-assistant/
+COPY worker/diff-assistant/quivr_diff_assistant/__init__.py ./worker/diff-assistant/quivr_diff_assistant/__init__.py
 COPY core/MegaParse/pyproject.toml core/MegaParse/README.md ./core/MegaParse/
 COPY core/MegaParse/megaparse/__init__.py ./core/MegaParse/megaparse/__init__.py
@@ -2,7 +2,7 @@ import io
 from typing import Annotated, List
 from uuid import uuid4
 
-from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
+from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
 
 from quivr_api.celery_config import celery
 from quivr_api.logger import get_logger

@@ -16,6 +16,7 @@ from quivr_api.modules.assistant.dto.outputs import AssistantOutput
 from quivr_api.modules.assistant.entity.assistant_entity import (
     AssistantSettings,
 )
+from quivr_api.modules.assistant.entity.task_entity import TaskMetadata
 from quivr_api.modules.assistant.services.tasks_service import TasksService
 from quivr_api.modules.dependencies import get_service
 from quivr_api.modules.upload.service.upload_file import (
@@ -64,12 +65,15 @@ async def create_task(
     current_user: UserIdentityDep,
     tasks_service: TasksServiceDep,
     request: Request,
-    input: InputAssistant,
+    input: str = File(...),
+    files: List[UploadFile] = None,
 ):
+    input = InputAssistant.model_validate_json(input)
+
     assistant = next(
         (assistant for assistant in assistants if assistant.id == input.id), None
     )
 
     if assistant is None:
         raise HTTPException(status_code=404, detail="Assistant not found")

@@ -80,7 +84,7 @@ async def create_task(
         raise HTTPException(status_code=400, detail=error)
     else:
         print("Assistant input is valid.")
-    notification_uuid = uuid4()
+    notification_uuid = f"{assistant.name}-{str(uuid4())[:8]}"
 
     # Process files dynamically
     for upload_file in files:

@@ -96,8 +100,14 @@ async def create_task(
     task = CreateTask(
         assistant_id=input.id,
-        pretty_id=str(notification_uuid),
+        assistant_name=assistant.name,
+        pretty_id=notification_uuid,
         settings=input.model_dump(mode="json"),
+        task_metadata=TaskMetadata(
+            input_files=[file.filename for file in files]
+        ).model_dump(mode="json")
+        if files
+        else None,  # type: ignore
     )
 
     task_created = await tasks_service.create_task(task, current_user.id)
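Note: with this change the endpoint takes `input` as a JSON-encoded form field and the documents as multipart file parts. A minimal client sketch, assuming a local server and an `/assistant/task` route (path, port, token, and payload shape are illustrative, not taken from the diff):

```python
import json

import httpx

# Hypothetical InputAssistant payload; the real schema lives in
# quivr_api.modules.assistant.dto.inputs.
payload = {"id": 3, "name": "Difference Detection"}

response = httpx.post(
    "http://localhost:5050/assistant/task",  # assumed route
    headers={"Authorization": "Bearer <token>"},
    data={"input": json.dumps(payload)},  # JSON string, matching the File(...) field
    files=[
        ("files", ("doc_v1.pdf", open("doc_v1.pdf", "rb"), "application/pdf")),
        ("files", ("doc_v2.pdf", open("doc_v2.pdf", "rb"), "application/pdf")),
    ],
)
print(response.status_code, response.json())
```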
@@ -1,8 +1,11 @@
 from quivr_api.modules.assistant.dto.inputs import InputAssistant
 from quivr_api.modules.assistant.dto.outputs import (
     AssistantOutput,
+    ConditionalInput,
+    InputBoolean,
     InputFile,
     Inputs,
     InputSelectText,
     Pricing,
 )

@@ -166,10 +169,10 @@ def validate_assistant_input(
 
 assistant1 = AssistantOutput(
     id=1,
-    name="Assistant 1",
-    description="Assistant 1 description",
+    name="Compliance Check",
+    description="Allows analyzing the compliance of the information contained in documents against charter or regulatory requirements.",
     pricing=Pricing(),
-    tags=["tag1", "tag2"],
+    tags=["Disabled"],
     input_description="Input description",
     output_description="Output description",
     inputs=Inputs(

@@ -183,19 +186,66 @@ assistant1 = AssistantOutput(
 
 assistant2 = AssistantOutput(
     id=2,
-    name="Assistant 2",
-    description="Assistant 2 description",
+    name="Consistency Check",
+    description="Ensures that the information in one document is replicated identically in another document.",
     pricing=Pricing(),
-    tags=["tag1", "tag2"],
+    tags=[],
     input_description="Input description",
     output_description="Output description",
     icon_url="https://example.com/icon.png",
     inputs=Inputs(
         files=[
-            InputFile(key="file_1", description="File description"),
-            InputFile(key="file_2", description="File description"),
+            InputFile(key="Document 1", description="File description"),
+            InputFile(key="Document 2", description="File description"),
         ],
+        select_texts=[
+            InputSelectText(
+                key="DocumentsType",
+                description="Select Documents Type",
+                options=[
+                    "Etiquettes VS Cahier des charges",
+                    "Fiche Dev VS Cahier des charges",
+                ],
+            ),
+        ],
     ),
 )
 
-assistants = [assistant1, assistant2]
+assistant3 = AssistantOutput(
+    id=3,
+    name="Difference Detection",
+    description="Highlights differences between one document and another after modifications.",
+    pricing=Pricing(),
+    tags=[],
+    input_description="Input description",
+    output_description="Output description",
+    icon_url="https://example.com/icon.png",
+    inputs=Inputs(
+        files=[
+            InputFile(key="Document 1", description="File description"),
+            InputFile(key="Document 2", description="File description"),
+        ],
+        booleans=[
+            InputBoolean(
+                key="Hard-to-Read Document?", description="Boolean description"
+            ),
+        ],
+        select_texts=[
+            InputSelectText(
+                key="DocumentsType",
+                description="Select Documents Type",
+                options=["Etiquettes", "Cahier des charges"],
+            ),
+        ],
+        conditional_inputs=[
+            ConditionalInput(
+                key="DocumentsType",
+                conditional_key="Hard-to-Read Document?",
+                condition="equals",
+                value="Etiquettes",
+            ),
+        ],
+    ),
+)
+
+assistants = [assistant1, assistant2, assistant3]
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Dict, List, Optional
 from uuid import UUID
 
 from pydantic import BaseModel, root_validator

@@ -7,7 +7,9 @@ from pydantic import BaseModel, root_validator
 class CreateTask(BaseModel):
     pretty_id: str
     assistant_id: int
+    assistant_name: str
     settings: dict
+    task_metadata: Dict | None = None
 
 
 class BrainInput(BaseModel):
@@ -61,6 +61,21 @@ class InputSelectNumber(BaseModel):
     default: Optional[int] = None
 
 
+class ConditionalInput(BaseModel):
+    """
+    Conditional input is a list of inputs that are conditional to the value of another input.
+    key: The key of the input that is conditional.
+    conditional_key: The key that determines if the input is shown.
+    """
+
+    key: str
+    conditional_key: str
+    condition: Optional[str] = (
+        None  # e.g. "equals", "contains", "starts_with", "ends_with", "regex", "in", "not_in", "is_empty", "is_not_empty"
+    )
+    value: Optional[str] = None
+
+
 class Inputs(BaseModel):
     files: Optional[List[InputFile]] = None
     urls: Optional[List[InputUrl]] = None

@@ -70,6 +85,7 @@ class Inputs(BaseModel):
     select_texts: Optional[List[InputSelectText]] = None
     select_numbers: Optional[List[InputSelectNumber]] = None
     brain: Optional[BrainInput] = None
+    conditional_inputs: Optional[List[ConditionalInput]] = None
 
 
 class Pricing(BaseModel):
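Note: the model only declares the dependency between inputs; the diff does not show where the conditions are evaluated (presumably the frontend). A minimal sketch of how a consumer could interpret a `ConditionalInput`, assuming string-valued answers (the helper below is hypothetical, not part of the commit):

```python
# Hypothetical evaluator: decides whether the input named by `key`
# should be shown, given the user's current answers.
def is_input_shown(conditional: ConditionalInput, answers: dict[str, str]) -> bool:
    actual = answers.get(conditional.conditional_key, "")
    if conditional.condition == "equals":
        return actual == conditional.value
    if conditional.condition == "contains":
        return (conditional.value or "") in actual
    if conditional.condition == "is_empty":
        return actual == ""
    return True  # unknown or missing condition: show the input


# assistant3 above ties the "DocumentsType" select to the
# "Hard-to-Read Document?" input with condition "equals" / value "Etiquettes".
```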
@@ -1,10 +1,15 @@
 from datetime import datetime
-from typing import Dict
+from typing import Dict, List, Optional
 from uuid import UUID
 
+from pydantic import BaseModel
 from sqlmodel import JSON, TIMESTAMP, BigInteger, Column, Field, SQLModel, text
 
 
+class TaskMetadata(BaseModel):
+    input_files: Optional[List[str]] = None
+
+
 class Task(SQLModel, table=True):
     __tablename__ = "tasks"  # type: ignore
 
@@ -17,6 +22,7 @@ class Task(SQLModel, table=True):
         ),
     )
     assistant_id: int
+    assistant_name: str
     pretty_id: str
     user_id: UUID
     status: str = Field(default="pending")

@@ -29,6 +35,4 @@ class Task(SQLModel, table=True):
     )
     settings: Dict = Field(default_factory=dict, sa_column=Column(JSON))
     answer: str | None = Field(default=None)
-
-    class Config:
-        arbitrary_types_allowed = True
+    task_metadata: Dict | None = Field(default_factory=dict, sa_column=Column(JSON))
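Note: together, `CreateTask` and `TaskMetadata` now carry the assistant name and the uploaded file names down to the `tasks` table. A small construction sketch with illustrative values (not from the diff):

```python
# Illustrative only: the payload the create_task endpoint assembles
# before handing it to TasksService.create_task.
from quivr_api.modules.assistant.dto.inputs import CreateTask
from quivr_api.modules.assistant.entity.task_entity import TaskMetadata

task = CreateTask(
    assistant_id=3,
    assistant_name="Difference Detection",
    pretty_id="Difference Detection-1a2b3c4d",  # assistant name + short uuid suffix
    settings={"id": 3},
    task_metadata=TaskMetadata(input_files=["doc_v1.pdf", "doc_v2.pdf"]).model_dump(
        mode="json"
    ),
)
```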
@@ -3,7 +3,7 @@ from uuid import UUID
 
 from sqlalchemy import exc
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlmodel import select
+from sqlmodel import col, select
 
 from quivr_api.modules.assistant.dto.inputs import CreateTask
 from quivr_api.modules.assistant.entity.task_entity import Task

@@ -21,9 +21,11 @@ class TasksRepository(BaseRepository):
         try:
             task_to_create = Task(
                 assistant_id=task.assistant_id,
+                assistant_name=task.assistant_name,
                 pretty_id=task.pretty_id,
                 user_id=user_id,
                 settings=task.settings,
+                task_metadata=task.task_metadata,  # type: ignore
             )
             self.session.add(task_to_create)
             await self.session.commit()

@@ -40,7 +42,9 @@ class TasksRepository(BaseRepository):
         return response.one()
 
     async def get_tasks_by_user_id(self, user_id: UUID) -> Sequence[Task]:
-        query = select(Task).where(Task.user_id == user_id)
+        query = (
+            select(Task).where(Task.user_id == user_id).order_by(col(Task.id).desc())
+        )
         response = await self.session.exec(query)
         return response.all()
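Note: `col()` wraps the model attribute so static type checkers accept `.desc()`; the query itself only gains newest-first ordering. The pattern in isolation (illustrative, reusing `Task` and a `user_id` from the surrounding code):

```python
# Newest tasks first; col() keeps type checkers happy about .desc()
# on a SQLModel field.
from sqlmodel import col, select

query = select(Task).where(Task.user_id == user_id).order_by(col(Task.id).desc())
```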
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Dict
+from typing import Dict, Optional, Tuple
 from uuid import UUID
 
 from fastapi import HTTPException
@@ -1,5 +1,5 @@
-import time
 import os
+import time
 from enum import Enum
 
 from fastapi import HTTPException
@@ -1,9 +1,10 @@
+import os
 from typing import Annotated, List, Optional
 from uuid import UUID
-import os
 
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
 from fastapi.responses import StreamingResponse
+from quivr_core.config import RetrievalConfig
 
 from quivr_api.logger import get_logger
 from quivr_api.middlewares.auth import AuthBearer, get_current_user

@@ -36,7 +37,6 @@ from quivr_api.modules.user.entity.user_identity import UserIdentity
 from quivr_api.modules.vector.service.vector_service import VectorService
 from quivr_api.utils.telemetry import maybe_send_telemetry
 from quivr_api.utils.uuid_generator import generate_uuid_from_string
-from quivr_core.config import RetrievalConfig
 
 logger = get_logger(__name__)
@@ -2,8 +2,8 @@ from datetime import datetime
 from enum import Enum
 from typing import Any, Dict, List, Optional
 from uuid import UUID
-from pydantic import BaseModel
 
+from pydantic import BaseModel
 from quivr_core.models import KnowledgeStatus
 from sqlalchemy import JSON, TIMESTAMP, Column, text
 from sqlalchemy.ext.asyncio import AsyncAttrs
@@ -86,4 +86,3 @@ class SupabaseS3Storage(StorageInterface):
         except Exception as e:
             logger.error(e)
             raise e
-
@@ -527,7 +527,9 @@ async def test_should_process_knowledge_prev_error(
     assert new.file_sha1
 
 
-@pytest.mark.skip(reason="Bug: UnboundLocalError: cannot access local variable 'response'")
+@pytest.mark.skip(
+    reason="Bug: UnboundLocalError: cannot access local variable 'response'"
+)
 @pytest.mark.asyncio(loop_scope="session")
 async def test_get_knowledge_storage_path(session: AsyncSession, test_data: TestData):
     _, [knowledge, _] = test_data
@@ -1,9 +1,8 @@
-
 from fastapi import APIRouter, Depends, HTTPException
 from quivr_api.logger import get_logger
 from quivr_api.modules.dependencies import get_async_session
-from sqlmodel.ext.asyncio.session import AsyncSession
 from sqlmodel import text
+from sqlmodel.ext.asyncio.session import AsyncSession
 
 logger = get_logger(__name__)

@@ -20,7 +19,6 @@ async def root():
 
 @misc_router.get("/healthz", tags=["Health"])
 async def healthz(session: AsyncSession = Depends(get_async_session)):
-
     try:
         result = await session.execute(text("SELECT 1"))
         if not result:
@@ -2,7 +2,6 @@ import datetime
 import os
 from uuid import UUID, uuid4
 
-from quivr_api.utils.uuid_generator import generate_uuid_from_string
 from quivr_core.brain import Brain as BrainCore
 from quivr_core.chat import ChatHistory as ChatHistoryCore
 from quivr_core.config import LLMEndpointConfig, RetrievalConfig

@@ -29,6 +28,7 @@ from quivr_api.modules.prompt.entity.prompt import Prompt
 from quivr_api.modules.prompt.service.prompt_service import PromptService
 from quivr_api.modules.user.entity.user_identity import UserIdentity
 from quivr_api.modules.vector.service.vector_service import VectorService
+from quivr_api.utils.uuid_generator import generate_uuid_from_string
 from quivr_api.vectorstore.supabase import CustomSupabaseVectorStore
 
 from .utils import generate_source
@@ -68,7 +68,7 @@ async def generate_source(
     try:
         file_name = doc.metadata["file_name"]
         file_path = await knowledge_service.get_knowledge_storage_path(
-           file_name=file_name, brain_id=brain_id
+            file_name=file_name, brain_id=brain_id
         )
         if file_path in generated_urls:
             source_url = generated_urls[file_path]
@@ -93,9 +93,7 @@ class SyncUserRepository:
             sync_user_id,
         )
         query = (
-            self.db.from_("syncs_user")
-            .select("*")
-            .eq("user_id", user_id)
+            self.db.from_("syncs_user").select("*").eq("user_id", user_id)
             # .neq("status", "REMOVED")
         )
         if sync_user_id:

@@ -170,9 +168,9 @@ class SyncUserRepository:
         )
 
         state_str = json.dumps(state)
-        self.db.from_("syncs_user").update(sync_user_input.model_dump(exclude_unset=True)).eq(
-            "user_id", str(sync_user_id)
-        ).eq("state", state_str).execute()
+        self.db.from_("syncs_user").update(
+            sync_user_input.model_dump(exclude_unset=True)
+        ).eq("user_id", str(sync_user_id)).eq("state", state_str).execute()
         logger.info("Sync user updated successfully")
 
     def update_sync_user_status(self, sync_user_id: int, status: str):
@@ -1,9 +1,9 @@
+import time
 from datetime import datetime, timezone
 from typing import List, Sequence
 from uuid import UUID
 
 from notion_client import Client
-import time
 
 from quivr_api.logger import get_logger
 from quivr_api.modules.dependencies import BaseService

@@ -165,7 +165,6 @@ async def store_notion_pages(
 def fetch_notion_pages(
     notion_client: Client, start_cursor: str | None = None, iteration: int = 0
 ) -> NotionSearchResult:
-
     if iteration > 10:
         return NotionSearchResult(results=[], has_more=False, next_cursor=None)
     search_result = notion_client.search(

@@ -177,7 +176,9 @@ def fetch_notion_pages(
     if "code" in search_result and search_result["code"] == "rate_limited":
         # Wait 10 seconds
         time.sleep(10)
-        search_result = fetch_notion_pages(notion_client, start_cursor=start_cursor, iteration=iteration+1)
+        search_result = fetch_notion_pages(
+            notion_client, start_cursor=start_cursor, iteration=iteration + 1
+        )
 
     return NotionSearchResult.model_validate(search_result)
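Note: the recursion sleeps 10 seconds per rate-limit response, and the `iteration > 10` guard bounds the depth. The same idea written as a loop, for comparison (illustrative sketch, not the committed code; the `query`/`page_size` arguments are assumptions about the elided `notion_client.search(...)` call):

```python
import time


def fetch_pages_with_retry(notion_client, start_cursor=None, max_attempts=10):
    # Iterative equivalent of fetch_notion_pages' rate-limit handling.
    for _ in range(max_attempts):
        search_result = notion_client.search(
            query="", start_cursor=start_cursor, page_size=100
        )
        if not ("code" in search_result and search_result["code"] == "rate_limited"):
            return search_result
        time.sleep(10)  # back off, then try again
    return {"results": [], "has_more": False, "next_cursor": None}
```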
@@ -74,7 +74,9 @@ def test_fetch_limit_notion_pages_now(fetch_response):
     assert len(result) == 0
 
 
-@pytest.mark.skip(reason="Bug: httpx.ConnectError: [Errno -2] Name or service not known'")
+@pytest.mark.skip(
+    reason="Bug: httpx.ConnectError: [Errno -2] Name or service not known'"
+)
 @pytest.mark.asyncio(loop_scope="session")
 async def test_store_notion_pages_success(
     session: AsyncSession,
@@ -271,7 +271,10 @@ async def test_process_sync_file_not_supported(syncutils: SyncUtils):
         sync_active=sync_active,
     )
 
-@pytest.mark.skip(reason="Bug: UnboundLocalError: cannot access local variable 'response'")
+
+@pytest.mark.skip(
+    reason="Bug: UnboundLocalError: cannot access local variable 'response'"
+)
 @pytest.mark.asyncio(loop_scope="session")
 async def test_process_sync_file_noprev(
     monkeypatch,

@@ -327,8 +330,8 @@ async def test_process_sync_file_noprev(
     assert created_km.file_sha1 is None
     assert created_km.created_at is not None
     assert created_km.metadata == {"sync_file_id": "1"}
-    assert len(created_km.brains)> 0
-    assert created_km.brains[0]["brain_id"]== brain_1.brain_id
+    assert len(created_km.brains) > 0
+    assert created_km.brains[0]["brain_id"] == brain_1.brain_id
 
     # Assert celery task in correct
     assert task["args"] == ("process_file_task",)

@@ -345,8 +348,9 @@ async def test_process_sync_file_noprev(
     )
 
 
-@pytest.mark.skip(reason="Bug: UnboundLocalError: cannot access local variable 'response'")
+@pytest.mark.skip(
+    reason="Bug: UnboundLocalError: cannot access local variable 'response'"
+)
 @pytest.mark.asyncio(loop_scope="session")
 async def test_process_sync_file_with_prev(
     monkeypatch,

@@ -424,7 +428,7 @@ async def test_process_sync_file_with_prev(
     assert created_km.created_at
     assert created_km.updated_at == created_km.created_at  # new line
     assert created_km.metadata == {"sync_file_id": str(dbfiles[0].id)}
-    assert created_km.brains[0]["brain_id"]== brain_1.brain_id
+    assert created_km.brains[0]["brain_id"] == brain_1.brain_id
 
     # Check file content changed
     assert check_file_exists(str(brain_1.brain_id), sync_file.name)
@@ -1,13 +1,14 @@
 import asyncio
+import base64
+import re
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
 from typing import List
 
 from langchain_core.messages import HumanMessage
 from langchain_openai import ChatOpenAI
-import base64
 from pdf2image import convert_from_path
-import asyncio
-import re
 
 # BASE_OCR_PROMPT = """
 # Transcribe the content of this file into markdown. Be mindful of the formatting.
@@ -1,9 +1,11 @@
 from docx.document import Document as DocumentObject
+from docx.oxml.table import CT_Tbl
+from docx.oxml.text.paragraph import CT_P
+from docx.section import Section
+from docx.section import _Footer as Footer
+from docx.section import _Header as Header
 from docx.table import Table
 from docx.text.paragraph import Paragraph
-from docx.section import Section, _Header as Header, _Footer as Footer
-from docx.oxml.text.paragraph import CT_P
-from docx.oxml.table import CT_Tbl
 
 
 def print_element(element):
@@ -1,5 +1,4 @@
 import pytest
-
 from megaparse.Converter import MegaParse
@@ -2,7 +2,6 @@ import tempfile
 
 from quivr_core import Brain
-from quivr_core.quivr_rag_langgraph import QuivrQARAGLangGraph
 
 
 if __name__ == "__main__":
     with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as temp_file:
@@ -1,7 +1,7 @@
-from datetime import datetime
-from typing import Any, Generator, Tuple, List
-from uuid import UUID, uuid4
-from copy import deepcopy
+from copy import deepcopy
+from datetime import datetime
+from typing import Any, Generator, List, Tuple
+from uuid import UUID, uuid4
 
 from langchain_core.messages import AIMessage, HumanMessage
@@ -2,9 +2,9 @@ import os
 from enum import Enum
 from typing import Dict, List, Optional
 from uuid import UUID
-from sqlmodel import SQLModel
 
 from megaparse.config import MegaparseConfig
+from sqlmodel import SQLModel
 
 from quivr_core.base_config import QuivrBaseConfig
 from quivr_core.processor.splitter import SplitterConfig
@@ -1,14 +1,14 @@
 import datetime
-from pydantic import ConfigDict, create_model
 
-from langchain_core.prompts.base import BasePromptTemplate
 from langchain_core.prompts import (
     ChatPromptTemplate,
     HumanMessagePromptTemplate,
+    MessagesPlaceholder,
     PromptTemplate,
     SystemMessagePromptTemplate,
-    MessagesPlaceholder,
 )
+from langchain_core.prompts.base import BasePromptTemplate
+from pydantic import ConfigDict, create_model
 
 
 class CustomPromptsDict(dict):
@@ -1,7 +1,7 @@
 import logging
+from enum import Enum
 from typing import Annotated, AsyncGenerator, Optional, Sequence, TypedDict
 from uuid import uuid4
-from enum import Enum
 
 # TODO(@aminediro): this is the only dependency to langchain package, we should remove it
 from langchain.retrievers import ContextualCompressionRetriever

@@ -12,7 +12,7 @@ from langchain_core.documents import BaseDocumentCompressor, Document
 from langchain_core.messages import BaseMessage
 from langchain_core.messages.ai import AIMessageChunk
 from langchain_core.vectorstores import VectorStore
-from langgraph.graph import START, END, StateGraph
+from langgraph.graph import END, START, StateGraph
 from langgraph.graph.message import add_messages
 
 from quivr_core.chat import ChatHistory
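Note: the import reorder is cosmetic, but `START` and `END` are langgraph's entry and exit sentinels. A tiny self-contained example of how they wire a graph (not from this commit):

```python
from typing import TypedDict

from langgraph.graph import END, START, StateGraph


class State(TypedDict):
    question: str
    answer: str


def answer_node(state: State) -> dict:
    # Toy node: echo the question back as the answer.
    return {"answer": f"echo: {state['question']}"}


graph = StateGraph(State)
graph.add_node("answer", answer_node)
graph.add_edge(START, "answer")  # entry point
graph.add_edge("answer", END)  # exit point
app = graph.compile()
print(app.invoke({"question": "hi", "answer": ""}))
```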
@@ -41,7 +41,7 @@ dev-dependencies = [
 ]
 
 [tool.rye.workspace]
-members = [".", "core", "worker", "api", "docs", "core/examples/chatbot", "core/MegaParse"]
+members = [".", "core", "worker", "api", "docs", "core/examples/chatbot", "core/MegaParse", "worker/diff-assistant"]
 
 [tool.hatch.metadata]
 allow-direct-references = true
@@ -20,7 +20,10 @@
     # via quivr-worker
 -e file:core/MegaParse
     # via quivr-core
+    # via quivr-diff-assistant
 -e file:worker
+-e file:worker/diff-assistant
+    # via quivr-worker
 aiofiles==23.2.1
     # via chainlit
     # via quivr-core
@@ -43,6 +46,8 @@ anthropic==0.34.1
     # via langchain-anthropic
 antlr4-python3-runtime==4.9.3
     # via omegaconf
+anyascii==0.3.2
+    # via python-doctr
 anyio==3.7.1
     # via anthropic
     # via asyncer
@@ -126,6 +131,7 @@ click==8.1.7
     # via mkdocs
     # via mkdocstrings
     # via nltk
+    # via python-oxmsg
     # via uvicorn
 click-didyoumean==0.3.1
     # via celery
@@ -178,6 +184,7 @@ defusedxml==0.7.1
     # via fpdf2
     # via langchain-anthropic
     # via nbconvert
+    # via python-doctr
 deprecated==1.2.14
     # via llama-index-core
     # via llama-index-legacy
@@ -188,6 +195,8 @@ deprecated==1.2.14
     # via pikepdf
 deprecation==2.1.0
     # via postgrest
+diff-match-patch==20230430
+    # via quivr-diff-assistant
 dirtyjson==1.0.8
     # via llama-index-core
     # via llama-index-legacy
@@ -198,6 +207,7 @@ distro==1.9.0
     # via openai
 docx2txt==0.8
     # via quivr-core
+    # via quivr-diff-assistant
 dropbox==12.0.2
     # via quivr-api
 ecdsa==0.19.0
@@ -214,6 +224,7 @@ executing==2.0.1
     # via stack-data
 faiss-cpu==1.8.0.post1
     # via quivr-core
+    # via quivr-diff-assistant
 fastapi==0.110.3
     # via chainlit
     # via quivr-api
@@ -298,6 +309,9 @@ h11==0.14.0
     # via wsproto
 h2==4.1.0
     # via httpx
+h5py==3.10.0
+    # via python-doctr
+    # via quivr-diff-assistant
 hpack==4.0.0
     # via h2
 httpcore==1.0.5
@@ -325,6 +339,7 @@ httpx==0.27.0
 httpx-sse==0.4.0
     # via cohere
 huggingface-hub==0.24.6
+    # via python-doctr
     # via timm
     # via tokenizers
     # via transformers
@@ -371,6 +386,7 @@ jmespath==1.0.1
     # via botocore
 joblib==1.4.2
     # via nltk
+    # via scikit-learn
 jsonpatch==1.33
     # via langchain-core
 jsonpath-python==1.0.6
@@ -399,11 +415,12 @@ kiwisolver==1.4.5
     # via matplotlib
 kombu==5.4.0
     # via celery
-langchain==0.2.14
+langchain==0.2.16
     # via langchain-community
     # via megaparse
     # via quivr-api
     # via quivr-core
+    # via quivr-diff-assistant
 langchain-anthropic==0.1.23
     # via quivr-core
     # via quivr-monorepo
@@ -414,7 +431,7 @@ langchain-community==0.2.12
     # via megaparse
     # via quivr-api
     # via quivr-core
-langchain-core==0.2.38
+langchain-core==0.2.41
     # via langchain
     # via langchain-anthropic
     # via langchain-cohere
@@ -428,18 +445,20 @@ langchain-core==0.2.38
     # via quivr-core
 langchain-experimental==0.0.64
     # via langchain-cohere
-langchain-openai==0.1.22
+langchain-openai==0.1.25
     # via megaparse
     # via quivr-api
+    # via quivr-diff-assistant
 langchain-text-splitters==0.2.2
     # via langchain
 langdetect==1.0.9
+    # via python-doctr
     # via unstructured
 langgraph==0.2.14
     # via quivr-core
 langgraph-checkpoint==1.0.6
     # via langgraph
-langsmith==0.1.100
+langsmith==0.1.126
     # via langchain
     # via langchain-community
     # via langchain-core
@@ -453,14 +472,15 @@ literalai==0.0.607
     # via chainlit
 llama-cloud==0.0.13
     # via llama-index-indices-managed-llama-cloud
-llama-index==0.10.67.post1
+llama-index==0.11.12
     # via megaparse
-llama-index-agent-openai==0.2.9
+    # via quivr-diff-assistant
+llama-index-agent-openai==0.3.4
     # via llama-index
     # via llama-index-program-openai
-llama-index-cli==0.1.13
+llama-index-cli==0.3.1
     # via llama-index
-llama-index-core==0.10.67
+llama-index-core==0.11.12
     # via llama-index
     # via llama-index-agent-openai
     # via llama-index-cli
@@ -473,35 +493,39 @@ llama-index-core==0.10.67
     # via llama-index-readers-file
     # via llama-index-readers-llama-parse
     # via llama-parse
-llama-index-embeddings-openai==0.1.11
+llama-index-embeddings-openai==0.2.5
     # via llama-index
     # via llama-index-cli
-llama-index-indices-managed-llama-cloud==0.2.7
+llama-index-indices-managed-llama-cloud==0.3.1
     # via llama-index
 llama-index-legacy==0.9.48.post3
     # via llama-index
-llama-index-llms-openai==0.1.30
+llama-index-llms-openai==0.2.9
     # via llama-index
     # via llama-index-agent-openai
     # via llama-index-cli
     # via llama-index-multi-modal-llms-openai
     # via llama-index-program-openai
     # via llama-index-question-gen-openai
-llama-index-multi-modal-llms-openai==0.1.9
+    # via quivr-diff-assistant
+llama-index-multi-modal-llms-openai==0.2.1
     # via llama-index
-llama-index-program-openai==0.1.7
+llama-index-program-openai==0.2.0
     # via llama-index
     # via llama-index-question-gen-openai
-llama-index-question-gen-openai==0.1.3
+llama-index-question-gen-openai==0.2.0
     # via llama-index
-llama-index-readers-file==0.1.33
+llama-index-readers-file==0.2.2
     # via llama-index
-llama-index-readers-llama-parse==0.1.6
+    # via quivr-diff-assistant
+llama-index-readers-llama-parse==0.3.0
     # via llama-index
-llama-parse==0.4.9
+llama-parse==0.5.6
     # via llama-index-readers-llama-parse
     # via megaparse
     # via quivr-api
+llvmlite==0.43.0
+    # via numba
 lxml==5.3.0
     # via pikepdf
     # via python-docx
@@ -535,7 +559,9 @@ marshmallow==3.22.0
 marshmallow-enum==1.5.1
     # via unstructured-client
 matplotlib==3.9.2
+    # via mplcursors
     # via pycocotools
+    # via quivr-diff-assistant
     # via unstructured-inference
 matplotlib-inline==0.1.7
     # via ipykernel
@@ -576,6 +602,8 @@ mkdocstrings-python==1.11.1
     # via mkdocstrings
 monotonic==1.6
     # via posthog
+mplcursors==0.5.3
+    # via quivr-diff-assistant
 mpmath==1.3.0
     # via sympy
 msal==1.30.0
@@ -608,6 +636,7 @@ networkx==3.2.1
     # via torch
     # via unstructured
 nltk==3.9.1
+    # via llama-index
     # via llama-index-core
     # via llama-index-legacy
     # via unstructured
@@ -615,16 +644,20 @@ nodeenv==1.9.1
     # via pre-commit
 notion-client==2.2.1
     # via quivr-api
+numba==0.60.0
+    # via quivr-diff-assistant
 numpy==1.26.3
     # via chainlit
     # via contourpy
     # via faiss-cpu
+    # via h5py
     # via langchain
     # via langchain-community
     # via layoutparser
     # via llama-index-core
     # via llama-index-legacy
     # via matplotlib
+    # via numba
     # via onnx
     # via onnxruntime
     # via opencv-python
@@ -633,12 +666,18 @@ numpy==1.26.3
     # via pdf2docx
     # via pgvector
     # via pycocotools
+    # via python-doctr
+    # via quivr-diff-assistant
+    # via scikit-learn
     # via scipy
+    # via shapely
     # via torchvision
     # via transformers
     # via unstructured
 oauthlib==3.2.2
     # via requests-oauthlib
+olefile==0.47
+    # via python-oxmsg
 omegaconf==2.3.0
     # via effdet
 onnx==1.16.2
@@ -646,21 +685,25 @@ onnx==1.16.2
     # via unstructured-inference
 onnxruntime==1.19.0
     # via unstructured-inference
-openai==1.42.0
+openai==1.47.1
     # via langchain-openai
     # via litellm
     # via llama-index-agent-openai
     # via llama-index-core
     # via llama-index-embeddings-openai
     # via llama-index-legacy
     # via llama-index-llms-openai
     # via quivr-api
+    # via quivr-diff-assistant
     # via quivr-worker
 opencv-python==4.10.0.84
     # via layoutparser
+    # via python-doctr
+    # via quivr-diff-assistant
     # via unstructured-inference
 opencv-python-headless==4.10.0.84
     # via pdf2docx
 openpyxl==3.1.5
+    # via quivr-diff-assistant
     # via unstructured
 opentelemetry-api==1.27.0
     # via opentelemetry-exporter-otlp-proto-grpc
@@ -720,8 +763,9 @@ paginate==0.5.7
 pandas==2.2.2
     # via langchain-cohere
     # via layoutparser
     # via llama-index-core
     # via llama-index-legacy
     # via llama-index-readers-file
+    # via quivr-diff-assistant
     # via unstructured
 pandocfilters==1.5.1
     # via nbconvert
@@ -747,6 +791,8 @@ pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
     # via ipython
 pgvector==0.3.2
     # via quivr-api
+pi-heif==0.18.0
+    # via unstructured
 pikepdf==9.1.1
     # via unstructured
 pillow==10.2.0
@@ -756,13 +802,12 @@ pillow==10.2.0
     # via matplotlib
     # via pdf2image
     # via pdfplumber
+    # via pi-heif
     # via pikepdf
-    # via pillow-heif
+    # via python-doctr
     # via python-pptx
     # via torchvision
     # via unstructured-pytesseract
-pillow-heif==0.18.0
-    # via unstructured
 platformdirs==4.2.2
     # via black
     # via jupyter-core
@@ -819,6 +864,8 @@ pyasn1==0.6.0
     # via rsa
 pyasn1-modules==0.4.0
     # via google-auth
+pyclipper==1.3.0.post5
+    # via python-doctr
 pycocotools==2.0.8
     # via effdet
 pycodestyle==2.12.1
@@ -839,6 +886,7 @@ pydantic==2.8.2
     # via litellm
     # via literalai
     # via llama-cloud
+    # via llama-index-core
     # via openai
     # via postgrest
     # via pydantic-settings
@@ -879,9 +927,12 @@ pyparsing==3.1.2
     # via unstructured-client
 pypdf==4.3.1
     # via llama-index-readers-file
+    # via quivr-diff-assistant
     # via unstructured
 pypdfium2==4.30.0
     # via pdfplumber
+    # via python-doctr
+    # via quivr-diff-assistant
 pyproject-api==1.6.1
     # via tox
 pyreadline3==3.4.1 ; sys_platform == 'win32'
@@ -910,6 +961,8 @@ python-dateutil==2.9.0.post0
     # via realtime
     # via storage3
     # via unstructured-client
+python-doctr==0.9.0
+    # via quivr-diff-assistant
 python-docx==1.1.2
     # via megaparse
     # via pdf2docx
@@ -921,6 +974,7 @@ python-dotenv==1.0.1
     # via pydantic-settings
     # via pytest-dotenv
     # via quivr-api
+    # via quivr-diff-assistant
     # via quivr-worker
 python-engineio==4.9.1
     # via python-socketio
@@ -929,11 +983,14 @@ python-iso639==2024.4.27
 python-jose==3.3.0
     # via quivr-api
 python-magic==0.4.27
+    # via quivr-diff-assistant
     # via unstructured
 python-multipart==0.0.9
     # via chainlit
     # via quivr-api
     # via unstructured-inference
+python-oxmsg==0.0.1
+    # via unstructured
 python-pptx==1.0.2
     # via megaparse
     # via unstructured
@@ -967,6 +1024,7 @@ pyzmq==26.1.1
     # via ipykernel
     # via jupyter-client
 rapidfuzz==3.9.6
+    # via python-doctr
     # via unstructured
     # via unstructured-inference
 realtime==2.0.2
@@ -1021,14 +1079,20 @@ s3transfer==0.10.2
 safetensors==0.4.4
     # via timm
     # via transformers
+scikit-learn==1.5.2
+    # via quivr-diff-assistant
 scipy==1.14.1
     # via layoutparser
+    # via python-doctr
+    # via scikit-learn
 sentencepiece==0.2.0
     # via transformers
 sentry-sdk==2.13.0
     # via quivr-api
 setuptools==70.0.0
     # via opentelemetry-instrumentation
+shapely==2.0.6
+    # via python-doctr
 simple-websocket==1.0.0
     # via python-engineio
 six==1.16.0
@@ -1091,6 +1155,8 @@ tenacity==8.5.0
     # via llama-index-legacy
 termcolor==2.4.0
     # via fire
+threadpoolctl==3.5.0
+    # via scikit-learn
 tiktoken==0.7.0
     # via langchain-openai
     # via litellm
@@ -1141,6 +1207,7 @@ tqdm==4.66.5
     # via llama-index-core
     # via nltk
     # via openai
+    # via python-doctr
     # via transformers
     # via unstructured
 traitlets==5.14.3
@@ -1180,6 +1247,7 @@ typing-extensions==4.12.2
     # via pydantic-core
     # via pyee
     # via python-docx
+    # via python-oxmsg
     # via python-pptx
     # via realtime
     # via resend
@@ -1199,9 +1267,10 @@ tzdata==2024.1
     # via pandas
 unidecode==1.3.8
     # via quivr-api
-unstructured==0.15.7
+unstructured==0.15.13
     # via megaparse
     # via quivr-core
+    # via quivr-diff-assistant
 unstructured-client==0.6.0
     # via unstructured
 unstructured-inference==0.7.36
@@ -20,7 +20,10 @@
     # via quivr-worker
 -e file:core/MegaParse
     # via quivr-core
+    # via quivr-diff-assistant
 -e file:worker
+-e file:worker/diff-assistant
+    # via quivr-worker
 aiofiles==24.1.0
     # via quivr-core
 aiohappyeyeballs==2.4.0
@@ -42,6 +45,8 @@ anthropic==0.34.2
     # via langchain-anthropic
 antlr4-python3-runtime==4.9.3
     # via omegaconf
+anyascii==0.3.2
+    # via python-doctr
 anyio==4.4.0
     # via anthropic
     # via httpx
@@ -108,6 +113,7 @@ click==8.1.7
     # via mkdocs
     # via mkdocstrings
     # via nltk
+    # via python-oxmsg
     # via uvicorn
 click-didyoumean==0.3.1
     # via celery
@@ -155,12 +161,15 @@ defusedxml==0.7.1
     # via fpdf2
     # via langchain-anthropic
     # via nbconvert
+    # via python-doctr
 deprecated==1.2.14
     # via llama-index-core
     # via llama-index-legacy
     # via pikepdf
 deprecation==2.1.0
     # via postgrest
+diff-match-patch==20230430
+    # via quivr-diff-assistant
 dirtyjson==1.0.8
     # via llama-index-core
     # via llama-index-legacy
@@ -169,6 +178,7 @@ distro==1.9.0
     # via openai
 docx2txt==0.8
     # via quivr-core
+    # via quivr-diff-assistant
 dropbox==12.0.2
     # via quivr-api
 ecdsa==0.19.0
@@ -183,6 +193,7 @@ executing==2.1.0
     # via stack-data
 faiss-cpu==1.8.0.post1
     # via quivr-core
+    # via quivr-diff-assistant
 fastapi==0.112.1
     # via quivr-api
     # via sentry-sdk
@@ -256,6 +267,9 @@ h11==0.14.0
     # via uvicorn
 h2==4.1.0
     # via httpx
+h5py==3.10.0
+    # via python-doctr
+    # via quivr-diff-assistant
 hpack==4.0.0
     # via h2
 httpcore==1.0.5
@@ -281,6 +295,7 @@ httpx==0.27.0
 httpx-sse==0.4.0
     # via cohere
 huggingface-hub==0.24.6
+    # via python-doctr
     # via timm
     # via tokenizers
     # via transformers
@@ -322,6 +337,7 @@ jmespath==1.0.1
     # via botocore
 joblib==1.4.2
     # via nltk
+    # via scikit-learn
 jsonpatch==1.33
     # via langchain-core
 jsonpath-python==1.0.6
@@ -350,11 +366,12 @@ kiwisolver==1.4.5
     # via matplotlib
 kombu==5.4.0
     # via celery
-langchain==0.2.14
+langchain==0.2.16
     # via langchain-community
     # via megaparse
     # via quivr-api
     # via quivr-core
+    # via quivr-diff-assistant
 langchain-anthropic==0.1.23
     # via quivr-core
     # via quivr-monorepo
@@ -365,7 +382,7 @@ langchain-community==0.2.12
     # via megaparse
     # via quivr-api
     # via quivr-core
-langchain-core==0.2.38
+langchain-core==0.2.41
     # via langchain
     # via langchain-anthropic
     # via langchain-cohere
@@ -379,18 +396,20 @@ langchain-core==0.2.38
     # via quivr-core
 langchain-experimental==0.0.64
     # via langchain-cohere
-langchain-openai==0.1.22
+langchain-openai==0.1.25
     # via megaparse
     # via quivr-api
+    # via quivr-diff-assistant
 langchain-text-splitters==0.2.2
     # via langchain
 langdetect==1.0.9
+    # via python-doctr
     # via unstructured
 langgraph==0.2.19
     # via quivr-core
 langgraph-checkpoint==1.0.9
     # via langgraph
-langsmith==0.1.100
+langsmith==0.1.126
     # via langchain
     # via langchain-community
     # via langchain-core
@@ -400,14 +419,15 @@ litellm==1.43.19
     # via quivr-api
 llama-cloud==0.0.13
     # via llama-index-indices-managed-llama-cloud
-llama-index==0.10.67.post1
+llama-index==0.11.12
     # via megaparse
-llama-index-agent-openai==0.2.9
+    # via quivr-diff-assistant
+llama-index-agent-openai==0.3.4
     # via llama-index
     # via llama-index-program-openai
-llama-index-cli==0.1.13
+llama-index-cli==0.3.1
     # via llama-index
-llama-index-core==0.10.67
+llama-index-core==0.11.12
     # via llama-index
     # via llama-index-agent-openai
     # via llama-index-cli
@@ -420,35 +440,39 @@ llama-index-core==0.10.67
     # via llama-index-readers-file
     # via llama-index-readers-llama-parse
     # via llama-parse
-llama-index-embeddings-openai==0.1.11
+llama-index-embeddings-openai==0.2.5
     # via llama-index
     # via llama-index-cli
-llama-index-indices-managed-llama-cloud==0.2.7
+llama-index-indices-managed-llama-cloud==0.3.1
     # via llama-index
 llama-index-legacy==0.9.48.post3
     # via llama-index
-llama-index-llms-openai==0.1.30
+llama-index-llms-openai==0.2.9
     # via llama-index
     # via llama-index-agent-openai
     # via llama-index-cli
     # via llama-index-multi-modal-llms-openai
     # via llama-index-program-openai
     # via llama-index-question-gen-openai
-llama-index-multi-modal-llms-openai==0.1.9
+    # via quivr-diff-assistant
+llama-index-multi-modal-llms-openai==0.2.1
     # via llama-index
-llama-index-program-openai==0.1.7
+llama-index-program-openai==0.2.0
     # via llama-index
     # via llama-index-question-gen-openai
-llama-index-question-gen-openai==0.1.3
+llama-index-question-gen-openai==0.2.0
     # via llama-index
-llama-index-readers-file==0.1.33
+llama-index-readers-file==0.2.2
     # via llama-index
-llama-index-readers-llama-parse==0.1.6
+    # via quivr-diff-assistant
+llama-index-readers-llama-parse==0.3.0
     # via llama-index
-llama-parse==0.4.9
+llama-parse==0.5.6
     # via llama-index-readers-llama-parse
     # via megaparse
     # via quivr-api
+llvmlite==0.43.0
+    # via numba
 lxml==5.3.0
     # via pikepdf
     # via python-docx
@@ -482,7 +506,9 @@ marshmallow==3.22.0
 marshmallow-enum==1.5.1
     # via unstructured-client
 matplotlib==3.9.2
+    # via mplcursors
     # via pycocotools
+    # via quivr-diff-assistant
     # via unstructured-inference
 matplotlib-inline==0.1.7
     # via ipykernel
@@ -521,6 +547,8 @@ mkdocstrings-python==1.11.1
     # via mkdocstrings
 monotonic==1.6
     # via posthog
+mplcursors==0.5.3
+    # via quivr-diff-assistant
 mpmath==1.3.0
     # via sympy
 msal==1.30.0
@@ -549,20 +577,25 @@ networkx==3.2.1
     # via torch
     # via unstructured
 nltk==3.9.1
+    # via llama-index
     # via llama-index-core
     # via llama-index-legacy
     # via unstructured
 notion-client==2.2.1
     # via quivr-api
+numba==0.60.0
+    # via quivr-diff-assistant
 numpy==1.26.3
     # via contourpy
     # via faiss-cpu
+    # via h5py
     # via langchain
     # via langchain-community
     # via layoutparser
     # via llama-index-core
     # via llama-index-legacy
     # via matplotlib
+    # via numba
     # via onnx
     # via onnxruntime
     # via opencv-python
@@ -571,12 +604,18 @@ numpy==1.26.3
     # via pdf2docx
     # via pgvector
     # via pycocotools
+    # via python-doctr
+    # via quivr-diff-assistant
+    # via scikit-learn
     # via scipy
+    # via shapely
     # via torchvision
     # via transformers
     # via unstructured
 oauthlib==3.2.2
     # via requests-oauthlib
+olefile==0.47
+    # via python-oxmsg
 omegaconf==2.3.0
     # via effdet
 onnx==1.16.2
@@ -584,21 +623,25 @@ onnx==1.16.2
     # via unstructured-inference
 onnxruntime==1.19.0
     # via unstructured-inference
-openai==1.42.0
+openai==1.47.1
     # via langchain-openai
     # via litellm
     # via llama-index-agent-openai
     # via llama-index-core
     # via llama-index-embeddings-openai
     # via llama-index-legacy
     # via llama-index-llms-openai
     # via quivr-api
+    # via quivr-diff-assistant
     # via quivr-worker
 opencv-python==4.10.0.84
     # via layoutparser
+    # via python-doctr
+    # via quivr-diff-assistant
     # via unstructured-inference
 opencv-python-headless==4.10.0.84
     # via pdf2docx
 openpyxl==3.1.5
+    # via quivr-diff-assistant
     # via unstructured
 orjson==3.10.7
     # via langsmith
@@ -624,8 +667,9 @@ paginate==0.5.7
 pandas==2.2.2
     # via langchain-cohere
     # via layoutparser
     # via llama-index-core
     # via llama-index-legacy
     # via llama-index-readers-file
+    # via quivr-diff-assistant
     # via unstructured
 pandocfilters==1.5.1
     # via nbconvert
@@ -650,6 +694,8 @@ pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
     # via ipython
 pgvector==0.3.2
     # via quivr-api
+pi-heif==0.18.0
+    # via unstructured
 pikepdf==9.1.1
     # via unstructured
 pillow==10.2.0
@@ -659,13 +705,12 @@ pillow==10.2.0
     # via matplotlib
     # via pdf2image
     # via pdfplumber
+    # via pi-heif
     # via pikepdf
-    # via pillow-heif
+    # via python-doctr
     # via python-pptx
     # via torchvision
     # via unstructured-pytesseract
-pillow-heif==0.18.0
-    # via unstructured
 platformdirs==4.3.2
     # via jupyter-core
     # via mkdocs-get-deps
@@ -712,6 +757,8 @@ pyasn1==0.6.0
     # via rsa
 pyasn1-modules==0.4.0
     # via google-auth
+pyclipper==1.3.0.post5
+    # via python-doctr
 pycocotools==2.0.8
     # via effdet
 pycparser==2.22 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
@@ -728,6 +775,7 @@ pydantic==2.8.2
     # via langsmith
     # via litellm
     # via llama-cloud
+    # via llama-index-core
     # via openai
     # via postgrest
     # via pydantic-settings
@@ -765,9 +813,12 @@ pyparsing==3.1.2
     # via unstructured-client
 pypdf==4.3.1
     # via llama-index-readers-file
+    # via quivr-diff-assistant
     # via unstructured
 pypdfium2==4.30.0
     # via pdfplumber
+    # via python-doctr
+    # via quivr-diff-assistant
 pyreadline3==3.4.1 ; sys_platform == 'win32'
     # via humanfriendly
 python-dateutil==2.9.0.post0
@@ -781,6 +832,8 @@ python-dateutil==2.9.0.post0
     # via realtime
     # via storage3
     # via unstructured-client
+python-doctr==0.9.0
+    # via quivr-diff-assistant
 python-docx==1.1.2
     # via megaparse
     # via pdf2docx
@@ -790,16 +843,20 @@ python-dotenv==1.0.1
     # via megaparse
     # via pydantic-settings
     # via quivr-api
+    # via quivr-diff-assistant
     # via quivr-worker
 python-iso639==2024.4.27
     # via unstructured
 python-jose==3.3.0
     # via quivr-api
 python-magic==0.4.27
+    # via quivr-diff-assistant
     # via unstructured
 python-multipart==0.0.9
     # via quivr-api
     # via unstructured-inference
+python-oxmsg==0.0.1
+    # via unstructured
 python-pptx==1.0.2
     # via megaparse
     # via unstructured
@@ -830,6 +887,7 @@ pyzmq==26.2.0
     # via ipykernel
     # via jupyter-client
 rapidfuzz==3.9.6
+    # via python-doctr
     # via unstructured
     # via unstructured-inference
 realtime==2.0.2
@@ -882,12 +940,18 @@ s3transfer==0.10.2
 safetensors==0.4.4
     # via timm
     # via transformers
+scikit-learn==1.5.2
+    # via quivr-diff-assistant
 scipy==1.14.1
     # via layoutparser
+    # via python-doctr
+    # via scikit-learn
 sentencepiece==0.2.0
     # via transformers
 sentry-sdk==2.13.0
     # via quivr-api
+shapely==2.0.6
+    # via python-doctr
 six==1.16.0
     # via asttokens
     # via bleach
@@ -945,6 +1009,8 @@ tenacity==8.5.0
     # via llama-index-legacy
 termcolor==2.4.0
     # via fire
+threadpoolctl==3.5.0
+    # via scikit-learn
 tiktoken==0.7.0
     # via langchain-openai
     # via litellm
@@ -992,6 +1058,7 @@ tqdm==4.66.5
     # via llama-index-core
     # via nltk
     # via openai
+    # via python-doctr
     # via transformers
     # via unstructured
 traitlets==5.14.3
@@ -1029,6 +1096,7 @@ typing-extensions==4.12.2
     # via pydantic-core
     # via pyee
     # via python-docx
+    # via python-oxmsg
     # via python-pptx
     # via realtime
     # via resend
@@ -1048,9 +1116,10 @@ tzdata==2024.1
     # via pandas
 unidecode==1.3.8
     # via quivr-api
-unstructured==0.15.7
+unstructured==0.15.13
     # via megaparse
     # via quivr-core
+    # via quivr-diff-assistant
 unstructured-client==0.8.1
     # via unstructured
 unstructured-inference==0.7.36
@@ -0,0 +1,9 @@
alter table "public"."tasks" add column "assistant_name" text;

alter publication supabase_realtime add table tasks;
@@ -0,0 +1,4 @@
alter table "public"."tasks" add column "task_metadata" jsonb;
backend/worker/diff-assistant/.env.exemple (new file, 2 lines)
@@ -0,0 +1,2 @@
OPENAI_API_KEY = myopenaikey
LLAMA_PARSE_API_KEY = myllamaparsekey
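Note: this template lists the two keys the assistant reads from the environment. Loading a copied `.env` at runtime typically looks like the sketch below (python-dotenv appears in the lockfiles above, so the import is a reasonable assumption):

```python
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
openai_key = os.environ["OPENAI_API_KEY"]
llama_parse_key = os.environ["LLAMA_PARSE_API_KEY"]
```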
backend/worker/diff-assistant/.gitignore (new file, vendored, 15 lines)
@@ -0,0 +1,15 @@
# python generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# venv
.venv
.env
.DS_Store

#pkl
*.pkl
backend/worker/diff-assistant/.python-version (new file, 1 line)
@@ -0,0 +1 @@
3.11.9
backend/worker/diff-assistant/README.md (new file, 3 lines)
@@ -0,0 +1,3 @@
# diff-assistant

Describe your project here.
Binary files not shown, including new PDF fixtures:
backend/worker/diff-assistant/data/etiquettes/Cas3-2-3-AP.pdf (new file)
backend/worker/diff-assistant/data/etiquettes/Cas3-2-3-AV.pdf (new file)
(additional binary files not shown)
@ -0,0 +1,958 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from diff_algorithm import DiffAlgorithm\n",
|
||||
"from parser import DeadlyParser\n",
|
||||
"\n",
|
||||
"file_path_after = \"/Users/chloed./Documents/quivr/diff-assistant/src/cdp3/test_docs/etiquette_0_before.pdf\"\n",
|
||||
"file_path_before = \"/Users/chloed./Documents/quivr/diff-assistant/src/cdp3/test_docs/etiquette_0_after.pdf\"\n",
|
||||
"complex_file = \"/Users/chloed./Documents/quivr/diff-assistant/src/cdp3/test_docs/Cas3-2-3.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parser = DeadlyParser()\n",
|
||||
"parsed_before = parser.parse(file_path_before)\n",
|
||||
"parsed_after = parser.parse(file_path_after)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_before = parsed_before.render()\n",
|
||||
"text_after = parsed_after.render()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CUDA device False\n",
|
||||
"\n",
|
||||
"0: 1024x800 2 Pictures, 2 Section-headers, 18 Texts, 1091.6ms\n",
|
||||
"Speed: 24.9ms preprocess, 1091.6ms inference, 84.6ms postprocess per image at shape (1, 3, 1024, 800)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from PIL import Image\n",
|
||||
"import pypdfium2 as pdfium\n",
|
||||
"import torchvision.transforms as transforms\n",
|
||||
"\n",
|
||||
"import torch\n",
|
||||
"from ultralytics import YOLOv10\n",
|
||||
"\n",
|
||||
"print(\"CUDA device\", torch.cuda.is_available())\n",
|
||||
"\n",
|
||||
"device = torch.device(\"mps\") # Default CUDA device\n",
|
||||
"\n",
|
||||
"model = YOLOv10(\"./yolov10x_best.pt\").to(device)\n",
|
||||
"\n",
|
||||
"pdf = pdfium.PdfDocument(file_path_after)\n",
|
||||
"page = pdf[0] # load a page\n",
|
||||
"\n",
|
||||
"bitmap = page.render(scale=500 / 72)\n",
|
||||
"\n",
|
||||
"pil_image = bitmap.to_pil()\n",
|
||||
"\n",
|
||||
"# Create a transform to convert PIL image to tensor\n",
|
||||
"to_tensor = transforms.ToTensor()\n",
|
||||
"\n",
|
||||
"# Convert PIL image to tensor (this also normalizes values to [0, 1])\n",
|
||||
"tensor_image = to_tensor(pil_image)\n",
|
||||
"\n",
|
||||
"# Add batch dimension\n",
|
||||
"tensor_image = tensor_image.unsqueeze(0).to(device)\n",
|
||||
"\n",
|
||||
"# Assuming your model is already on the CUDA device\n",
|
||||
"model = model.to(device)\n",
|
||||
"\n",
|
||||
"# Perform inference\n",
|
||||
"with torch.no_grad():\n",
|
||||
" results = model.predict(source=pil_image, imgsz=1024, conf=0.35, batch=1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"annotated_image = results[0].plot()[:, :, ::-1]\n",
|
||||
"\n",
|
||||
"im = Image.fromarray(annotated_image)\n",
|
||||
"\n",
|
||||
"im.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([0.8352, 0.8235, 0.8203, 0.8113, 0.7984, 0.7860, 0.6394, 0.5778, 0.5666, 0.5546, 0.5365, 0.5300, 0.4666, 0.4322, 0.4222, 0.3932, 0.3926, 0.3901], device='mps:0')\n",
|
||||
"tensor([6., 9., 7., 9., 6., 9., 6., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.], device='mps:0')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(results[0].boxes.conf)\n",
|
||||
"print(results[0].boxes.cls)\n",
|
||||
"results[0].boxes.xyxyn"
|
||||
]
|
||||
},
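As a side note, the normalized xyxyn boxes printed above can be mapped back to pixel coordinates on the rendered page. A minimal sketch, assuming the pil_image and results objects from the previous cells:

# Map normalized [0, 1] box coordinates back to pixels on the rendered page
w, h = pil_image.size
for box, cls in zip(results[0].boxes.xyxyn, results[0].boxes.cls):
    x1, y1, x2, y2 = box.tolist()
    print(f"class {int(cls)}: ({x1 * w:.0f}, {y1 * h:.0f}) -> ({x2 * w:.0f}, {y2 * h:.0f})")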
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 157,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from io import BytesIO\n",
|
||||
"import base64\n",
|
||||
"def check_transcription(file_path, text):\n",
|
||||
" pdf = pdfium.PdfDocument(file_path)\n",
|
||||
" page = pdf[0] # load a page\n",
|
||||
" \n",
|
||||
" bitmap = page.render(scale=500 / 72)\n",
|
||||
" \n",
|
||||
" pil_image_before = bitmap.to_pil()\n",
|
||||
" \n",
|
||||
" buffered = BytesIO()\n",
|
||||
" pil_image_before.save(buffered, format=\"PNG\")\n",
|
||||
" img_str = base64.b64encode(buffered.getvalue()).decode()\n",
|
||||
" \n",
|
||||
" chat = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
|
||||
" result = chat.invoke(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": f\"Can you correct this entire text retranscription, respond only with the corrected transcription: {text}\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": f\"data:image/jpeg;base64,{img_str}\",\n",
|
||||
" \"detail\": \"auto\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 158,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result_before = check_transcription(file_path_before, text_before)\n",
|
||||
"result_after = check_transcription(file_path_after, text_after)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 168,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Coup de pâtes\n",
|
||||
"TRADITION & INNOVATION\n",
|
||||
"\n",
|
||||
"50 CREPES FINES SUCREES AU RHUM NEGRITA® (PLIEES EN QUATRE) D270 55g\n",
|
||||
"50 Thin crêpes sweetened with rum Negrita® (folded in four) D270 55g\n",
|
||||
"\n",
|
||||
"25514\n",
|
||||
"Rhum NEGRITA\n",
|
||||
"50 Crêpes fines sucrées au rhum cuites, surgelées -\n",
|
||||
"50 Crêpes sweetened with rum, baked, frozen\n",
|
||||
"\n",
|
||||
"Ingrédients : LAIT entier, farine de BLE, sucre de canne 16.4%, ŒUFS entiers*, beurre concentré (LAIT), eau, rhum Negrita (colorant: E150a) 3.6%, sel, poudres à lever: E500-E331-amidon de BLE.\n",
|
||||
"* Œufs issus de poules élevées au sol\n",
|
||||
"\n",
|
||||
"Ingredients : Whole MILK, WHEAT flour, cane sugar 16.4%, whole EGGS*, concentrated butter (MILK), water, Negrita rum (colouring: E150a) 3.6%, salt, raising agents: E500-E331-WHEAT starch.\n",
|
||||
"* Barn eggs\n",
|
||||
"\n",
|
||||
"Conseil d'utilisation : Décongeler le produit 1 heure entre 0° et 4°C. Après décongélation et maintien à 4°C, le produit se conserve au maximum pendant 24 heures. Suggestion: possibilité de décongeler les crêpes 30 secondes au four à micro-ondes.\n",
|
||||
"How to prepare the product: Defrost the product 1 hour at 0°C - +4°C. After thawing, preserve the product at +4°C for 24 hours maximum. Suggestion: Defrost the crêpe 30 sec in the microwave.\n",
|
||||
"\n",
|
||||
"Informations nutritionnelles pour 100g / Average nutritional values for 100g:\n",
|
||||
"Valeur énergétique/Energy: 1495 kJ / 356 kcal\n",
|
||||
"Matières grasses totales/Fat (g): 11.4\n",
|
||||
"- dont Acides Gras Saturés/of which saturated fatty acids (g): 5.9\n",
|
||||
"Glucides/Carbohydrates (g): 49.5\n",
|
||||
"- dont sucres/of which sugar (g): 25.2\n",
|
||||
"Protéines/Proteins (g): 8.0\n",
|
||||
"Sel/Salt (g): 0.45\n",
|
||||
"\n",
|
||||
"A conserver à -18°C : Ne jamais recongeler un produit décongelé\n",
|
||||
"Store at -18°C: Don't refreeze, once defrosted\n",
|
||||
"\n",
|
||||
"Coup de pâtes\n",
|
||||
"50 CREPES FINES SUCREES AU RHUM NEGRITA® (PLIEES EN QUATRE) D270 55g\n",
|
||||
"50 Crêpes fines sucrées au rhum cuites, surgelées -\n",
|
||||
"50 Crêpes sweetened with rum, baked, frozen\n",
|
||||
"\n",
|
||||
"N° DE LOT / BATCH : 116241 13:17\n",
|
||||
"A consommer de préférence avant le / Best before : 25/10/2025\n",
|
||||
"\n",
|
||||
"25514\n",
|
||||
"FAB : A04A\n",
|
||||
"\n",
|
||||
"(01)03604380255141(15)251025(10)116241(91)0316175\n",
|
||||
"EAN No: 03604380255141\n",
|
||||
"\n",
|
||||
"Poids net / Net weight : 2750 g\n",
|
||||
"\n",
|
||||
"C.I: 7142 COUP DE PATES S.A.S. ZAC DU BEL AIR - 14-16 AVENUE JOSEPH PAXTON - FERRIERES EN BRIE - 77164 MARNE LA VALLEE CEDEX 3 - FRANCE\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result_after.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 171,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(0, '50 CREPES FINES SUCREES AU\\nCoupdegal'),\n",
|
||||
" (-1, 'g'),\n",
|
||||
" (1, 'o'),\n",
|
||||
" (0, '\\nRHUM NEGRITAO (PLIEES EN QUATRE)\\nTRA'),\n",
|
||||
" (-1, 'C'),\n",
|
||||
" (1, 'D'),\n",
|
||||
" (0, 'ITION '),\n",
|
||||
" (-1, '&'),\n",
|
||||
" (1, 'a'),\n",
|
||||
" (0, ' INNO'),\n",
|
||||
" (-1, 'V'),\n",
|
||||
" (1, 'Y'),\n",
|
||||
" (0, 'AT'),\n",
|
||||
" (-1, 'IG'),\n",
|
||||
" (1, ':O'),\n",
|
||||
" (0, 'N\\nD270 55g\\n50 Thin cr'),\n",
|
||||
" (-1, 'ê'),\n",
|
||||
" (1, 'è'),\n",
|
||||
" (0, 'pes sweetened with rum Negrita'),\n",
|
||||
" (-1, 'g'),\n",
|
||||
" (1, 'e'),\n",
|
||||
" (0, '\\n(folded in four) D270 55g\\n25514 R'),\n",
|
||||
" (-1, 'k'),\n",
|
||||
" (1, 'h'),\n",
|
||||
" (0, 'um'),\n",
|
||||
" (-1, 'y'),\n",
|
||||
" (0, '\\n'),\n",
|
||||
" (1, 'NEGRITA '),\n",
|
||||
" (0, '50 Crêpes fines sucrées au rhum cuites, surgelées -\\n'),\n",
|
||||
" (-1, 'NEGRITA\\n'),\n",
|
||||
" (0, '50 Cr'),\n",
|
||||
" (-1, 'è'),\n",
|
||||
" (1, 'ê'),\n",
|
||||
" (0,\n",
|
||||
" 'pes sweetened with rum, baked, frozen\\nIngrédients : LAIT entier, farine de BLE, sucre de canne 16.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0,\n",
|
||||
" '%, CEUFS entiers*,\\nbeurre concentré (LAIT), eau, rhum Negrita (colorant: E150a) 3.'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '6'),\n",
|
||||
" (0,\n",
|
||||
" '%, sel, poudres à\\nlever: E500-E331-amidon de BLE.\\n* CEufs issus de poules élevées au sol\\nIngredients : Whole MILK, WHEAT flour, cane sugar 16.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0, '%, whole EGGS*, con'),\n",
|
||||
" (-1, 'c'),\n",
|
||||
" (1, 'ç'),\n",
|
||||
" (0, 'entrated\\nbutter (MILK), water, Negrita rum (colouring: E150a) 3.'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '6'),\n",
|
||||
" (0,\n",
|
||||
" \"%, salt, raising agents:\\nE500-E331-WHEAT starch.\\n* Barn eggs\\nConseil d'utilisation : Décongeler le produit 1 heure entre 0° et 4°C. Après décongélation et\\nmaintien à 4°C, le produit se conserve au maximum pendant 24 heures\"),\n",
|
||||
" (1, '.'),\n",
|
||||
" (0, '\\nSuggestion: possibilité de décongeler les cr'),\n",
|
||||
" (-1, 'è'),\n",
|
||||
" (1, 'é'),\n",
|
||||
" (0, 'pes 30 secondes au four à micr'),\n",
|
||||
" (-1, 'o'),\n",
|
||||
" (1, 'c'),\n",
|
||||
" (0, '-ondes.\\n'),\n",
|
||||
" (-1, 'BPA le 24.09.2020 '),\n",
|
||||
" (0, \"How to prepare the products: Defrost the product 1 hour at 0'C-+4\"),\n",
|
||||
" (1, '°'),\n",
|
||||
" (0, 'C. After thawing,\\npreserve the product at +4'),\n",
|
||||
" (-1, '°'),\n",
|
||||
" (1, '*'),\n",
|
||||
" (0,\n",
|
||||
" 'C for 24 hours maximum. Suggestion: Defrost the crèpe 30 sec\\nin the microwave.\\nInformations nutritionnelles pour 1 Average nutritional values for 100g:\\nValeur '),\n",
|
||||
" (-1, 'e'),\n",
|
||||
" (1, 'é'),\n",
|
||||
" (0, 'nerg'),\n",
|
||||
" (-1, 'e'),\n",
|
||||
" (1, 'é'),\n",
|
||||
" (0, 'tique/Energy: 149'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, ' kJ / 356 kcal\\nMatières grasses totales/Fat (g): 11.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0, '\\n- dont Acides Gras Saturés/of which saturated fatty acids (g): '),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '9'),\n",
|
||||
" (0, '\\nGiu'),\n",
|
||||
" (-1, 'c'),\n",
|
||||
" (1, 'ri'),\n",
|
||||
" (0, 'des/Car'),\n",
|
||||
" (-1, 'p'),\n",
|
||||
" (1, 'b'),\n",
|
||||
" (0, 'o'),\n",
|
||||
" (-1, 'n'),\n",
|
||||
" (1, 'h'),\n",
|
||||
" (0, 'y'),\n",
|
||||
" (-1, 'ct'),\n",
|
||||
" (1, 'di'),\n",
|
||||
" (0, 'ates (g): 4'),\n",
|
||||
" (-1, '8'),\n",
|
||||
" (1, '9'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '\\n'),\n",
|
||||
" (-1, '- '),\n",
|
||||
" (0, 'dont sucres/of which sugar (g): 2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '2'),\n",
|
||||
" (0, '\\nProtéines'),\n",
|
||||
" (-1, '/'),\n",
|
||||
" (0, 'Proteins (g): 8.0\\nSel/Salt (g): 0.4'),\n",
|
||||
" (-1, '8'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0,\n",
|
||||
" \"\\nA conserver à -18°C : Ne jamais recongeler un produit décongelé\\nStore at -18°C: Don't refreeze, once defrosted\\n\"),\n",
|
||||
" (-1, 'Fabriqué en France - Made in France\\n'),\n",
|
||||
" (0, 'Cou'),\n",
|
||||
" (-1, 'y'),\n",
|
||||
" (0, 'pde'),\n",
|
||||
" (-1, 'g'),\n",
|
||||
" (1, ' '),\n",
|
||||
" (0, 'al'),\n",
|
||||
" (-1, 'g'),\n",
|
||||
" (0, '\\n50 CREPES FINES SUCREES AU RHUM\\nNEGRITA'),\n",
|
||||
" (-1, 'O'),\n",
|
||||
" (1, 'B'),\n",
|
||||
" (0, ' (PLIEES EN QUATRE) D270\\nT'),\n",
|
||||
" (-1, 'R'),\n",
|
||||
" (1, 'W'),\n",
|
||||
" (0, 'ADITION & INNOVAT'),\n",
|
||||
" (-1, ':'),\n",
|
||||
" (1, 'I'),\n",
|
||||
" (0, 'ON\\n55g\\n50 Cr'),\n",
|
||||
" (-1, 'ê'),\n",
|
||||
" (1, 'è'),\n",
|
||||
" (0,\n",
|
||||
" 'pes fines sucrées au rhum cuites, surgelées -\\nNo DE LOTI\\n50 Crèpes sweetened with rum, baked, frozen\\nBATCH : '),\n",
|
||||
" (-1, '084'),\n",
|
||||
" (1, '116'),\n",
|
||||
" (0, '2'),\n",
|
||||
" (-1, '0'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0, '1 1'),\n",
|
||||
" (-1, '5'),\n",
|
||||
" (1, '3'),\n",
|
||||
" (0, ':'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '7\\nA consommer de pr'),\n",
|
||||
" (-1, 'è'),\n",
|
||||
" (1, 'é'),\n",
|
||||
" (0, 'f'),\n",
|
||||
" (-1, 'è'),\n",
|
||||
" (1, 'é'),\n",
|
||||
" (0, 'rence avant'),\n",
|
||||
" (-1, ' '),\n",
|
||||
" (0, 'le '),\n",
|
||||
" (-1, 'I'),\n",
|
||||
" (1, '/'),\n",
|
||||
" (0, '\\n25514\\nBest before : 2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '/'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '0'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (0, '/202'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '\\n'),\n",
|
||||
" (1, 'FAB :\\nA'),\n",
|
||||
" (0, '0'),\n",
|
||||
" (1, '4A\\n'),\n",
|
||||
" (0, '0'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (1, '1.'),\n",
|
||||
" (0, '0'),\n",
|
||||
" (1, '9'),\n",
|
||||
" (0, '80'),\n",
|
||||
" (-1, '43'),\n",
|
||||
" (1, '.9'),\n",
|
||||
" (0, '80'),\n",
|
||||
" (1, '2'),\n",
|
||||
" (0, '55141052'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '10'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (0, '2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '10'),\n",
|
||||
" (-1, '08'),\n",
|
||||
" (1, '1162'),\n",
|
||||
" (0, '4'),\n",
|
||||
" (-1, '20'),\n",
|
||||
" (0, '1 (91)0316'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '7'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '\\nEAN No: 03604380255141'),\n",
|
||||
" (-1, ' FAB : 00001 '),\n",
|
||||
" (1, '\\n'),\n",
|
||||
" (0, 'Poids net'),\n",
|
||||
" (-1, '\\n:\\n'),\n",
|
||||
" (1, '! '),\n",
|
||||
" (0, '2750\\nNet weight'),\n",
|
||||
" (1, ': :'),\n",
|
||||
" (0, '\\ng\\n'),\n",
|
||||
" (-1, '\\n'),\n",
|
||||
" (1, 'Ci: 7142 '),\n",
|
||||
" (0, 'COUP DE'),\n",
|
||||
" (1, 'F'),\n",
|
||||
" (0, ' PATES'),\n",
|
||||
" (-1, 'E'),\n",
|
||||
" (1, 'O'),\n",
|
||||
" (0, ' S'),\n",
|
||||
" (-1, '.'),\n",
|
||||
" (0, 'A.S'),\n",
|
||||
" (-1, '-;'),\n",
|
||||
" (0, ' ZA'),\n",
|
||||
" (-1, 'C'),\n",
|
||||
" (1, 'Ç'),\n",
|
||||
" (0, ' DU BEL AIR'),\n",
|
||||
" (-1, '-'),\n",
|
||||
" (0, ' 14-16 AVENUE'),\n",
|
||||
" (1, '.'),\n",
|
||||
" (0, ' '),\n",
|
||||
" (-1, 'J'),\n",
|
||||
" (1, 'V'),\n",
|
||||
" (0, 'OSEPH'),\n",
|
||||
" (-1, ' '),\n",
|
||||
" (0, 'PAXTON-\\nFERRIERES EN'),\n",
|
||||
" (-1, 'I'),\n",
|
||||
" (0, ' BRIE 77'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '8'),\n",
|
||||
" (0, '14 MARNE LA VALLEE CEDEX 3'),\n",
|
||||
" (1, '- FRANÇE')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 171,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dmp= DiffAlgorithm()\n",
|
||||
"diff_main = dmp.diff_main(result_before.content, result_after.content)\n",
|
||||
"#diff_main = dmp.diff_main(text_before, text_after)\n",
|
||||
"#result = dmp.to_pretty_json(diff_main, parsed_before)\n",
|
||||
"diff_main"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 172,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#split differences and send to llm \n",
|
||||
"cleaned_diff = []\n",
|
||||
"for cat, content in diff_main:\n",
|
||||
" if content.strip() and content != \"\\n\":\n",
|
||||
" cleaned_diff.append((cat, content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 173,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def format_difference(main_diff):\n",
|
||||
" text_modified = \"\"\n",
|
||||
" sub_stack = 0\n",
|
||||
" for op, data in main_diff:\n",
|
||||
" if op == 0: \n",
|
||||
" text_modified += data if sub_stack == 0 else f\"_]] {data}\"\n",
|
||||
" elif op == -1: \n",
|
||||
" if sub_stack == 0:\n",
|
||||
" text_modified += f\"[[{data}->\"\n",
|
||||
" sub_stack += 1\n",
|
||||
" else:\n",
|
||||
" text_modified += f\"{data}->\"\n",
|
||||
" elif op == 1: \n",
|
||||
" if sub_stack > 0:\n",
|
||||
" text_modified += f\"{data}]]\"\n",
|
||||
" sub_stack -= 1\n",
|
||||
" else:\n",
|
||||
" text_modified += f\"[[ _ ->{data}]]\"\n",
|
||||
" return text_modified"
|
||||
]
|
||||
},
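For a quick sanity check, here is a minimal sketch (not part of the original notebook) of what format_difference produces on a hypothetical diff list, using the (op, data) convention above where 0 is an equality, -1 a deletion and 1 an insertion:

# Hypothetical diff for "the black cat" -> "the red cat"
sample_diff = [(0, "the "), (-1, "black"), (1, "red"), (0, " cat")]
print(format_difference(sample_diff))
# prints: the [[black->red]] cat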
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 174,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"50 CREPES FINES SUCREES AU\\nCoupdegal[[g->o]]\\nRHUM NEGRITAO (PLIEES EN QUATRE)\\nTRA[[C->D]]ITION [[&->a]] INNO[[V->Y]]AT[[IG->:O]]N\\nD270 55g\\n50 Thin cr[[ê->è]]pes sweetened with rum Negrita[[g->e]]\\n(folded in four) D270 55g\\n25514 R[[k->h]]um[[y->NEGRITA ]]50 Crêpes fines sucrées au rhum cuites, surgelées -\\n[[NEGRITA\\n->_]] 50 Crè->ê]]pes sweetened with rum, baked, frozen\\nIngrédients : LAIT entier, farine de BLE, sucre de canne 16.[[6->4]]%, CEUFS entiers*,\\nbeurre concentré (LAIT), eau, rhum Negrita (colorant: E150a) 3.[[7->6]]%, sel, poudres à\\nlever: E500-E331-amidon de BLE.\\n* CEufs issus de poules élevées au sol\\nIngredients : Whole MILK, WHEAT flour, cane sugar 16.[[6->4]]%, whole EGGS*, con[[c->ç]]entrated\\nbutter (MILK), water, Negrita rum (colouring: E150a) 3.[[7->6]]%, salt, raising agents:\\nE500-E331-WHEAT starch.\\n* Barn eggs\\nConseil d'utilisation : Décongeler le produit 1 heure entre 0° et 4°C. Après décongélation et\\nmaintien à 4°C, le produit se conserve au maximum pendant 24 heures[[ _ ->.]]\\nSuggestion: possibilité de décongeler les cr[[è->é]]pes 30 secondes au four à micr[[o->c]]-ondes.\\n[[BPA le 24.09.2020 ->_]] How to prepare the products: Defrost the product 1 hour at 0'C-+4°]]C. After thawing,\\npreserve the product at +4[[°->*]]C for 24 hours maximum. Suggestion: Defrost the crèpe 30 sec\\nin the microwave.\\nInformations nutritionnelles pour 1 Average nutritional values for 100g:\\nValeur [[e->é]]nerg[[e->é]]tique/Energy: 149[[7->5]] kJ / 356 kcal\\nMatières grasses totales/Fat (g): 11.[[6->4]]\\n- dont Acides Gras Saturés/of which saturated fatty acids (g): [[6->5]].[[1->9]]\\nGiu[[c->ri]]des/Car[[p->b]]o[[n->h]]y[[ct->di]]ates (g): 4[[8->9]].[[9->5]][[- ->_]] dont sucres/of which sugar (g): 24->5]].[[1->2]]\\nProtéines[[/->_]] Proteins (g): 8.0\\nSel/Salt (g): 0.48->5]]\\nA conserver à -18°C : Ne jamais recongeler un produit décongelé\\nStore at -18°C: Don't refreeze, once defrosted\\n[[Fabriqué en France - Made in France\\n->_]] Couy->_]] pdeg->_]] alg->_]] \\n50 CREPES FINES SUCREES AU RHUM\\nNEGRITAO->B]] (PLIEES EN QUATRE) D270\\nT[[R->W]]ADITION & INNOVAT[[:->I]]ON\\n55g\\n50 Cr[[ê->è]]pes fines sucrées au rhum cuites, surgelées -\\nNo DE LOTI\\n50 Crèpes sweetened with rum, baked, frozen\\nBATCH : [[084->116]]2[[0->4]]1 1[[5->3]]:[[4->1]]7\\nA consommer de pr[[è->é]]f[[è->é]]rence avantle [[I->/]]\\n25514\\nBest before : 2[[4->5]]/[[ _ ->1]]0[[9->_]] /2021->5]][[ _ ->FAB :\\nA]]0[[ _ ->4A\\n]]0[[9->1.]]0[[ _ ->9]]80[[43->.9]]80[[ _ ->2]]55141052[[ _ ->5]]10[[9->_]] 24->5]]10[[08->1162]]4[[20->_]] 1 (91)03164->1]]7[[6->5]]\\nEAN No: 03604380255141[[ FAB : 00001 ->_]] Poids net\\n:\\n->! ]]2750\\nNet weight[[ _ ->: :]]\\ng\\n[[ _ ->Ci: 7142 ]]COUP DE[[ _ ->F]] PATES[[E->O]] S[[.->_]] A.S-;->_]] ZAC->Ç]] DU BEL AIR[[-->_]] 14-16 AVENUE.]][[J->V]]OSEPHPAXTON-\\nFERRIERES EN[[I->_]] BRIE 776->8]]14 MARNE LA VALLEE CEDEX 3[[ _ ->- FRANÇE]]\""
|
||||
]
|
||||
},
|
||||
"execution_count": 174,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"format_difference(cleaned_diff)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 175,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model=\"gpt-4o\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
" max_retries=2,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 176,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"section_diffs = [cleaned_diff]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 177,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"report = []\n",
|
||||
"#modified_section_names = []\n",
|
||||
"for section in section_diffs:\n",
|
||||
" if len(section) == 1 and section[0][0] == 0:\n",
|
||||
" print(\"No differences found in this section.\")\n",
|
||||
" continue\n",
|
||||
" else:\n",
|
||||
" text_modified = format_difference(section)\n",
|
||||
" #modified_section_names.append(section[0][1].split(\"\\n\")[1].split(\"#\")[-1].strip())\n",
|
||||
" messages = [\n",
|
||||
" (\n",
|
||||
" \"human\",\n",
|
||||
" f\"\"\"You are tasked with analyzing and reporting differences in text for a Quality engineer. The input text contains differences marked with special tokens. Your job is to parse these differences and create a clear, concise report.\n",
|
||||
"\n",
|
||||
" Here is the text containing the differences:\n",
|
||||
"\n",
|
||||
" <diff_text>\n",
|
||||
" {text_modified}\n",
|
||||
" </diff_text>\n",
|
||||
"\n",
|
||||
" RULE #1 : If there are no [[->]] tokens, it indicates no changes to report, inventing changes means death.\n",
|
||||
" The differences are marked using the following format:\n",
|
||||
" - [[before->after]] indicates a change from \"before\" to \"after\"\n",
|
||||
" - If there is no \"before\" text, it indicates an addition\n",
|
||||
" - If there is no \"after\" text, it indicates a deletion\n",
|
||||
" - If there is no [[ ]] token, it indicates no changes to report\n",
|
||||
" - Make sense of the difference and do not keep the '[' in the report.\n",
|
||||
" - \"_\" alone means empty.\n",
|
||||
"\n",
|
||||
" Follow these steps to create your report:\n",
|
||||
"\n",
|
||||
" 1. Carefully read through the entire text.\n",
|
||||
" 2. Identify each instance of [[ ]] tokens.\n",
|
||||
" 3. For each instance, determine the modification that was made.\n",
|
||||
" Present your report in the following format:\n",
|
||||
" <report>\n",
|
||||
" In the section ..., the modification found are :\n",
|
||||
" * the **black** cat was changed to : the **red** cat\n",
|
||||
" * ...\n",
|
||||
" </report>\n",
|
||||
" Note that there might be no modifications in some sections. In that case, simply state that no differences were found.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" Remember, your goal is to create a clear and concise report that allows the Quality engineer to quickly verify the differences. Focus on accuracy and readability in your output, give every indication possible to make it easier to find the modification.\n",
|
||||
" The report should be written in a professional and formal tone and in French.\"\"\",\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" response = llm.invoke(messages)\n",
|
||||
" report.append(response.content)\n",
|
||||
"\n",
|
||||
"#print(\"The modified Sections are : \", modified_section_names)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 178,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<report>\n",
|
||||
"Dans la section \"50 CREPES FINES SUCREES AU\", les modifications trouvées sont :\n",
|
||||
"* Coupdegal**g** a été changé en : Coupdegal**o**\n",
|
||||
"\n",
|
||||
"Dans la section \"RHUM NEGRITAO (PLIEES EN QUATRE)\", les modifications trouvées sont :\n",
|
||||
"* TRA**C**ITION a été changé en : TRA**D**ITION\n",
|
||||
"* TRA**&** INNO**V**ATION a été changé en : TRA**a** INNO**Y**ATION\n",
|
||||
"* INNO**V**ATION a été changé en : INNO**Y**ATION\n",
|
||||
"* INNO**IG**N a été changé en : INNO**:O**N\n",
|
||||
"\n",
|
||||
"Dans la section \"50 Thin crêpes sweetened with rum Negrita\", les modifications trouvées sont :\n",
|
||||
"* cr**ê**pes a été changé en : cr**è**pes\n",
|
||||
"* Negrita**g** a été changé en : Negrita**e**\n",
|
||||
"\n",
|
||||
"Dans la section \"25514 Rhum NEGRITA 50 Crêpes fines sucrées au rhum cuites, surgelées\", les modifications trouvées sont :\n",
|
||||
"* R**k**um a été changé en : R**h**um\n",
|
||||
"* Rhum**y** a été changé en : Rhum**NEGRITA**\n",
|
||||
"* NEGRITA a été changé en : (supprimé)\n",
|
||||
"* Crè**ê**pes a été changé en : Crè**e**pes\n",
|
||||
"\n",
|
||||
"Dans la section \"Ingrédients\", les modifications trouvées sont :\n",
|
||||
"* sucre de canne 16.**6**% a été changé en : sucre de canne 16.**4**%\n",
|
||||
"* rhum Negrita (colorant: E150a) 3.**7**% a été changé en : rhum Negrita (colorant: E150a) 3.**6**%\n",
|
||||
"\n",
|
||||
"Dans la section \"Ingredients\", les modifications trouvées sont :\n",
|
||||
"* cane sugar 16.**6**% a été changé en : cane sugar 16.**4**%\n",
|
||||
"* con**c**entrated butter a été changé en : con**ç**entrated butter\n",
|
||||
"* Negrita rum (colouring: E150a) 3.**7**% a été changé en : Negrita rum (colouring: E150a) 3.**6**%\n",
|
||||
"\n",
|
||||
"Dans la section \"Conseil d'utilisation\", les modifications trouvées sont :\n",
|
||||
"* 24 heures** _ ** a été changé en : 24 heures**.**\n",
|
||||
"* cr**è**pes a été changé en : cr**é**pes\n",
|
||||
"* micr**o**-ondes a été changé en : micr**c**-ondes\n",
|
||||
"* BPA le 24.09.2020 a été changé en : (supprimé)\n",
|
||||
"\n",
|
||||
"Dans la section \"How to prepare the products\", les modifications trouvées sont :\n",
|
||||
"* 0'C-+4**°**C a été changé en : 0'C-+4**C**\n",
|
||||
"* +4**°**C a été changé en : +4**C**\n",
|
||||
"\n",
|
||||
"Dans la section \"Valeur énergétique/Energy\", les modifications trouvées sont :\n",
|
||||
"* Valeur **e**nerg**e**tique a été changé en : Valeur **é**nerg**é**tique\n",
|
||||
"* 149**7** kJ a été changé en : 149**5** kJ\n",
|
||||
"\n",
|
||||
"Dans la section \"Matières grasses totales/Fat (g)\", les modifications trouvées sont :\n",
|
||||
"* 11.**6** a été changé en : 11.**4**\n",
|
||||
"\n",
|
||||
"Dans la section \"Acides Gras Saturés/of which saturated fatty acids (g)\", les modifications trouvées sont :\n",
|
||||
"* **6**.1 a été changé en : **5**.9\n",
|
||||
"\n",
|
||||
"Dans la section \"Glucides/Carbohydrates (g)\", les modifications trouvées sont :\n",
|
||||
"* Giu**c**des a été changé en : Giu**ri**des\n",
|
||||
"* Car**p**o**n**y**ct**ates a été changé en : Car**b**o**h**y**di**ates\n",
|
||||
"* 4**8**.9 a été changé en : 4**9**.5\n",
|
||||
"* 24**-**1 a été changé en : 24**.2**\n",
|
||||
"\n",
|
||||
"Dans la section \"Protéines/Proteins (g)\", les modifications trouvées sont :\n",
|
||||
"* Protéines**/** a été changé en : Protéines\n",
|
||||
"\n",
|
||||
"Dans la section \"Sel/Salt (g)\", les modifications trouvées sont :\n",
|
||||
"* 0.48**->5** a été changé en : 0.48**5**\n",
|
||||
"\n",
|
||||
"Dans la section \"A conserver à -18°C\", les modifications trouvées sont :\n",
|
||||
"* Fabriqué en France - Made in France a été changé en : (supprimé)\n",
|
||||
"\n",
|
||||
"Dans la section \"50 CREPES FINES SUCREES AU RHUM\", les modifications trouvées sont :\n",
|
||||
"* NEGRITAO**->B** a été changé en : NEGRITAO**B**\n",
|
||||
"* T**R**ADITION a été changé en : T**W**ADITION\n",
|
||||
"* INNOVAT**:**ON a été changé en : INNOVAT**I**ON\n",
|
||||
"\n",
|
||||
"Dans la section \"50 Crêpes fines sucrées au rhum cuites, surgelées\", les modifications trouvées sont :\n",
|
||||
"* cr**ê**pes a été changé en : cr**è**pes\n",
|
||||
"\n",
|
||||
"Dans la section \"BATCH\", les modifications trouvées sont :\n",
|
||||
"* 084**2**0 a été changé en : 116**4**1\n",
|
||||
"* 1**5**:4**7** a été changé en : 1**3**:1**7**\n",
|
||||
"\n",
|
||||
"Dans la section \"A consommer de préférence avant le\", les modifications trouvées sont :\n",
|
||||
"* 2**4**/10/2021 a été changé en : 2**5**/10/2021\n",
|
||||
"* FAB : A0 a été changé en : FAB : 4A\n",
|
||||
"* 09.0 a été changé en : 1.0\n",
|
||||
"* 98043 a été changé en : 980\n",
|
||||
"* 255141052 a été changé en : 2551410525\n",
|
||||
"* 109 a été changé en : 109\n",
|
||||
"* 24.10.08 a été changé en : 24.10.1162\n",
|
||||
"* 20 a été changé en : 20\n",
|
||||
"* 1 (91)03164 a été changé en : 1 (91)03164-17\n",
|
||||
"\n",
|
||||
"Dans la section \"EAN No\", les modifications trouvées sont :\n",
|
||||
"* EAN No: 03604380255141 FAB : 00001 a été changé en : EAN No: 03604380255141\n",
|
||||
"\n",
|
||||
"Dans la section \"Poids net\", les modifications trouvées sont :\n",
|
||||
"* Poids net : 2750 a été changé en : Poids net : 2750\n",
|
||||
"\n",
|
||||
"Dans la section \"Net weight\", les modifications trouvées sont :\n",
|
||||
"* Net weight : : a été changé en : Net weight : :\n",
|
||||
"\n",
|
||||
"Dans la section \"COUP DE PATES\", les modifications trouvées sont :\n",
|
||||
"* COUP DE PATES a été changé en : COUP DE F PATES\n",
|
||||
"* S.A.S a été changé en : S.A.S\n",
|
||||
"* ZAC DU BEL AIR a été changé en : ZAC DU BEL AIR\n",
|
||||
"* 14-16 AVENUE a été changé en : 14-16 AVENUE\n",
|
||||
"* JOSEPH PAXTON a été changé en : JOSEPH PAXTON\n",
|
||||
"* FERRIERES EN BRIE 77614 MARNE LA VALLEE CEDEX 3 a été changé en : FERRIERES EN BRIE 77614 MARNE LA VALLEE CEDEX 3\n",
|
||||
"</report>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(report[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 166,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(1, 'Coup de pâtes\\nTRADITION & INNOVATION\\n\\n'),\n",
|
||||
" (0,\n",
|
||||
" '50 CREPES FINES SUCREES AU RHUM NEGRITA® (PLIEES EN QUATRE) D270 55g\\n50 Thin crêpes sweetened with rum Negrita® (folded in four) D270 55g\\n'),\n",
|
||||
" (0, '25514'),\n",
|
||||
" (0, 'Rhum NEGRITA\\n50 Crêpes fines sucrées au rhum cuites, surgelées -'),\n",
|
||||
" (0, '50 Crêpes sweetened with rum, baked, frozen'),\n",
|
||||
" (0, '\\nIngrédients : LAIT entier, farine de BLE, sucre de canne 16.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0,\n",
|
||||
" '%, ŒUFS entiers*, beurre concentré (LAIT), eau, rhum Negrita (colorant: E150a) 3.'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '6'),\n",
|
||||
" (0,\n",
|
||||
" '%, sel, poudres à lever: E500-E331-amidon de BLE.\\n* Œufs issus de poules élevées au sol\\n'),\n",
|
||||
" (0, 'Ingredients : Whole MILK, WHEAT flour, cane sugar 16.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0,\n",
|
||||
" '%, whole EGGS*, concentrated butter (MILK), water, Negrita rum (colouring: E150a) 3.'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '6'),\n",
|
||||
" (0, '%, salt, raising agents: E500-E331-WHEAT starch.\\n* Barn eggs\\n'),\n",
|
||||
" (0,\n",
|
||||
" \"Conseil d'utilisation : Décongeler le produit 1 heure entre 0° et 4°C. Après décongélation et maintien à 4°C, le produit se conserve au maximum pendant 24 heures\"),\n",
|
||||
" (1, '. '),\n",
|
||||
" (0,\n",
|
||||
" 'Suggestion: possibilité de décongeler les crêpes 30 secondes au four à micro-ondes.\\n'),\n",
|
||||
" (-1, 'BPA le 24.09.2020 '),\n",
|
||||
" (0, 'How to prepare the product'),\n",
|
||||
" (-1, 's'),\n",
|
||||
" (0, ': Defrost the product 1 hour at 0°C'),\n",
|
||||
" (0, '-'),\n",
|
||||
" (1, ' +'),\n",
|
||||
" (0, '4°C. After thawing, preserve the product at '),\n",
|
||||
" (1, '+'),\n",
|
||||
" (0,\n",
|
||||
" '4°C for 24 hours maximum. Suggestion: Defrost the crêpe 30 sec in the microwave.\\n'),\n",
|
||||
" (0,\n",
|
||||
" 'Informations nutritionnelles pour 100g / Average nutritional values for 100g:\\nValeur énergétique/Energy: 149'),\n",
|
||||
" (-1, '7'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, ' kJ / 356 kcal\\nMatières grasses totales/Fat (g): 11.'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '4'),\n",
|
||||
" (0, '\\n- dont Acides Gras Saturés/of which saturated fatty acids (g): '),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '9'),\n",
|
||||
" (0, '\\nGlucides/Carbohydrates (g): 4'),\n",
|
||||
" (-1, '8'),\n",
|
||||
" (1, '9'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '\\n- dont sucres/of which sugar (g): 2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '.'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '2'),\n",
|
||||
" (0, '\\nProtéines/Proteins (g): 8.0\\nSel/Salt (g): 0.4'),\n",
|
||||
" (-1, '8'),\n",
|
||||
" (1, '5\\n'),\n",
|
||||
" (0,\n",
|
||||
" \"\\nA conserver à -18°C : Ne jamais recongeler un produit décongelé\\nStore at -18°C: Don't refreeze, once defrosted\\n\"),\n",
|
||||
" (-1, 'Fabriq'),\n",
|
||||
" (1, '\\nCo'),\n",
|
||||
" (0, 'u'),\n",
|
||||
" (-1, 'é'),\n",
|
||||
" (1, 'p'),\n",
|
||||
" (-1, 'en France - Ma'),\n",
|
||||
" (0, 'de '),\n",
|
||||
" (-1, 'in Franc'),\n",
|
||||
" (1, 'pât'),\n",
|
||||
" (0, 'e'),\n",
|
||||
" (1, 's'),\n",
|
||||
" (0,\n",
|
||||
" '\\n50 CREPES FINES SUCREES AU RHUM NEGRITA® (PLIEES EN QUATRE) D270 55g\\n50 Crêpes fines sucrées au rhum cuites, surgelées -'),\n",
|
||||
" (0, '50 Crêpes sweetened with rum, baked, frozen\\n'),\n",
|
||||
" (0, 'N° DE LOT / BATCH : '),\n",
|
||||
" (-1, '08'),\n",
|
||||
" (1, '1162'),\n",
|
||||
" (0, '4'),\n",
|
||||
" (-1, '20'),\n",
|
||||
" (0, '1 1'),\n",
|
||||
" (-1, '5'),\n",
|
||||
" (1, '3'),\n",
|
||||
" (0, ':'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '7\\nA consommer de préférence avant le / Best before : 2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '/'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '0'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (0, '/202'),\n",
|
||||
" (-1, '1'),\n",
|
||||
" (1, '5\\n'),\n",
|
||||
" (0, '\\n25514\\n'),\n",
|
||||
" (1, 'FAB : A04A\\n\\n'),\n",
|
||||
" (0, '(01)03604380255141(15)2'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '10'),\n",
|
||||
" (-1, '9'),\n",
|
||||
" (0, '2'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '(10)'),\n",
|
||||
" (-1, '08'),\n",
|
||||
" (1, '1162'),\n",
|
||||
" (0, '4'),\n",
|
||||
" (-1, '20'),\n",
|
||||
" (0, '1'),\n",
|
||||
" (0, '(91)0316'),\n",
|
||||
" (-1, '4'),\n",
|
||||
" (1, '1'),\n",
|
||||
" (0, '7'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (1, '5'),\n",
|
||||
" (0, '\\nEAN N'),\n",
|
||||
" (-1, '°'),\n",
|
||||
" (1, 'o'),\n",
|
||||
" (0, ': 03604380255141'),\n",
|
||||
" (-1, ' FAB : 00001 '),\n",
|
||||
" (0, 'Poids net / Net weight : 2750 g\\n'),\n",
|
||||
" (1, '\\nC.I: 7142 '),\n",
|
||||
" (0, 'COUP DE PATES'),\n",
|
||||
" (-1, '®'),\n",
|
||||
" (0, ' S.A.S'),\n",
|
||||
" (-1, ' -'),\n",
|
||||
" (1, '.'),\n",
|
||||
" (0, ' ZAC DU BEL AIR - 14-16 AVENUE JOSEPH PAXTON - FERRIERES EN BRIE - 77'),\n",
|
||||
" (-1, '6'),\n",
|
||||
" (0, '1'),\n",
|
||||
" (1, '6'),\n",
|
||||
" (0, '4 MARNE LA VALLEE CEDEX 3'),\n",
|
||||
" (1, ' - FRANCE')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 166,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cleaned_diff"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
53
backend/worker/diff-assistant/pyproject.toml
Normal file
@ -0,0 +1,53 @@
|
||||
[project]
|
||||
name = "quivr-diff-assistant"
|
||||
version = "0.1.0"
|
||||
description = "Diff Assistant"
|
||||
authors = [
|
||||
{ name = "Stan Girard", email = "stan@quivr.app" }
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"python-doctr>=0.9.0",
|
||||
"matplotlib>=3.9.2",
|
||||
"mplcursors>=0.5.3",
|
||||
"diff-match-patch>=20230430",
|
||||
"scikit-learn>=1.5.1",
|
||||
"numpy>=1.16.0",
|
||||
"unstructured>=0.15.9",
|
||||
"python-magic>=0.4.27",
|
||||
"pypdfium2>=4.30.0",
|
||||
"numba>=0.60.0",
|
||||
"docx2txt>=0.8",
|
||||
"openpyxl>=3.1.5",
|
||||
"faiss-cpu>=1.8.0.post1",
|
||||
"llama-index>=0.11.8",
|
||||
"openai>=1.44.1",
|
||||
"pandas>=2.2.2",
|
||||
"pypdf>=4.3.1",
|
||||
"llama-index-readers-file>=0.2.1",
|
||||
"llama-index-llms-openai>=0.2.3",
|
||||
"python-dotenv>=1.0.1",
|
||||
"langchain>=0.2.16",
|
||||
"langchain-openai>=0.1.24",
|
||||
"opencv-python>=4.10.0.84",
|
||||
"megaparse>=0.0.31",
|
||||
"h5py==3.10.0",
|
||||
]
|
||||
readme = "README.md"
|
||||
requires-python = ">= 3.8"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.rye]
|
||||
managed = true
|
||||
dev-dependencies = [
|
||||
"pytest>=8.3.2",
|
||||
]
|
||||
|
||||
[tool.hatch.metadata]
|
||||
allow-direct-references = true
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["quivr_diff_assistant"]
|
221
backend/worker/diff-assistant/quivr_diff_assistant/main_uc2.py
Normal file
@ -0,0 +1,221 @@
|
||||
import asyncio
|
||||
from enum import Enum
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_openai import ChatOpenAI
|
||||
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
|
||||
from llama_index.core.node_parser import UnstructuredElementNodeParser
|
||||
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||
from llama_index.core.retrievers import RecursiveRetriever
|
||||
from llama_index.core.schema import Document
|
||||
from llama_index.llms.openai import OpenAI
|
||||
from utils.utils import COMPARISON_PROMPT
|
||||
|
||||
from quivr_diff_assistant.use_case_3.parser import DeadlyParser
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Set pandas display options
|
||||
pd.set_option("display.max_rows", None)
|
||||
pd.set_option("display.max_columns", None)
|
||||
pd.set_option("display.width", None)
|
||||
pd.set_option("display.max_colwidth", None)
|
||||
|
||||
|
||||
def load_and_process_document(file_path, pickle_file):
|
||||
print(file_path)
|
||||
reader = SimpleDirectoryReader(input_files=[file_path])
|
||||
docs = reader.load_data()
|
||||
print(len(docs), " and", len(docs[0].text))
|
||||
if len(docs) == 1 and len(docs[0].text) < 9:
|
||||
print("No text found with classical parse, switching to OCR ...")
|
||||
parser = DeadlyParser()
|
||||
doc = parser.deep_parse(file_path)
|
||||
docs = [Document().from_langchain_format(doc)]
|
||||
|
||||
node_parser = UnstructuredElementNodeParser()
|
||||
|
||||
raw_nodes = node_parser.get_nodes_from_documents(docs)
|
||||
|
||||
base_nodes, node_mappings = node_parser.get_base_nodes_and_mappings(raw_nodes)
|
||||
return base_nodes, node_mappings
|
||||
|
||||
|
||||
def create_query_engine(base_nodes, node_mappings):
|
||||
vector_index = VectorStoreIndex(base_nodes)
|
||||
vector_retriever = vector_index.as_retriever(similarity_top_k=5)
|
||||
recursive_retriever = RecursiveRetriever(
|
||||
"vector",
|
||||
retriever_dict={"vector": vector_retriever},
|
||||
node_dict=node_mappings,
|
||||
verbose=True,
|
||||
)
|
||||
return RetrieverQueryEngine.from_args(
|
||||
recursive_retriever, llm=OpenAI(temperature=0, model="gpt-4")
|
||||
)
|
||||
|
||||
|
||||
def compare_responses(response1, response2):
|
||||
llm = OpenAI(temperature=0, model="gpt-4")
|
||||
prompt = f"""
|
||||
Compare the following two responses and determine if they convey the same information:
|
||||
Response for document 1: {response1}
|
||||
Response for document 2: {response2}
|
||||
Are these responses essentially the same? Provide a brief explanation for your conclusion. The difference in format are not important, focus on the content and the numbers.
|
||||
If there are any specific differences, please highlight them with bullet points. Respond in french and in a markdown format.
|
||||
"""
|
||||
return llm.complete(prompt)
|
||||
|
||||
|
||||
class ComparisonTypes(str, Enum):
|
||||
CDC_ETIQUETTE = "Cahier des Charges - Etiquette"
|
||||
CDC_FICHE_DEV = "Cahier des Charges - Fiche Dev"
|
||||
|
||||
|
||||
def llm_comparator(
|
||||
document: str, cdc: str, llm: BaseChatModel, comparison_type: ComparisonTypes
|
||||
):
|
||||
chain = COMPARISON_PROMPT | llm | StrOutputParser()
|
||||
|
||||
if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
|
||||
text_1 = "Etiquette"
|
||||
elif comparison_type == ComparisonTypes.CDC_FICHE_DEV:
|
||||
text_1 = "Fiche Dev"
|
||||
|
||||
return chain.stream(
|
||||
{
|
||||
"document": document,
|
||||
"text_1": text_1,
|
||||
"cdc": cdc,
|
||||
"text_2": "Cahier des Charges",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def test_main():
|
||||
cdc_doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-2-1_Mendiant Lait_QD PC F03 - FR Cahier des charges produit -rev 2021-v2.pdf"
|
||||
doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-2-1_Proposition étiquette Mendiant Lait croustillant.pdf"
|
||||
|
||||
cdc_doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-1-3_12_CDC_70690_Entremets rond vanille pécan individuel_2024.06.28 VALIDE.docx"
|
||||
doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-1-3_CDP_R&D_TABL_01_Fiche développement produit - Entremets vanille pécan 28 06 2024.xlsx"
|
||||
|
||||
comparison_type = ComparisonTypes.CDC_FICHE_DEV
|
||||
|
||||
llm = ChatOpenAI(
|
||||
model="gpt-4o",
|
||||
temperature=0.1,
|
||||
max_tokens=None,
|
||||
max_retries=2,
|
||||
)
|
||||
|
||||
parser = DeadlyParser()
|
||||
parsed_cdc_doc = await parser.aparse(cdc_doc)
|
||||
|
||||
if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
|
||||
parsed_doc = await parser.deep_aparse(doc, llm=llm)
|
||||
else:
|
||||
parsed_doc = await parser.aparse(doc)
|
||||
|
||||
print("\n\n Cahier des Charges")
|
||||
print(parsed_cdc_doc.page_content)
|
||||
|
||||
print("\n\n Other document")
|
||||
print(parsed_doc.page_content)
|
||||
|
||||
comparison = llm_comparator(
|
||||
document=parsed_doc.page_content,
|
||||
cdc=parsed_cdc_doc.page_content,
|
||||
llm=llm,
|
||||
comparison_type=comparison_type,
|
||||
)
|
||||
|
||||
print("\n\n Comparison")
|
||||
print(comparison)
|
||||
|
||||
|
||||
def get_document_path(doc):
|
||||
try:
|
||||
with open(doc.name, "wb") as temp_file:
|
||||
temp_file.write(doc.getbuffer())
|
||||
path = temp_file.name
|
||||
except Exception:  # not an uploaded file; assume it is already a filesystem path
|
||||
path = doc
|
||||
|
||||
return path
|
||||
|
||||
|
||||
async def parse_documents(cdc_doc, doc, comparison_type: ComparisonTypes, llm):
|
||||
parser = DeadlyParser()
|
||||
|
||||
# Schedule the coroutines as tasks
|
||||
cdc_task = asyncio.create_task(parser.aparse(get_document_path(cdc_doc)))
|
||||
|
||||
if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
|
||||
doc_task = asyncio.create_task(
|
||||
parser.deep_aparse(get_document_path(doc), llm=llm)
|
||||
)
|
||||
else:
|
||||
doc_task = asyncio.create_task(parser.aparse(get_document_path(doc)))
|
||||
|
||||
# Optionally, do other work here while tasks are running
|
||||
|
||||
# Await the tasks to get the results
|
||||
parsed_cdc_doc = await cdc_task
|
||||
print("\n\n Cahier de Charges: \n", parsed_cdc_doc.page_content)
|
||||
|
||||
parsed_doc = await doc_task
|
||||
print("\n\n Other doc: \n", parsed_doc.page_content)
|
||||
|
||||
return parsed_cdc_doc, parsed_doc
|
||||
|
||||
|
||||
def main():
|
||||
st.title("Document Comparison Tool : Use Case 2")
|
||||
|
||||
# File uploaders for two documents
|
||||
cdc_doc = st.file_uploader(
|
||||
"Upload Cahier des Charges", type=["docx", "xlsx", "pdf", "txt"]
|
||||
)
|
||||
doc = st.file_uploader(
|
||||
"Upload Etiquette / Fiche Dev", type=["docx", "xlsx", "pdf", "txt"]
|
||||
)
|
||||
|
||||
comparison_type = st.selectbox(
|
||||
"Select document types",
|
||||
[ComparisonTypes.CDC_ETIQUETTE.value, ComparisonTypes.CDC_FICHE_DEV.value],
|
||||
)
|
||||
|
||||
if st.button("Process Documents and Questions"):
|
||||
if not cdc_doc or not doc:
|
||||
st.error("Please upload both documents before launching the processing.")
|
||||
return
|
||||
|
||||
with st.spinner("Processing files..."):
|
||||
llm = ChatOpenAI(
|
||||
model="gpt-4o",
|
||||
temperature=0.1,
|
||||
max_tokens=None,
|
||||
max_retries=2,
|
||||
)
|
||||
|
||||
parsed_cdc_doc, parsed_doc = asyncio.run(
|
||||
parse_documents(cdc_doc, doc, comparison_type=comparison_type, llm=llm)
|
||||
)
|
||||
|
||||
comparison = llm_comparator(
|
||||
document=parsed_doc.page_content,
|
||||
cdc=parsed_cdc_doc.page_content,
|
||||
llm=llm,
|
||||
comparison_type=comparison_type,
|
||||
)
|
||||
# Run the async function using asyncio.run()
|
||||
# comparison = asyncio.run(process_documents(cdc_doc, doc, comparison_type))
|
||||
st.write_stream(comparison)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
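For orientation, the helpers above compose roughly as follows; a hedged sketch with placeholder file paths (note that the pickle_file argument of load_and_process_document is currently unused):

# Sketch: ask the same question of two documents and compare the answers
base_nodes_1, mappings_1 = load_and_process_document("doc_cdc.pdf", pickle_file=None)
base_nodes_2, mappings_2 = load_and_process_document("doc_fiche.xlsx", pickle_file=None)

engine_1 = create_query_engine(base_nodes_1, mappings_1)
engine_2 = create_query_engine(base_nodes_2, mappings_2)

question = "Quelle est la liste d'ingrédients ?"
response_1 = engine_1.query(question)
response_2 = engine_2.query(question)

print(compare_responses(response_1, response_2))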
125
backend/worker/diff-assistant/quivr_diff_assistant/main_uc3.py
Normal file
@ -0,0 +1,125 @@
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
||||
import streamlit as st
|
||||
from diff_match_patch import diff_match_patch
|
||||
|
||||
# get environment variables
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_openai import ChatOpenAI
|
||||
from use_case_3.diff_type import DiffResult, llm_comparator
|
||||
from use_case_3.llm_reporter import redact_report
|
||||
from use_case_3.parser import DeadlyParser
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class DocumentType(Enum):
|
||||
ETIQUETTE = "etiquette"
|
||||
CAHIER_DES_CHARGES = "cdc"
|
||||
|
||||
|
||||
async def create_modification_report(
|
||||
before_file: str | Path,
|
||||
after_file: str | Path,
|
||||
type: DocumentType,
|
||||
llm: BaseChatModel,
|
||||
partition: bool = False,
|
||||
use_llm_comparator: bool = False,
|
||||
parser=DeadlyParser(),
|
||||
) -> str:
|
||||
if type == DocumentType.ETIQUETTE:
|
||||
print("parsing before file")
|
||||
before_text = parser.deep_parse(before_file, partition=partition, llm=llm)
|
||||
print("parsing after file")
|
||||
after_text = parser.deep_parse(after_file, partition=partition, llm=llm)
|
||||
elif type == DocumentType.CAHIER_DES_CHARGES:
|
||||
before_text = await parser.aparse(before_file)
|
||||
after_text = await parser.aparse(after_file)
|
||||
|
||||
print(before_text.page_content)
|
||||
print(after_text.page_content)
|
||||
text_before_sections = before_text.page_content.split("\n# ")
|
||||
text_after_sections = after_text.page_content.split("\n# ")
|
||||
assert len(text_after_sections) == len(text_before_sections)
|
||||
|
||||
if use_llm_comparator:
|
||||
print("using llm comparator")
|
||||
return llm_comparator(
|
||||
before_text.page_content, after_text.page_content, llm=llm
|
||||
)
|
||||
print("using diff match patch")
|
||||
dmp = diff_match_patch()
|
||||
section_diffs = []
|
||||
for before_section, after_section in zip(
|
||||
text_before_sections, text_after_sections, strict=False
|
||||
):
|
||||
main_diff: list[tuple[int, str]] = dmp.diff_main(before_section, after_section)
|
||||
section_diffs.append(DiffResult(main_diff))
|
||||
|
||||
return redact_report(section_diffs, llm=llm)
|
||||
|
||||
|
||||
def save_uploaded_file(uploaded_file):
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False, suffix=os.path.splitext(uploaded_file.name)[1]
|
||||
) as tmp_file:
|
||||
tmp_file.write(uploaded_file.getvalue())
|
||||
return tmp_file.name
|
||||
|
||||
|
||||
st.title("Document Modification Report Generator : Use Case 3")
|
||||
|
||||
# File uploaders
|
||||
before_file = st.file_uploader("Upload 'Before' file", type=["pdf", "docx"])
|
||||
after_file = st.file_uploader("Upload 'After' file", type=["pdf", "docx"])
|
||||
|
||||
# Document type selector
|
||||
doc_type = st.selectbox("Select document type", ["ETIQUETTE", "CAHIER_DES_CHARGES"])
|
||||
|
||||
# Complexity of document
|
||||
complexity = st.checkbox("Complex document (lots of text to OCRise)")
|
||||
|
||||
# Process button
|
||||
if st.button("Process"):
|
||||
if before_file and after_file:
|
||||
with st.spinner("Processing files..."):
|
||||
# Save uploaded files
|
||||
before_path = save_uploaded_file(before_file)
|
||||
after_path = save_uploaded_file(after_file)
|
||||
|
||||
# Initialize LLM
|
||||
openai_gpt4o = ChatOpenAI(
|
||||
model="gpt-4o",
|
||||
temperature=0,
|
||||
max_tokens=None,
|
||||
max_retries=2,
|
||||
)
|
||||
use_llm_comparator = doc_type == "ETIQUETTE"
|
||||
|
||||
# Generate report
|
||||
print("generating report")
|
||||
report = asyncio.run(
|
||||
create_modification_report(
|
||||
before_path,
|
||||
after_path,
|
||||
DocumentType[doc_type],
|
||||
openai_gpt4o,
|
||||
partition=complexity,
|
||||
use_llm_comparator=use_llm_comparator,
|
||||
)
|
||||
)
|
||||
print("report generated")
|
||||
# Display results
|
||||
st.subheader("Modification Report")
|
||||
st.write(report)
|
||||
|
||||
# Clean up temporary files
|
||||
os.unlink(before_path)
|
||||
os.unlink(after_path)
|
||||
else:
|
||||
st.error("Please upload both 'Before' and 'After' files.")
|
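Outside the Streamlit UI, create_modification_report can presumably be driven from a plain script as well; a minimal sketch with placeholder paths:

# Sketch: generate a modification report without Streamlit
import asyncio
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o", temperature=0, max_retries=2)
report = asyncio.run(
    create_modification_report(
        "before.pdf",  # placeholder path
        "after.pdf",   # placeholder path
        DocumentType.CAHIER_DES_CHARGES,
        llm,
        use_llm_comparator=False,  # use the diff-match-patch path
    )
)
print(report)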
@ -0,0 +1,59 @@
|
||||
# from langchain_openai import OpenAIEmbeddings
|
||||
# from rich.console import Console
|
||||
# from rich.panel import Panel
|
||||
# from rich.prompt import Prompt
|
||||
|
||||
# from quivr_core import Brain
|
||||
# from quivr_core.config import LLMEndpointConfig
|
||||
# from quivr_core.llm.llm_endpoint import LLMEndpoint
|
||||
# from quivr_core.quivr_rag import QuivrQARAG
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# brain_1 = Brain.from_files(
|
||||
# name="cdc_brain",
|
||||
# file_paths=["data/cdc/Cas2-1-3_Entremets_rond_vanille_pecan_individuel.docx"],
|
||||
# llm=LLMEndpoint.from_config(
|
||||
# LLMEndpointConfig(model="gpt-4o-mini", temperature=0.0)
|
||||
# ),
|
||||
# embedder=OpenAIEmbeddings(),
|
||||
# )
|
||||
|
||||
# brain_2 = Brain.from_files(
|
||||
# name="etiquette_brain",
|
||||
# file_paths=[
|
||||
# "data/fiche_dev_produit/Cas2-1-3_Entremets_rond_vanille_pecan_individuel.xlsx"
|
||||
# ],
|
||||
# llm=LLMEndpoint.from_config(
|
||||
# LLMEndpointConfig(model="gpt-4o-mini", temperature=0.0)
|
||||
# ),
|
||||
# embedder=OpenAIEmbeddings(),
|
||||
# )
|
||||
|
||||
# # Check brain info
|
||||
# brain_1.print_info()
|
||||
# brain_2.print_info()
|
||||
|
||||
# console = Console()
|
||||
# console.print(Panel.fit("Ask what to compare : ", style="bold magenta"))
|
||||
|
||||
# while True:
|
||||
# # Get user input
|
||||
# section = Prompt.ask("[bold cyan]Section[/bold cyan]")
|
||||
|
||||
# # Check if user wants to exit
|
||||
# if section.lower() == "exit":
|
||||
# console.print(Panel("Goodbye!", style="bold yellow"))
|
||||
# break
|
||||
|
||||
# question = (
|
||||
# f"Quelle est/sont le(s) {section} ? Answer only with exact text citation."
|
||||
# )
|
||||
# response_1 = brain_1.ask(question)
|
||||
# response_2 = brain_2.ask(question, rag_pipeline=QuivrQARAG)
|
||||
# # Print the answer with typing effect
|
||||
# console.print(f"[bold green]Quivr CDC[/bold green]: {response_1.answer}")
|
||||
# console.print()
|
||||
# console.print(f"[bold blue]Quivr Fiche Dev[/bold blue]: {response_2.answer}")
|
||||
|
||||
# console.print("-" * console.width)
|
@ -0,0 +1,105 @@
|
||||
from typing import List, Tuple
|
||||
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.prompts.prompt import PromptTemplate
|
||||
|
||||
DIFF_PROMPT = PromptTemplate.from_template(
|
||||
template="""
|
||||
You need to compare two texts and report all the differences. Your job is to parse these differences and create a clear, concise report. \
|
||||
Organize the report by sections and provide a detailed explanation of each difference. \
|
||||
Be specific about each difference; it will be reviewed and verified by a Quality engineer.
|
||||
Here are the different sections of the report:
|
||||
* Dénominations, comprenant:
|
||||
* dénomination légale: nom du produit tel qu’il est défini par la réglementation, \
|
||||
en général cela inclut aussi des informations sur son état (cuit, cru, congelé...)
|
||||
* dénomination commercial: nom du produit tel qu’il est vendu au consommateur
|
||||
* Ingrédients et allergènes, comprenant:
|
||||
* liste d’ingrédients
|
||||
* traces d’allergènes
|
||||
* Une sous-section pour chaque sous produit si il y a lieu;
|
||||
* Eléments de traçabilité, comprenant:
|
||||
* le code-barre EAN
|
||||
* le code article
|
||||
* DDM - date de durabilité minimale
|
||||
* numéro de lot
|
||||
* date de fabrication
|
||||
* adresse de l'entreprise
|
||||
* Conseils d’utilisation / de manipulation produit, comprenant :
|
||||
* Conditions de remise en oeuvre
|
||||
* Durée de vie
|
||||
* Conditions de transport
|
||||
* Conditions de conservation : « A conserver à -18°C / Ne pas recongeler un produit décongelé »
|
||||
* Temps de décongélation
|
||||
* Température de préchauffage
|
||||
* Poids du produit
|
||||
* Valeurs / informations nutritionnelles
|
||||
* Autres
|
||||
|
||||
Notes:
|
||||
-> Coup de Pates: Tradition & Innovation, est l'entreprise productrice / marque du produit.
|
||||
|
||||
Chaque section doit être organisée comme suit et séparée par des lignes entre chaque avant et après :
|
||||
|
||||
## section_name
|
||||
|
||||
**Avant** : ...
|
||||
|
||||
**Après** : ...
|
||||
|
||||
**Modifications**:
|
||||
* ...
|
||||
* ...
|
||||
|
||||
|
||||
-----TEXT BEFORE MODIFICATION-----
|
||||
{before_text}
|
||||
-----TEXT AFTER MODIFICATION-----
|
||||
{after_text}
|
||||
|
||||
The report should be written in a professional and formal tone and in French.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
class DiffResult:
|
||||
def __init__(self, diffs: List[Tuple[int, str]]) -> None:
|
||||
self.diffs = diffs
|
||||
|
||||
def remove_dummy_diffs(self) -> None:
|
||||
cleaned_diff = []
|
||||
for cat, content in self.diffs:
|
||||
if content.strip() and content != "\n":
|
||||
cleaned_diff.append((cat, content))
|
||||
|
||||
self.diffs = cleaned_diff
|
||||
|
||||
def format_diffs(self) -> str:
|
||||
text_modified = ""
|
||||
|
||||
sub_stack = 0
|
||||
for op, data in self.diffs:
|
||||
if op == 0:
|
||||
text_modified += data if sub_stack == 0 else f"_]] {data}"
|
||||
elif op == -1:
|
||||
if sub_stack == 0:
|
||||
text_modified += f"[[{data}->"
|
||||
sub_stack += 1
|
||||
else:
|
||||
text_modified += f"{data}->"
|
||||
elif op == 1:
|
||||
if sub_stack > 0:
|
||||
text_modified += f"{data}]]"
|
||||
sub_stack -= 1
|
||||
else:
|
||||
text_modified += f"[[ _ ->{data}]]"
|
||||
|
||||
return text_modified
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.format_diffs()
|
||||
|
||||
|
||||
def llm_comparator(before_text: str, after_text: str, llm: BaseChatModel) -> str:
|
||||
chain = DIFF_PROMPT | llm
|
||||
result = chain.invoke({"before_text": before_text, "after_text": after_text})
|
||||
return str(result.content)
|
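To illustrate the DiffResult wrapper above, a small sketch pairing it with diff-match-patch (the same (op, text) tuples the notebook produced):

from diff_match_patch import diff_match_patch

dmp = diff_match_patch()
result = DiffResult(dmp.diff_main("the black cat", "the red cat"))
result.remove_dummy_diffs()
print(result)  # expected roughly: the [[black->red]] cat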
@ -0,0 +1,74 @@
|
||||
from typing import List
|
||||
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.prompts.prompt import PromptTemplate
|
||||
|
||||
from quivr_diff_assistant.use_case_3.diff_type import DiffResult
|
||||
|
||||
REPORT_PROMPT = PromptTemplate.from_template(
|
||||
template="""You are tasked with analyzing and reporting differences in text for a Quality engineer. The input text contains differences marked with special tokens. Your job is to parse these differences and create a clear, concise report.
|
||||
|
||||
Here is the text containing the differences:
|
||||
|
||||
<diff_text>
|
||||
{text_modified}
|
||||
</diff_text>
|
||||
|
||||
RULE #1 : If there are no [[->]] tokens, it indicates no changes to report, inventing changes means death.
|
||||
The differences are marked using the following format:
|
||||
- [[before->after]] indicates a change from "before" to "after"
|
||||
- If there is no "before" text, it indicates an addition
|
||||
- If there is no "after" text, it indicates a deletion
|
||||
- If there is no [[ ]] token, it indicates no changes to report
|
||||
- Make sense of the difference and do not keep the '[' in the report.
|
||||
- "_" alone means empty.
|
||||
|
||||
Follow these steps to create your report:
|
||||
|
||||
1. Carefully read through the entire text.
|
||||
2. Identify each instance of [[ ]] tokens.
|
||||
3. For each instance, determine the modification that was made.
|
||||
Present your report in the following markdown format:
|
||||
|
||||
# Title (Difference Report)
|
||||
## Section Name
|
||||
### Subsection Name (if applicable)
|
||||
* Original: Original text
|
||||
* Modified: Modified text
|
||||
* Changes:
|
||||
* Change 1
|
||||
* Change 2
|
||||
* Change 3
|
||||
|
||||
Avoid repetitive info; only report the changes.
|
||||
Keep the checkboxes when possible and compare the corresponding checkboxes.
|
||||
|
||||
|
||||
Every modification should be clearly stated with the original text and the modified text.
|
||||
Note that there might be no modifications in some sections. In that case, simply return nothing.
|
||||
Try to make the report as clear and concise as possible: one point for each modification found, with details; avoid long comparisons.
|
||||
|
||||
|
||||
Remember, your goal is to create a clear and concise report that allows the Quality engineer to quickly verify the differences. Focus on accuracy and readability in your output, give every indication possible to make it easier to find the modification.
|
||||
The report should be written in a professional and formal tone and in French.""",
|
||||
)
|
||||
|
||||
|
||||
def redact_report(difference_per_section: List[DiffResult], llm: BaseChatModel) -> str:
|
||||
report_per_section = []
|
||||
combined_diffs = ""
|
||||
for section in difference_per_section:
|
||||
if len(section.diffs) == 1 and section.diffs[0][0] == 0:
|
||||
print("No differences found in this section.")
|
||||
continue
|
||||
combined_diffs += str(section)
|
||||
|
||||
chain = REPORT_PROMPT | llm
|
||||
result = chain.invoke({"text_modified": str(combined_diffs)})
|
||||
report_per_section.append(result.content)
|
||||
|
||||
report_text = ""
|
||||
|
||||
for rep in report_per_section:
|
||||
report_text += "\n".join(rep.split("\n")[1:-1]) + "\n\n"
|
||||
return report_text
|
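A short usage sketch for redact_report; the ChatOpenAI model name is illustrative, and `differences` stands for the per-section DiffResult list produced upstream:

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o", temperature=0)
# `differences: List[DiffResult]` comes from the section-level diff step.
report = redact_report(differences, llm)
print(report)  # markdown report, in French, one bullet per modification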
@ -0,0 +1,287 @@
"""
All of this needs to be in MegaParse; this is just a placeholder for now.
"""

import base64
from typing import List

import cv2
import numpy as np
from doctr.io import DocumentFile
from doctr.io.elements import Document as doctrDocument
from doctr.models import ocr_predictor
from doctr.models.predictor.pytorch import OCRPredictor
from doctr.utils.common_types import AbstractFile
from langchain_core.documents import Document
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import HumanMessage
from megaparse import MegaParse  # FIXME: @chloedia Version problems

from quivr_api.logger import get_logger

logger = get_logger(__name__)


"""
This needs to be in megaparse @chloedia
"""


class DeadlyParser:
    def __init__(self):
        self.predictor: OCRPredictor = ocr_predictor(
            pretrained=True, det_arch="fast_base", reco_arch="crnn_vgg16_bn"
        )

    async def deep_aparse(
        self,
        file: AbstractFile,
        partition: bool = False,
        llm: BaseChatModel | None = None,
    ) -> Document:
        """
        Parse the OCR output from the input file and return the extracted text.
        """
        raw_results: doctrDocument | None = None
        try:
            docs = DocumentFile.from_pdf(file, scale=int(500 / 72))
            if partition:
                cropped_image = crop_to_content(docs[0])
                # cv2.imshow("cropped", cropped_image)
                # cv2.waitKey(0)  # Wait for a key press

                docs = split_image(cropped_image)
                # for i, sub_image in enumerate(docs):
                #     cv2.imshow(f"sub_image_{i}", sub_image)
                #     cv2.waitKey(0)  # Wait for a key press
                # cv2.destroyAllWindows()

            print("ocr start")
            raw_results = self.predictor(docs)
            print("ocr done")
            if llm:
                entire_content = ""
                print("ocr llm start")
                for raw_result, img in zip(raw_results.pages, docs, strict=False):
                    if raw_result.render() == "":
                        continue
                    _, buffer = cv2.imencode(".png", img)
                    img_str64 = base64.b64encode(buffer.tobytes()).decode("utf-8")

                    processed_result = llm.invoke(
                        [
                            HumanMessage(
                                content=[
                                    {
                                        "type": "text",
                                        "text": f"Can you correct this entire text retranscription? Respond only with the corrected transcription: {raw_result.render()},\n\n do not transcribe logos or images.",
                                    },
                                    {
                                        "type": "image_url",
                                        "image_url": {
                                            "url": f"data:image/jpeg;base64,{img_str64}",
                                            "detail": "auto",
                                        },
                                    },
                                ]
                            )
                        ]
                    )
                    assert isinstance(
                        processed_result.content, str
                    ), "The LLM did not return a string"
                    entire_content += processed_result.content
                print("ocr llm done")
                return Document(page_content=entire_content)

            return Document(page_content=raw_results.render())
        except Exception as e:
            print(e)
            # Fall back to the raw OCR output when LLM post-processing fails;
            # if OCR itself never ran, there is nothing to fall back to.
            if raw_results is None:
                raise
            return Document(page_content=raw_results.render())

    def deep_parse(
        self,
        file: AbstractFile,
        partition: bool = False,
        llm: BaseChatModel | None = None,
    ) -> Document:
        """
        Parse the OCR output from the input file and return the extracted text.
        """
        try:
            logger.info("Starting document processing")

            # Reduce image scale to lower memory usage
            docs = DocumentFile.from_pdf(file, scale=int(300 / 72))
            logger.info("Document loaded")

            if partition:
                logger.info("Partitioning document")
                cropped_image = crop_to_content(docs[0])
                docs = split_image(cropped_image)

            logger.info("Starting OCR")
            raw_results: doctrDocument = self.predictor(docs)
            logger.info("OCR completed")

            if llm:
                entire_content = ""
                logger.info("Starting LLM processing")
                for raw_result, img in zip(raw_results.pages, docs, strict=False):
                    if raw_result.render() == "":
                        continue
                    _, buffer = cv2.imencode(".png", img)
                    img_str64 = base64.b64encode(buffer.tobytes()).decode("utf-8")

                    processed_result = llm.invoke(
                        [
                            HumanMessage(
                                content=[
                                    {
                                        "type": "text",
                                        "text": f"Can you correct this entire text retranscription? Respond only with the corrected transcription: {raw_result.render()},\n\n do not transcribe logos or images.",
                                    },
                                    {
                                        "type": "image_url",
                                        "image_url": {
                                            "url": f"data:image/jpeg;base64,{img_str64}",
                                            "detail": "auto",
                                        },
                                    },
                                ]
                            )
                        ]
                    )
                    assert isinstance(
                        processed_result.content, str
                    ), "The LLM did not return a string"
                    entire_content += processed_result.content
                logger.info("LLM processing completed")
                return Document(page_content=entire_content)

            return Document(page_content=raw_results.render())
        except Exception as e:
            logger.error(f"Error in deep_parse: {str(e)}", exc_info=True)
            raise

    def parse(self, file_path) -> Document:
        """
        Parse with megaparse.
        """
        mp = MegaParse(file_path)
        return mp.load()

    async def aparse(self, file_path) -> Document:
        """
        Parse with megaparse.
        """
        mp = MegaParse(file_path)
        return await mp.aload()
        # except:
        #     reader = SimpleDirectoryReader(input_files=[file_path])
        #     docs = reader.load_data()
        #     for doc in docs:
        #         print(doc)
        #         pause
        #     return "".join([doc.text for doc in docs])


# FIXME: When time @chloedia optimize this function and discount random points on the scan
def crop_to_content(image: np.ndarray) -> np.ndarray:
    """Crop the image to the text area."""
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image

    # Apply threshold to get an image with only black and white
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Create rectangular kernel for dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Dilate to connect text into blocks
    dilated = cv2.dilate(thresh, kernel, iterations=5)

    # Find contours
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        # Find the bounding rectangles of all contours
        bounding_rects = [cv2.boundingRect(c) for c in contours]

        # Combine all bounding rectangles
        x = min(rect[0] for rect in bounding_rects)
        y = min(rect[1] for rect in bounding_rects)
        max_x = max(rect[0] + rect[2] for rect in bounding_rects)
        max_y = max(rect[1] + rect[3] for rect in bounding_rects)
        w = max_x - x
        h = max_y - y

        # Add padding
        padding = 10
        x = max(0, x - padding)
        y = max(0, y - padding)
        w = min(image.shape[1] - x, w + 2 * padding)
        h = min(image.shape[0] - y, h + 2 * padding)

        # Crop the image
        return image[y : y + h, x : x + w]
    else:
        return image


# FIXME: When time @chloedia optimize this function
def split_image(image: np.ndarray) -> List[np.ndarray]:
    """Split the image into 4 parts along the y-axis, avoiding splitting letters."""
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Apply threshold (tuned for white pixels)
    _, thresh = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY)

    # Find horizontal projection
    h_proj = np.sum(thresh, axis=1)

    # Calculate the ideal height for each part
    total_height = image.shape[0]
    ideal_height = total_height // 4

    sub_images = []
    start = 0

    for i in range(3):  # Three cuts create four parts
        target_end = (i + 1) * ideal_height

        # Look for the best cut point around the target end
        best_cut = target_end
        max_whitespace = 0

        search_start = max(target_end - ideal_height // 2, 0)
        search_end = min(target_end + ideal_height // 2, total_height)

        for j in range(search_start, search_end):
            # Check for a continuous white line
            if np.all(thresh[j, :] == 255):
                whitespace = np.sum(
                    h_proj[max(0, j - 5) : min(total_height, j + 6)]
                    == 255 * image.shape[1]
                )
                if whitespace > max_whitespace:
                    max_whitespace = whitespace
                    best_cut = j

        # If no suitable white line is found, use the target end
        if max_whitespace == 0:
            best_cut = target_end

        # Make the cut
        sub_images.append(image[start:best_cut, :])
        start = best_cut

    # Add the last part
    sub_images.append(image[start:, :])

    return sub_images
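A driver sketch for DeadlyParser, assuming a local scanned PDF and an OpenAI key in the environment; the file name and model choice are placeholders:

from langchain_openai import ChatOpenAI

parser = DeadlyParser()
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# partition=True crops the scan to its text area and splits it into four
# horizontal bands before OCR; the vision-capable model then cleans up the
# raw transcription page by page.
doc = parser.deep_parse("scan.pdf", partition=True, llm=llm)
print(doc.page_content)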
@ -0,0 +1,91 @@
from langchain_core.prompts.prompt import PromptTemplate

COMPARISON_PROMPT = PromptTemplate.from_template(
    template="""
You are provided with two texts <text 1> and <text 2>. You need to consider the information contained in \
<text 1> and compare it with the corresponding information contained in <text 2>. \
Keep in mind that <text 2> contains non-relevant information for this task, and that in <text 2> you \
should only focus on the information corresponding to the information contained in <text 1>. \
You need to report all the differences between the information contained in <text 1> and <text 2>. \
Your job is to parse these differences and create a clear, concise report. \
Organize the report by sections and provide a detailed explanation of each difference. \
Be specific about each difference: the report will be reviewed and verified by a highly trained quality engineer.
Here are the different sections of the report:
* Dénominations, comprenant :
    * dénomination légale : nom du produit tel qu'il est défini par la réglementation, \
en général cela inclut aussi des informations sur son état (cuit, cru, congelé...)
    * dénomination commerciale : nom du produit tel qu'il est vendu au consommateur
* Ingrédients et allergènes (si présents dans plusieurs langues, comparer langue par langue), comprenant :
    * liste d'ingrédients
    * traces d'allergènes
    * une sous-section pour chaque sous-produit s'il y a lieu ;
* Éléments de traçabilité, comprenant :
    * le code-barres EAN
    * le code article
    * numéro de lot
    * date de fabrication
    * adresse de l'entreprise
* Conseils d'utilisation / de manipulation produit, comprenant :
    * Conditions / conseils de remise en oeuvre
    * Durée de vie
    * Durée de conservation (à compter de la date de production, à température ambiante / réfrigérée)
    * DDM - date de durabilité minimale
    * Conditions de transport
    * Conditions de conservation : « A conserver à -18°C / Ne pas recongeler un produit décongelé »
    * Temps de décongélation
    * Température de préchauffage
* Caractéristiques / paramètres physiques produit (unité de négoce), comprenant :
    * poids de la pièce
    * dimensions de la pièce
    * poids du produit / unité de négoce (typiquement, carton)
    * dimensions du produit / unité de négoce (typiquement, carton)
    * nombre de pièces par unité de négoce (typiquement, carton) / colis
    * poids du colis / carton
* Données palettisation / données techniques sur palette (unité de transport)
    * hauteur palette
    * dimensions de l'unité de négoce (typiquement, carton) / colis
    * nombre de colis par couche / palette
* Valeurs / informations nutritionnelles
* Autres

Notes :
-> Coup de Pates : Tradition & Innovation est l'entreprise productrice / marque du produit.

Chaque section doit être organisée comme suit :
## Section name
**<text 1>** :
* ...
* ...

**<text 2>** : ...
* ...
* ...

**Differences**:
* ...
* ...


Beginning of <text 1>
{document}
End of <text 1>


Beginning of <text 2>
{cdc}
End of <text 2>


You need to consider all the information contained in <text 1> and compare it \
with the corresponding information contained in <text 2>.
The report should be written in a professional and formal tone and in French, \
and it should follow the structure outlined above. If <text 1> doesn't contain a particular piece of information, \
then you should ignore that information in <text 2> as well and avoid reporting any differences.

In the report you should replace every occurrence of <text 1> with {text_1} and every occurrence of <text 2> with {text_2}.

## Dénominations
**{text_1}** :
*
"""
)
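For context, a minimal sketch of chaining COMPARISON_PROMPT; the input keys match the template variables ({document}, {cdc}, {text_1}, {text_2}), but the concrete values and model are assumptions:

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

chain = COMPARISON_PROMPT | ChatOpenAI(model="gpt-4o", temperature=0) | StrOutputParser()
report = chain.invoke(
    {
        "document": document_text,  # <text 1>, e.g. the parsed product sheet
        "cdc": cdc_text,  # <text 2>, e.g. the cahier des charges
        "text_1": "Document",  # display name substituted into the report
        "text_2": "Cahier des charges",
    }
)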
760
backend/worker/diff-assistant/requirements-dev.lock
Normal file
@ -0,0 +1,760 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
#   pre: false
#   features: []
#   all-features: false
#   with-sources: false
#   generate-hashes: false
#   universal: false

-e file:.
aiohappyeyeballs==2.4.0
aiohttp==3.10.5
aiosignal==1.3.1
altair==5.4.1
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyascii==0.3.2
anyio==4.4.0
appnope==0.1.4
asttokens==2.4.1
attrs==24.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
blinker==1.8.2
cachetools==5.5.0
certifi==2024.7.4
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
cobble==0.1.4
coloredlogs==15.0.1
comm==0.2.2
contourpy==1.2.1
cryptography==43.0.1
cycler==0.12.1
dataclasses-json==0.6.7
debugpy==1.8.5
decorator==5.1.1
deepdiff==7.0.1
defusedxml==0.7.1
deprecated==1.2.14
diff-match-patch==20230430
dirtyjson==1.0.8
distro==1.9.0
docx2txt==0.8
effdet==0.4.1
emoji==2.12.1
et-xmlfile==1.1.0
executing==2.0.1
faiss-cpu==1.8.0.post1
filelock==3.15.4
filetype==1.2.0
fire==0.6.0
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
gitdb==4.0.11
gitpython==3.1.43
google-api-core==2.19.2
google-auth==2.34.0
google-cloud-vision==3.7.4
googleapis-common-protos==1.65.0
greenlet==3.0.3
grpcio==1.66.1
grpcio-status==1.66.1
h11==0.14.0
h5py==3.11.0
httpcore==1.0.5
httpx==0.27.0
huggingface-hub==0.24.6
humanfriendly==10.0
idna==3.7
iniconfig==2.0.0
iopath==0.1.10
ipykernel==6.29.5
ipython==8.26.0
jedi==0.19.1
jinja2==3.1.4
jiter==0.5.0
joblib==1.4.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-client==8.6.2
jupyter-core==5.7.2
kiwisolver==1.4.5
langchain==0.2.16
langchain-community==0.2.16
langchain-core==0.2.39
langchain-openai==0.1.24
langchain-text-splitters==0.2.4
langdetect==1.0.9
langsmith==0.1.118
layoutparser==0.3.4
llama-cloud==0.0.17
llama-index==0.11.8
llama-index-agent-openai==0.3.1
llama-index-cli==0.3.1
llama-index-core==0.11.8
llama-index-embeddings-openai==0.2.4
llama-index-indices-managed-llama-cloud==0.3.0
llama-index-legacy==0.9.48.post3
llama-index-llms-openai==0.2.3
llama-index-multi-modal-llms-openai==0.2.0
llama-index-program-openai==0.2.0
llama-index-question-gen-openai==0.2.0
llama-index-readers-file==0.2.1
llama-index-readers-llama-parse==0.3.0
llama-parse==0.5.3
llvmlite==0.43.0
lxml==5.3.0
mammoth==1.8.0
markdown-it-py==3.0.0
markupsafe==2.1.5
marshmallow==3.22.0
matplotlib==3.9.2
matplotlib-inline==0.1.7
mdurl==0.1.2
megaparse==0.0.31
mplcursors==0.5.3
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
narwhals==1.6.2
nest-asyncio==1.6.0
networkx==3.3
nltk==3.9.1
numba==0.60.0
numpy==1.26.4
olefile==0.47
omegaconf==2.3.0
onnx==1.16.2
onnxruntime==1.19.2
openai==1.44.1
opencv-python==4.10.0.84
opencv-python-headless==4.10.0.84
openpyxl==3.1.5
ordered-set==4.1.0
orjson==3.10.7
packaging==24.1
pandas==2.2.2
parso==0.8.4
pdf2docx==0.5.8
pdf2image==1.17.0
pdfminer-six==20231228
pdfplumber==0.11.4
pexpect==4.9.0
pi-heif==0.18.0
pikepdf==9.2.1
pillow==10.4.0
platformdirs==4.2.2
pluggy==1.5.0
portalocker==2.10.1
prompt-toolkit==3.0.47
proto-plus==1.24.0
protobuf==5.27.3
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.3
pyarrow==17.0.0
pyasn1==0.6.1
pyasn1-modules==0.4.1
pyclipper==1.3.0.post5
pycocotools==2.0.8
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.8.2
pydantic-core==2.20.1
pydeck==0.9.1
pygments==2.18.0
pymupdf==1.24.10
pymupdfb==1.24.10
pyparsing==3.1.2
pypdf==4.3.1
pypdfium2==4.30.0
pytest==8.3.2
python-dateutil==2.9.0.post0
python-doctr==0.9.0
python-docx==1.1.2
python-dotenv==1.0.1
python-iso639==2024.4.27
python-magic==0.4.27
python-multipart==0.0.9
python-oxmsg==0.0.1
python-pptx==1.0.2
pytz==2024.1
pyyaml==6.0.2
pyzmq==26.1.1
rapidfuzz==3.9.6
referencing==0.35.1
regex==2024.7.24
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.8.0
rpds-py==0.20.0
rsa==4.9
safetensors==0.4.5
scikit-learn==1.5.1
scipy==1.14.1
shapely==2.0.6
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.6
sqlalchemy==2.0.32
stack-data==0.6.3
streamlit==1.38.0
striprtf==0.0.26
sympy==1.13.2
tabulate==0.9.0
tenacity==8.5.0
termcolor==2.4.0
threadpoolctl==3.5.0
tiktoken==0.7.0
timm==1.0.9
tokenizers==0.19.1
toml==0.10.2
torch==2.3.1
torchvision==0.18.1
tornado==6.4.1
tqdm==4.66.5
traitlets==5.14.3
transformers==4.44.2
typing-extensions==4.12.2
typing-inspect==0.9.0
tzdata==2024.1
unstructured==0.15.9
unstructured-client==0.25.5
unstructured-inference==0.7.36
unstructured-pytesseract==0.3.13
urllib3==2.2.2
wcwidth==0.2.13
wrapt==1.16.0
xlsxwriter==3.2.0
yarl==1.9.7
754
backend/worker/diff-assistant/requirements.lock
Normal file
@ -0,0 +1,754 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
#   pre: false
#   features: []
#   all-features: false
#   with-sources: false
#   generate-hashes: false
#   universal: false

-e file:.
aiohappyeyeballs==2.4.0
aiohttp==3.10.5
aiosignal==1.3.1
altair==5.4.1
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyascii==0.3.2
anyio==4.4.0
appnope==0.1.4
asttokens==2.4.1
attrs==24.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
blinker==1.8.2
cachetools==5.5.0
certifi==2024.7.4
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
cobble==0.1.4
coloredlogs==15.0.1
comm==0.2.2
contourpy==1.2.1
cryptography==43.0.1
cycler==0.12.1
dataclasses-json==0.6.7
debugpy==1.8.5
decorator==5.1.1
deepdiff==7.0.1
defusedxml==0.7.1
deprecated==1.2.14
diff-match-patch==20230430
dirtyjson==1.0.8
distro==1.9.0
docx2txt==0.8
effdet==0.4.1
emoji==2.12.1
et-xmlfile==1.1.0
executing==2.0.1
faiss-cpu==1.8.0.post1
filelock==3.15.4
filetype==1.2.0
fire==0.6.0
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
gitdb==4.0.11
gitpython==3.1.43
google-api-core==2.19.2
google-auth==2.34.0
google-cloud-vision==3.7.4
googleapis-common-protos==1.65.0
greenlet==3.0.3
grpcio==1.66.1
grpcio-status==1.66.1
h11==0.14.0
h5py==3.11.0
httpcore==1.0.5
httpx==0.27.0
huggingface-hub==0.24.6
humanfriendly==10.0
idna==3.7
iopath==0.1.10
ipykernel==6.29.5
ipython==8.26.0
jedi==0.19.1
jinja2==3.1.4
jiter==0.5.0
joblib==1.4.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-client==8.6.2
jupyter-core==5.7.2
kiwisolver==1.4.5
langchain==0.2.16
langchain-community==0.2.16
langchain-core==0.2.39
langchain-openai==0.1.24
langchain-text-splitters==0.2.4
langdetect==1.0.9
langsmith==0.1.118
layoutparser==0.3.4
llama-cloud==0.0.17
llama-index==0.11.8
llama-index-agent-openai==0.3.1
llama-index-cli==0.3.1
llama-index-core==0.11.8
llama-index-embeddings-openai==0.2.4
llama-index-indices-managed-llama-cloud==0.3.0
llama-index-legacy==0.9.48.post3
llama-index-llms-openai==0.2.3
llama-index-multi-modal-llms-openai==0.2.0
llama-index-program-openai==0.2.0
llama-index-question-gen-openai==0.2.0
llama-index-readers-file==0.2.1
llama-index-readers-llama-parse==0.3.0
llama-parse==0.5.3
llvmlite==0.43.0
lxml==5.3.0
mammoth==1.8.0
markdown-it-py==3.0.0
markupsafe==2.1.5
marshmallow==3.22.0
matplotlib==3.9.2
matplotlib-inline==0.1.7
mdurl==0.1.2
megaparse==0.0.31
mplcursors==0.5.3
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
narwhals==1.6.2
nest-asyncio==1.6.0
networkx==3.3
nltk==3.9.1
numba==0.60.0
numpy==1.26.4
olefile==0.47
omegaconf==2.3.0
onnx==1.16.2
onnxruntime==1.19.2
openai==1.44.1
opencv-python==4.10.0.84
opencv-python-headless==4.10.0.84
openpyxl==3.1.5
ordered-set==4.1.0
orjson==3.10.7
packaging==24.1
pandas==2.2.2
parso==0.8.4
pdf2docx==0.5.8
pdf2image==1.17.0
pdfminer-six==20231228
pdfplumber==0.11.4
pexpect==4.9.0
pi-heif==0.18.0
pikepdf==9.2.1
pillow==10.4.0
platformdirs==4.2.2
portalocker==2.10.1
prompt-toolkit==3.0.47
proto-plus==1.24.0
protobuf==5.27.3
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.3
pyarrow==17.0.0
pyasn1==0.6.1
pyasn1-modules==0.4.1
pyclipper==1.3.0.post5
pycocotools==2.0.8
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.8.2
pydantic-core==2.20.1
pydeck==0.9.1
pygments==2.18.0
pymupdf==1.24.10
pymupdfb==1.24.10
pyparsing==3.1.2
pypdf==4.3.1
pypdfium2==4.30.0
python-dateutil==2.9.0.post0
python-doctr==0.9.0
python-docx==1.1.2
python-dotenv==1.0.1
python-iso639==2024.4.27
python-magic==0.4.27
python-multipart==0.0.9
python-oxmsg==0.0.1
python-pptx==1.0.2
pytz==2024.1
pyyaml==6.0.2
pyzmq==26.1.1
rapidfuzz==3.9.6
referencing==0.35.1
regex==2024.7.24
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.8.0
rpds-py==0.20.0
rsa==4.9
safetensors==0.4.5
scikit-learn==1.5.1
scipy==1.14.1
shapely==2.0.6
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.6
sqlalchemy==2.0.32
stack-data==0.6.3
streamlit==1.38.0
striprtf==0.0.26
sympy==1.13.2
tabulate==0.9.0
tenacity==8.5.0
termcolor==2.4.0
threadpoolctl==3.5.0
tiktoken==0.7.0
timm==1.0.9
tokenizers==0.19.1
toml==0.10.2
torch==2.3.1
torchvision==0.18.1
tornado==6.4.1
tqdm==4.66.5
traitlets==5.14.3
transformers==4.44.2
typing-extensions==4.12.2
typing-inspect==0.9.0
tzdata==2024.1
unstructured==0.15.9
unstructured-client==0.25.5
unstructured-inference==0.7.36
unstructured-pytesseract==0.3.13
urllib3==2.2.2
wcwidth==0.2.13
wrapt==1.16.0
xlsxwriter==3.2.0
yarl==1.9.7
6
backend/worker/diff-assistant/tests/conftest.py
Normal file
@ -0,0 +1,6 @@
import pytest


@pytest.fixture
def hello_message():
    return "Hello from diff-assistant!"
5
backend/worker/diff-assistant/tests/test_hello.py
Normal file
@ -0,0 +1,5 @@
from use_case_3 import hello


def test_hello(hello_message):
    assert hello() == hello_message
@ -8,6 +8,7 @@ authors = [
dependencies = [
    "quivr-core[all]",
    "quivr-api",
    "quivr-diff-assistant",
    "celery[redis]>=5.0.0",
    "python-dotenv>=1.0.0",
    "playwright>=1.0.0",
@ -48,3 +49,7 @@ path = "../quivr-core"

[[tool.rye.sources]]
name = "quivr-api"
path = "../quivr-api"

[[tool.rye.sources]]
name = "quivr-diff-assistant"
path = "./diff-assistant"
@ -5,6 +5,8 @@ from quivr_api.modules.upload.service.upload_file import (
    upload_file_storage,
)

from quivr_worker.assistants.cdp_use_case_2 import process_cdp_use_case_2
from quivr_worker.assistants.cdp_use_case_3 import process_cdp_use_case_3
from quivr_worker.utils.pdf_generator.pdf_generator import PDFGenerator, PDFModel


@ -15,19 +17,29 @@ async def process_assistant(
    tasks_service: TasksService,
    user_id: str,
):
    print(task_id)
    task = await tasks_service.get_task_by_id(task_id, user_id)  # type: ignore
    assistant_name = task.assistant_name
    output = ""
    if assistant_id == 3:
        output = await process_cdp_use_case_3(
            assistant_id, notification_uuid, task_id, tasks_service, user_id
        )
    elif assistant_id == 2:
        output = await process_cdp_use_case_2(
            assistant_id, notification_uuid, task_id, tasks_service, user_id
        )
    else:
        new_task = await tasks_service.update_task(task_id, {"status": "processing"})
        # Add a random delay of 10 to 20 seconds

    await tasks_service.update_task(task_id, {"status": "in_progress"})

    print(task)

    task_result = {"status": "completed", "answer": "#### Assistant answer"}
    task_result = {"status": "completed", "answer": output}

    output_dir = f"{assistant_id}/{notification_uuid}"
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/output.pdf"

    generated_pdf = PDFGenerator(PDFModel(title="Test", content="Test"))
    generated_pdf = PDFGenerator(PDFModel(title=assistant_name, content=output))
    generated_pdf.print_pdf()
    generated_pdf.output(output_path)

@ -36,5 +48,4 @@ async def process_assistant(

    # Now delete the file
    os.remove(output_path)

    await tasks_service.update_task(task_id, task_result)
312 backend/worker/quivr_worker/assistants/cdp_use_case_2.py Normal file
@ -0,0 +1,312 @@
import asyncio  # needed by parse_documents below
import random
import string
from enum import Enum

import pandas as pd

# get environment variables
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import UnstructuredElementNodeParser
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.schema import Document
from llama_index.llms.openai import OpenAI
from quivr_api.logger import get_logger
from quivr_api.modules.assistant.dto.inputs import InputAssistant
from quivr_api.modules.assistant.services.tasks_service import TasksService
from quivr_api.modules.dependencies import get_supabase_client
from quivr_diff_assistant.use_case_3.parser import DeadlyParser
from quivr_diff_assistant.utils.utils import COMPARISON_PROMPT

logger = get_logger(__name__)

# Set pandas display options
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)


def load_and_process_document(file_path, pickle_file):
    print(file_path)
    reader = SimpleDirectoryReader(input_files=[file_path])
    docs = reader.load_data()
    print(len(docs), " and", len(docs[0].text))
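    # Heuristic: a single near-empty document means the classical parser found
    # no real text, so fall back to OCR via DeadlyParser.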
    if len(docs) == 1 and len(docs[0].text) < 9:
        print("No text found with classical parse, switching to OCR ...")
        parser = DeadlyParser()
        doc = parser.deep_parse(file_path)
        docs = [Document().from_langchain_format(doc)]

    node_parser = UnstructuredElementNodeParser()

    raw_nodes = node_parser.get_nodes_from_documents(docs)

    base_nodes, node_mappings = node_parser.get_base_nodes_and_mappings(raw_nodes)
    return base_nodes, node_mappings

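# Recursive retrieval: vector hits that point at element nodes (tables, etc.)
# are resolved through node_mappings before the query engine sees them.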
def create_query_engine(base_nodes, node_mappings):
    vector_index = VectorStoreIndex(base_nodes)
    vector_retriever = vector_index.as_retriever(similarity_top_k=5)
    recursive_retriever = RecursiveRetriever(
        "vector",
        retriever_dict={"vector": vector_retriever},
        node_dict=node_mappings,
        verbose=True,
    )
    return RetrieverQueryEngine.from_args(
        recursive_retriever, llm=OpenAI(temperature=0, model="gpt-4")
    )

def compare_responses(response1, response2):
    llm = OpenAI(temperature=0, model="gpt-4")
    prompt = f"""
    Compare the following two responses and determine if they convey the same information:
    Response for document 1: {response1}
    Response for document 2: {response2}
    Are these responses essentially the same? Provide a brief explanation for your conclusion. Differences in format are not important; focus on the content and the numbers.
    If there are any specific differences, please highlight them with bullet points. Respond in French and in a markdown format.
    """
    return llm.complete(prompt)

class ComparisonTypes(str, Enum):
    CDC_ETIQUETTE = "Cahier des Charges - Etiquette"
    CDC_FICHE_DEV = "Cahier des Charges - Fiche Dev"

def llm_comparator(
    document: str, cdc: str, llm: BaseChatModel, comparison_type: ComparisonTypes
):
    chain = COMPARISON_PROMPT | llm | StrOutputParser()

    if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
        text_1 = "Etiquette"
    elif comparison_type == ComparisonTypes.CDC_FICHE_DEV:
        text_1 = "Fiche Dev"

    return chain.stream(
        {
            "document": document,
            "text_1": text_1,
            "cdc": cdc,
            "text_2": "Cahier des Charges",
        }
    )

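# Use case 2 entry point: download the two task documents, parse them, then
# stream an LLM comparison of the document against the cahier des charges.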
async def process_cdp_use_case_2(
    assistant_id: str,
    notification_uuid: str,
    task_id: int,
    tasks_service: TasksService,
    user_id: str,
) -> str:
    task = await tasks_service.get_task_by_id(task_id, user_id)  # type: ignore
    logger.info(f"Task: {task} 📝")
    # Parse settings into InputAssistant
    input_assistant = InputAssistant.model_validate(task.settings)
    assert input_assistant.inputs.files is not None
    assert len(input_assistant.inputs.files) == 2

    # Get the value of the "Document 1" key and "Document 2" key. The input files might not be in the order of "Document 1" and "Document 2"
    # So we need to find the correct order
    logger.info(f"Input assistant: {input_assistant} 📂")
    before_file_key = input_assistant.inputs.files[0].key
    after_file_key = input_assistant.inputs.files[1].key

    before_file_value = input_assistant.inputs.files[0].value
    after_file_value = input_assistant.inputs.files[1].value

    if before_file_key == "Document 2":
        before_file_value = input_assistant.inputs.files[1].value
        after_file_value = input_assistant.inputs.files[0].value

    # Get the files from supabase
    supabase_client = get_supabase_client()
    path = f"{task.assistant_id}/{task.pretty_id}/"
    logger.info(f"Path: {path} 📁")
    await tasks_service.update_task(task_id, {"status": "processing"})

    before_file_data = supabase_client.storage.from_("quivr").download(
        f"{path}{before_file_value}"
    )
    after_file_data = supabase_client.storage.from_("quivr").download(
        f"{path}{after_file_value}"
    )

    # Generate a random string of 8 characters
    random_string = "".join(random.choices(string.ascii_letters + string.digits, k=8))

    # Write temp files with the original name without using save_uploaded_file
    # because the file is already in the quivr bucket
    before_file_path = f"/tmp/{random_string}_{before_file_value}"
    after_file_path = f"/tmp/{random_string}_{after_file_value}"
    with open(before_file_path, "wb") as f:
        f.write(before_file_data)
    with open(after_file_path, "wb") as f:
        f.write(after_file_data)
    assert input_assistant.inputs.select_texts is not None
    value_use_case = input_assistant.inputs.select_texts[0].value

    ## Get the document type
    document_type = None
    if value_use_case == "Etiquettes VS Cahier des charges":
        document_type = ComparisonTypes.CDC_ETIQUETTE
    elif value_use_case == "Fiche Dev VS Cahier des charges":
        document_type = ComparisonTypes.CDC_FICHE_DEV
    else:
        logger.error(f"❌ Document type not supported: {value_use_case}")
        raise ValueError(f"Document type not supported: {value_use_case}")
    parser = DeadlyParser()
    logger.info(f"Document type: {document_type} 📄")
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0.1,
        max_tokens=None,
        max_retries=2,
    )

    before_file_parsed = await parser.aparse(before_file_path)
    logger.info("Before file parsed 📜")
    after_file_parsed = None
    if document_type == ComparisonTypes.CDC_ETIQUETTE:
        logger.info("Parsing after file with deep parse 🔍")
        after_file_parsed = await parser.deep_aparse(after_file_path, llm=llm)
    else:
        logger.info("Parsing after file with classical parse 🔍")
        after_file_parsed = await parser.aparse(after_file_path)

    logger.info("Comparing documents ⚖️")
    comparison = llm_comparator(
        document=after_file_parsed.page_content,
        cdc=before_file_parsed.page_content,
        llm=llm,
        comparison_type=document_type,
    )

    logger.info(f"Comparison: {comparison} ✅")
    return "".join(comparison)

async def test_main():
    cdc_doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-2-1_Mendiant Lait_QD PC F03 - FR Cahier des charges produit -rev 2021-v2.pdf"
    doc = "/Users/jchevall/Coding/diff-assistant/data/Use case #2/Cas2-2-1_Proposition étiquette Mendiant Lait croustillant.pdf"

    comparison_type = ComparisonTypes.CDC_FICHE_DEV

    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0.1,
        max_tokens=None,
        max_retries=2,
    )

    parser = DeadlyParser()
    parsed_cdc_doc = await parser.aparse(cdc_doc)

    if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
        parsed_doc = await parser.deep_aparse(doc, llm=llm)
    else:
        parsed_doc = await parser.aparse(doc)

    print("\n\n Cahier des Charges")
    print(parsed_cdc_doc.page_content)

    print("\n\n Other document")
    print(parsed_doc.page_content)

    comparison = llm_comparator(
        document=parsed_doc.page_content,
        cdc=parsed_cdc_doc.page_content,
        llm=llm,
        comparison_type=comparison_type,
    )

    print("\n\n Comparison")
    print(comparison)

def get_document_path(doc):
    try:
        # Uploaded file objects expose .name and a buffer; plain paths do not.
        with open(doc.name, "wb") as temp_file:
            temp_file.write(doc.getbuffer())
            path = temp_file.name
    except Exception:
        path = doc

    return path

async def parse_documents(cdc_doc, doc, comparison_type: ComparisonTypes, llm):
    parser = DeadlyParser()

    # Schedule the coroutines as tasks
    cdc_task = asyncio.create_task(parser.aparse(get_document_path(cdc_doc)))

    if comparison_type == ComparisonTypes.CDC_ETIQUETTE:
        doc_task = asyncio.create_task(
            parser.deep_aparse(get_document_path(doc), llm=llm)
        )
    else:
        doc_task = asyncio.create_task(parser.aparse(get_document_path(doc)))

    # Optionally, do other work here while tasks are running

    # Await the tasks to get the results
    parsed_cdc_doc = await cdc_task
    print("\n\n Cahier de Charges: \n", parsed_cdc_doc.page_content)

    parsed_doc = await doc_task
    print("\n\n Other doc: \n", parsed_doc.page_content)

    return parsed_cdc_doc, parsed_doc

# def main():
#     st.title("Document Comparison Tool : Use Case 2")

#     # File uploaders for two documents
#     cdc_doc = st.file_uploader(
#         "Upload Cahier des Charges", type=["docx", "xlsx", "pdf", "txt"]
#     )
#     doc = st.file_uploader(
#         "Upload Etiquette / Fiche Dev", type=["docx", "xlsx", "pdf", "txt"]
#     )

#     comparison_type = st.selectbox(
#         "Select document types",
#         [ComparisonTypes.CDC_ETIQUETTE.value, ComparisonTypes.CDC_FICHE_DEV.value],
#     )

#     if st.button("Process Documents and Questions"):
#         if not cdc_doc or not doc:
#             st.error("Please upload both documents before launching the processing.")
#             return

#         with st.spinner("Processing files..."):
#             llm = ChatOpenAI(
#                 model="gpt-4o",
#                 temperature=0.1,
#                 max_tokens=None,
#                 max_retries=2,
#             )

#             parsed_cdc_doc, parsed_doc = asyncio.run(
#                 parse_documents(cdc_doc, doc, comparison_type=comparison_type, llm=llm)
#             )

#             comparison = llm_comparator(
#                 document=parsed_doc.page_content,
#                 cdc=parsed_cdc_doc.page_content,
#                 llm=llm,
#                 comparison_type=comparison_type,
#             )
#             # Run the async function using asyncio.run()
#             # comparison = asyncio.run(process_documents(cdc_doc, doc, comparison_type))
#             st.write_stream(comparison)
224 backend/worker/quivr_worker/assistants/cdp_use_case_3.py Normal file
@ -0,0 +1,224 @@
import os
import random
import string
import tempfile
from enum import Enum
from pathlib import Path

from diff_match_patch import diff_match_patch

# get environment variables
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_openai import ChatOpenAI
from quivr_api.logger import get_logger
from quivr_api.modules.assistant.dto.inputs import InputAssistant
from quivr_api.modules.assistant.services.tasks_service import TasksService
from quivr_api.modules.dependencies import get_supabase_client
from quivr_diff_assistant.use_case_3.diff_type import DiffResult, llm_comparator
from quivr_diff_assistant.use_case_3.llm_reporter import redact_report
from quivr_diff_assistant.use_case_3.parser import DeadlyParser

logger = get_logger(__name__)

class DocumentType(Enum):
    ETIQUETTE = "etiquette"
    CAHIER_DES_CHARGES = "cdc"

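# Use case 3 entry point: build a modification report between the "before" and
# "after" versions of a document (label or cahier des charges).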
async def process_cdp_use_case_3(
    assistant_id: str,
    notification_uuid: str,
    task_id: int,
    tasks_service: TasksService,
    user_id: str,
) -> str:
    task = await tasks_service.get_task_by_id(task_id, user_id)  # type: ignore

    # Parse settings into InputAssistant
    input_assistant = InputAssistant.model_validate(task.settings)
    assert input_assistant.inputs.files is not None
    assert len(input_assistant.inputs.files) == 2

    # Get the value of the "Document 1" key and "Document 2" key. The input files might not be in the order of "Document 1" and "Document 2"
    # So we need to find the correct order
    before_file_key = input_assistant.inputs.files[0].key
    after_file_key = input_assistant.inputs.files[1].key

    before_file_value = input_assistant.inputs.files[0].value
    after_file_value = input_assistant.inputs.files[1].value

    if before_file_key == "Document 2":
        before_file_value = input_assistant.inputs.files[1].value
        after_file_value = input_assistant.inputs.files[0].value

    # Get the files from supabase
    supabase_client = get_supabase_client()
    path = f"{task.assistant_id}/{task.pretty_id}/"

    await tasks_service.update_task(task_id, {"status": "processing"})

    # Download both files from the quivr bucket
    before_file_data = supabase_client.storage.from_("quivr").download(
        f"{path}{before_file_value}"
    )
    after_file_data = supabase_client.storage.from_("quivr").download(
        f"{path}{after_file_value}"
    )

    # Generate a random string of 8 characters
    random_string = "".join(random.choices(string.ascii_letters + string.digits, k=8))

    # Write temp files with the original name without using save_uploaded_file
    # because the file is already in the quivr bucket
    before_file_path = f"/tmp/{random_string}_{before_file_value}"
    after_file_path = f"/tmp/{random_string}_{after_file_value}"
    with open(before_file_path, "wb") as f:
        f.write(before_file_data)
    with open(after_file_path, "wb") as f:
        f.write(after_file_data)

    assert input_assistant.inputs.select_texts is not None
    value_use_case = input_assistant.inputs.select_texts[0].value

    ## Get the document type
    document_type = None
    if value_use_case == "Etiquettes":
        document_type = DocumentType.ETIQUETTE
    elif value_use_case == "Cahier des charges":
        document_type = DocumentType.CAHIER_DES_CHARGES
    else:
        raise ValueError(f"Invalid value for use case: {value_use_case}")

    ## Get the hard to read document boolean value
    assert input_assistant.inputs.booleans is not None
    hard_to_read_document = input_assistant.inputs.booleans[0].value

    assert before_file_data is not None
    assert after_file_data is not None

    openai_gpt4o = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        max_retries=2,
    )

    use_llm_comparator = document_type == DocumentType.ETIQUETTE
    report = await create_modification_report(
        before_file=before_file_path,
        after_file=after_file_path,
        type=document_type,
        llm=openai_gpt4o,
        partition=hard_to_read_document,
        use_llm_comparator=use_llm_comparator,
    )

    os.unlink(before_file_path)
    os.unlink(after_file_path)
    return report

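# Parse both versions, split them into "# " sections, then either let the LLM
# compare the whole documents (labels) or diff section-by-section with
# diff-match-patch and have the LLM redact the final report.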
async def create_modification_report(
    before_file: str | Path | bytes,
    after_file: str | Path | bytes,
    type: DocumentType,
    llm: BaseChatModel,
    partition: bool = False,
    use_llm_comparator: bool = False,
    parser=DeadlyParser(),
) -> str:
    if type == DocumentType.ETIQUETTE:
        logger.debug("parsing before file")
        before_text = parser.deep_parse(before_file, partition=partition, llm=llm)
        logger.debug("parsing after file")
        after_text = parser.deep_parse(after_file, partition=partition, llm=llm)
    elif type == DocumentType.CAHIER_DES_CHARGES:
        before_text = await parser.aparse(before_file)
        after_text = await parser.aparse(after_file)

    logger.debug(before_text.page_content)
    logger.debug(after_text.page_content)
    before_sections = before_text.page_content.split("\n# ")
    after_sections = after_text.page_content.split("\n# ")
    assert len(before_sections) == len(after_sections)

    if use_llm_comparator:
        logger.debug("using llm comparator")
        llm_comparator_result = llm_comparator(
            before_text.page_content, after_text.page_content, llm=llm
        )
        return llm_comparator_result
    logger.debug("using diff match patch")
    dmp = diff_match_patch()
    section_diffs = []
    for before_section, after_section in zip(
        before_sections, after_sections, strict=False
    ):
        main_diff: list[tuple[int, str]] = dmp.diff_main(before_section, after_section)
        section_diffs.append(DiffResult(main_diff))

    logger.debug(section_diffs)
    report = redact_report(section_diffs, llm=llm)
    return report

def save_uploaded_file(uploaded_file):
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=os.path.splitext(uploaded_file.name)[1]
    ) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        return tmp_file.name

# st.title("Document Modification Report Generator : Use Case 3")
|
||||
|
||||
# # File uploaders
|
||||
# before_file = st.file_uploader("Upload 'Before' file", type=["pdf", "docx"])
|
||||
# after_file = st.file_uploader("Upload 'After' file", type=["pdf", "docx"])
|
||||
|
||||
# # Document type selector
|
||||
# doc_type = st.selectbox("Select document type", ["ETIQUETTE", "CAHIER_DES_CHARGES"])
|
||||
|
||||
# # Complexity of document
|
||||
# complexity = st.checkbox("Complex document (lot of text of OCRise)")
|
||||
|
||||
# # Process button
|
||||
# if st.button("Process"):
|
||||
# if before_file and after_file:
|
||||
# with st.spinner("Processing files..."):
|
||||
# # Save uploaded files
|
||||
# before_path = save_uploaded_file(before_file)
|
||||
# after_path = save_uploaded_file(after_file)
|
||||
|
||||
# # Initialize LLM
|
||||
# openai_gpt4o = ChatOpenAI(
|
||||
# model="gpt-4o",
|
||||
# temperature=0,
|
||||
# max_tokens=None,
|
||||
# max_retries=2,
|
||||
# )
|
||||
# use_llm_comparator = True if doc_type == "ETIQUETTE" else False
|
||||
|
||||
# # Generate report
|
||||
# logger.debug("generating report")
|
||||
# report = asyncio.run(
|
||||
# create_modification_report(
|
||||
# before_path,
|
||||
# after_path,
|
||||
# DocumentType[doc_type],
|
||||
# openai_gpt4o,
|
||||
# partition=complexity,
|
||||
# use_llm_comparator=use_llm_comparator,
|
||||
# )
|
||||
# )
|
||||
# logger.debug("report generated")
|
||||
# # Display results
|
||||
# st.subheader("Modification Report")
|
||||
# st.write(report)
|
||||
|
||||
# # Clean up temporary files
|
||||
# os.unlink(before_path)
|
||||
# os.unlink(after_path)
|
||||
# else:
|
||||
# st.error("Please upload both 'Before' and 'After' files.")
|
@ -2,6 +2,7 @@ import asyncio
import os
from uuid import UUID

import torch
from celery.schedules import crontab
from celery.signals import worker_process_init
from dotenv import load_dotenv
@ -32,8 +33,8 @@ from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from sqlmodel import Session, text
from sqlmodel.ext.asyncio.session import AsyncSession

from quivr_worker.celery_monitor import is_being_executed
from quivr_worker.assistants.assistants import process_assistant
from quivr_worker.celery_monitor import is_being_executed
from quivr_worker.check_premium import check_is_premium
from quivr_worker.process.process_s3_file import process_uploaded_file
from quivr_worker.process.process_url import process_url_func
@ -46,6 +47,9 @@ from quivr_worker.syncs.process_active_syncs import (
from quivr_worker.syncs.store_notion import fetch_and_store_notion_files_async
from quivr_worker.utils.utils import _patch_json

torch.set_num_threads(1)


load_dotenv()

get_logger("quivr_core")
@ -130,6 +134,8 @@ async def aprocess_assistant_task(
    task_id: int,
    user_id: str,
):
    global async_engine
    assert async_engine
    async with AsyncSession(async_engine) as async_session:
        try:
            await async_session.execute(
@ -34,9 +34,9 @@ class PDFGenerator(FPDF):
        )

    def header(self):
        # Logo
        logo_path = os.path.join(os.path.dirname(__file__), "logo.png")
        self.image(logo_path, 10, 10, 20)  # Adjust size as needed
        # # Logo
        # logo_path = os.path.join(os.path.dirname(__file__), "logo.png")
        # self.image(logo_path, 10, 10, 20)  # Adjust size as needed

        # Move cursor to right of image
        self.set_xy(20, 15)
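# The reworked chapter_body below renders a small markdown subset directly:
# "#"-prefixed lines become bold headers, everything else falls through to
# multi_cell with markdown enabled.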
@ -59,15 +59,31 @@ class PDFGenerator(FPDF):

    def chapter_body(self):
        self.set_font("DejaVu", "", 12)
        self.multi_cell(
            0,
            10,
            self.pdf_model.content,
            markdown=True,
            new_x=XPos.RIGHT,
            new_y=YPos.TOP,
        )
        self.ln()
        content_lines = self.pdf_model.content.split("\n")
        for line in content_lines:
            if line.startswith("# "):
                self.ln()  # Add line break before header
                self.set_font("DejaVu", "B", 16)
                self.multi_cell(0, 10, line[2:], markdown=False)
            elif line.startswith("## "):
                self.ln()  # Add line break before header
                self.set_font("DejaVu", "B", 14)
                self.multi_cell(0, 10, line[3:], markdown=False)
            elif line.startswith("### "):
                self.ln()  # Add line break before header
                self.set_font("DejaVu", "B", 12)
                self.multi_cell(0, 10, line[4:], markdown=False)
            else:
                self.set_font("DejaVu", "", 12)
                self.multi_cell(
                    0,
                    10,
                    line,
                    markdown=True,
                    new_x=XPos.RIGHT,
                    new_y=YPos.TOP,
                )
                self.ln()

    def print_pdf(self):
        self.add_page()
@ -78,7 +94,11 @@ if __name__ == "__main__":
    pdf_model = PDFModel(
        title="Summary of Legal Services Rendered by Orrick",
        content="""
# Main Header
## Sub Header
### Sub Sub Header
**Summary:**
This is a summary of the legal services rendered.
""",
    )
    pdf = PDFGenerator(pdf_model)
@ -53,7 +53,7 @@ services:
    volumes:
      - ./backend/:/app/
    command: >
      /bin/bash -c "python -m celery -A quivr_worker.celery_worker worker -l info -E"
      /bin/bash -c "python -m celery -A quivr_worker.celery_worker worker -l info -E -P solo"
    restart: always
    depends_on:
      - redis
@ -1,29 +0,0 @@
@use "styles/Spacings.module.scss";

.modal_content_container {
  padding: Spacings.$spacing05;
  display: flex;
  flex-direction: column;
  height: 100%;
  justify-content: space-between;

  .modal_content_wrapper {
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing05;

    .message_wrapper {
      display: flex;
      flex-direction: column;
    }

    .title {
      font-weight: 600;
    }
  }

  .button {
    display: flex;
    align-self: flex-end;
  }
}
@ -1,151 +0,0 @@
import { useState } from "react";

import { Assistant } from "@/lib/api/assistants/types";
import { useAssistants } from "@/lib/api/assistants/useAssistants";
import { Stepper } from "@/lib/components/AddBrainModal/components/Stepper/Stepper";
import { StepValue } from "@/lib/components/AddBrainModal/types/types";
import { MessageInfoBox } from "@/lib/components/ui/MessageInfoBox/MessageInfoBox";
import { Modal } from "@/lib/components/ui/Modal/Modal";
import { QuivrButton } from "@/lib/components/ui/QuivrButton/QuivrButton";
import { Step } from "@/lib/types/Modal";

import styles from "./AssistantModal.module.scss";
import { InputsStep } from "./InputsStep/InputsStep";
import { OutputsStep } from "./OutputsStep/OutputsStep";

interface AssistantModalProps {
  isOpen: boolean;
  setIsOpen: (value: boolean) => void;
  assistant: Assistant;
}

export const AssistantModal = ({
  isOpen,
  setIsOpen,
  assistant,
}: AssistantModalProps): JSX.Element => {
  const steps: Step[] = [
    {
      label: "Inputs",
      value: "FIRST_STEP",
    },
    {
      label: "Outputs",
      value: "SECOND_STEP",
    },
  ];
  const [currentStep, setCurrentStep] = useState<StepValue>("FIRST_STEP");
  const [emailOutput, setEmailOutput] = useState<boolean>(true);
  const [brainOutput, setBrainOutput] = useState<string>("");
  const [files, setFiles] = useState<{ key: string; file: File | null }[]>(
    assistant.inputs.files.map((fileInput) => ({
      key: fileInput.key,
      file: null,
    }))
  );
  const { processAssistant } = useAssistants();

  const handleFileChange = (file: File, inputKey: string) => {
    setFiles((prevFiles) =>
      prevFiles.map((fileObj) =>
        fileObj.key === inputKey ? { ...fileObj, file } : fileObj
      )
    );
  };

  const handleSetIsOpen = (value: boolean) => {
    if (!value) {
      setCurrentStep("FIRST_STEP");
    }
    setIsOpen(value);
  };

  const handleProcessAssistant = async () => {
    handleSetIsOpen(false);
    await processAssistant(
      {
        name: assistant.name,
        inputs: {
          files: files.map((file) => ({
            key: file.key,
            value: (file.file as File).name,
          })),
          urls: [],
          texts: [],
        },
        outputs: {
          email: {
            activated: emailOutput,
          },
          brain: {
            activated: brainOutput !== "",
            value: brainOutput,
          },
        },
      },
      files.map((file) => file.file as File)
    );
  };

  return (
    <Modal
      title={assistant.name}
      desc={assistant.description}
      isOpen={isOpen}
      setOpen={handleSetIsOpen}
      size="big"
      CloseTrigger={<div />}
    >
      <div className={styles.modal_content_container}>
        <div className={styles.modal_content_wrapper}>
          <Stepper steps={steps} currentStep={currentStep} />
          {currentStep === "FIRST_STEP" ? (
            <MessageInfoBox type="tutorial">
              <div className={styles.message_wrapper}>
                <span className={styles.title}>Expected Input</span>
                {assistant.input_description}
              </div>
            </MessageInfoBox>
          ) : (
            <MessageInfoBox type="tutorial">
              <div className={styles.message_wrapper}>
                <span className={styles.title}>Output</span>
                {assistant.output_description}
              </div>
            </MessageInfoBox>
          )}
          {currentStep === "FIRST_STEP" ? (
            <InputsStep
              inputs={assistant.inputs}
              onFileChange={handleFileChange}
            />
          ) : (
            <OutputsStep
              setEmailOutput={setEmailOutput}
              setBrainOutput={setBrainOutput}
            />
          )}
        </div>
        <div className={styles.button}>
          {currentStep === "FIRST_STEP" ? (
            <QuivrButton
              label="Next"
              color="primary"
              iconName="chevronRight"
              onClick={() => setCurrentStep("SECOND_STEP")}
              disabled={!!files.find((file) => !file.file)}
            />
          ) : (
            <QuivrButton
              label="Process"
              color="primary"
              iconName="chevronRight"
              onClick={() => handleProcessAssistant()}
              disabled={!emailOutput && brainOutput === ""}
            />
          )}
        </div>
      </div>
    </Modal>
  );
};
@ -1,28 +0,0 @@
import { capitalCase } from "change-case";

import { AssistantInputs } from "@/lib/api/assistants/types";
import { FileInput } from "@/lib/components/ui/FileInput/FileInput";

interface InputsStepProps {
  inputs: AssistantInputs;
  onFileChange: (file: File, inputKey: string) => void;
}

export const InputsStep = ({
  inputs,
  onFileChange,
}: InputsStepProps): JSX.Element => {
  return (
    <div>
      {inputs.files.map((fileInput) => (
        <FileInput
          key={fileInput.key}
          label={capitalCase(fileInput.key)}
          icon="file"
          acceptedFileTypes={fileInput.allowed_extensions}
          onFileChange={(file) => onFileChange(file, fileInput.key)}
        />
      ))}
    </div>
  );
};
@ -1,16 +0,0 @@
@use "styles/Spacings.module.scss";

.outputs_wrapper {
  display: flex;
  flex-direction: column;
  gap: Spacings.$spacing03;

  .message_wrapper {
    width: 100%;
  }

  .brain_selector {
    padding-block: Spacings.$spacing02;
    max-width: 250px;
  }
}
@ -1,83 +0,0 @@
import { useMemo, useState } from "react";

import { formatMinimalBrainsToSelectComponentInput } from "@/app/chat/[chatId]/components/ActionsBar/components/KnowledgeToFeed/utils/formatMinimalBrainsToSelectComponentInput";
import { Checkbox } from "@/lib/components/ui/Checkbox/Checkbox";
import { MessageInfoBox } from "@/lib/components/ui/MessageInfoBox/MessageInfoBox";
import { SingleSelector } from "@/lib/components/ui/SingleSelector/SingleSelector";
import { requiredRolesForUpload } from "@/lib/config/upload";
import { useBrainContext } from "@/lib/context/BrainProvider/hooks/useBrainContext";

import styles from "./OutputsStep.module.scss";

interface OutputsStepProps {
  setEmailOutput: (value: boolean) => void;
  setBrainOutput: (value: string) => void;
}

export const OutputsStep = ({
  setEmailOutput,
  setBrainOutput,
}: OutputsStepProps): JSX.Element => {
  const [existingBrainChecked, setExistingBrainChecked] =
    useState<boolean>(false);
  const [selectedBrainId, setSelectedBrainId] = useState<string>("");
  const { allBrains } = useBrainContext();

  const brainsWithUploadRights = formatMinimalBrainsToSelectComponentInput(
    useMemo(
      () =>
        allBrains.filter(
          (brain) =>
            requiredRolesForUpload.includes(brain.role) && !!brain.max_files
        ),
      [allBrains]
    )
  );

  return (
    <div className={styles.outputs_wrapper}>
      <MessageInfoBox type="info">
        It can take a few minutes to process.
      </MessageInfoBox>
      <Checkbox
        label="Receive the results by Email"
        checked={true}
        setChecked={setEmailOutput}
      />
      <Checkbox
        label="Upload the results on an existing Brain"
        checked={existingBrainChecked}
        setChecked={() => {
          if (existingBrainChecked) {
            setBrainOutput("");
            setSelectedBrainId("");
          }
          setExistingBrainChecked(!existingBrainChecked);
        }}
      />
      {existingBrainChecked && (
        <div className={styles.brain_selector}>
          <SingleSelector
            options={brainsWithUploadRights}
            onChange={(brain) => {
              setBrainOutput(brain);
              setSelectedBrainId(brain);
            }}
            selectedOption={
              selectedBrainId
                ? {
                    value: selectedBrainId,
                    label: allBrains.find(
                      (brain) => brain.id === selectedBrainId
                    )?.name as string,
                  }
                : undefined
            }
            placeholder="Select a brain"
            iconName="brain"
          />
        </div>
      )}
    </div>
  );
};
@ -1,20 +0,0 @@
@use "styles/Spacings.module.scss";

.content_wrapper {
  padding: Spacings.$spacing06;
  display: flex;
  flex-direction: column;
  gap: Spacings.$spacing05;

  .assistants_grid {
    display: flex;
    gap: Spacings.$spacing03;
    flex-wrap: wrap;
  }

  .message_wrapper {
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing02;
  }
}
@ -1,109 +0,0 @@
"use client";
import { redirect, usePathname } from "next/navigation";
import { useEffect, useState } from "react";

import { Assistant } from "@/lib/api/assistants/types";
import { useAssistants } from "@/lib/api/assistants/useAssistants";
import { PageHeader } from "@/lib/components/PageHeader/PageHeader";
import { BrainCard } from "@/lib/components/ui/BrainCard/BrainCard";
import { MessageInfoBox } from "@/lib/components/ui/MessageInfoBox/MessageInfoBox";
import { useSupabase } from "@/lib/context/SupabaseProvider";
import { redirectToLogin } from "@/lib/router/redirectToLogin";

import { AssistantModal } from "./AssistantModal/AssistantModal";
import styles from "./page.module.scss";

const Assistants = (): JSX.Element => {
  const pathname = usePathname();
  const { session } = useSupabase();
  const [assistants, setAssistants] = useState<Assistant[]>([]);
  const [assistantModalOpened, setAssistantModalOpened] =
    useState<boolean>(false);
  const [currentAssistant, setCurrentAssistant] = useState<Assistant | null>(
    null
  );

  const { getAssistants } = useAssistants();

  useEffect(() => {
    // REMOVE FOR NOW ACCESS TO QUIVR ASSISTANTS
    redirect("/search");
    if (session === null) {
      redirectToLogin();
    }

    void (async () => {
      try {
        const res = await getAssistants();
        if (res) {
          setAssistants(res);
        }
      } catch (error) {
        console.error(error);
      }
    })();
  }, [pathname, session]);

  return (
    <>
      <div className={styles.page_header}>
        <PageHeader
          iconName="assistant"
          label="Quivr Assistants"
          buttons={[]}
        />
        <div className={styles.content_wrapper}>
          <MessageInfoBox type="info">
            <div className={styles.message_wrapper}>
              <span>
                A Quivr Assistant is an AI agent that applies specific
                processes to an input in order to generate a usable output.
              </span>
              <span>
                For now, you can try the summary assistant, which summarizes a
                document and sends the result by email or uploads it into one
                of your brains.
              </span>
              <span> But don't worry! Other assistants are cooking!</span>
            </div>
          </MessageInfoBox>
          <MessageInfoBox type="warning">
            <div className={styles.message_wrapper}>
              <span>
                <strong>Feature still in Beta.</strong> Please provide feedback
                on the chat below!
              </span>
            </div>
          </MessageInfoBox>
          <div className={styles.assistants_grid}>
            {assistants.map((assistant) => {
              return (
                <BrainCard
                  tooltip={assistant.description}
                  brainName={assistant.name}
                  tags={assistant.tags}
                  imageUrl={assistant.icon_url}
                  callback={() => {
                    setAssistantModalOpened(true);
                    setCurrentAssistant(assistant);
                  }}
                  key={assistant.name}
                  cardKey={assistant.name}
                />
              );
            })}
          </div>
        </div>
      </div>
      {currentAssistant && (
        <AssistantModal
          isOpen={assistantModalOpened}
          setIsOpen={setAssistantModalOpened}
          assistant={currentAssistant}
        />
      )}
    </>
  );
};

export default Assistants;
@ -7,6 +7,7 @@
}

.markdown {
  font-size: Typography.$small;
  p {
    margin: 0;
    padding: 0;
@ -3,9 +3,7 @@
@import "tailwindcss/utilities";
@import './colors.css';

* {
  @apply scrollbar;
}


main {
  @apply max-w-screen-xl mx-auto flex flex-col;
@ -64,6 +62,7 @@ div:focus {
  --background-blur: rgba(0, 0, 0, 0.9);
  --background-success: var(--success-lightest);
  --background-error: var(--dangerous-lightest);
  --background-pending: var(--background-3);

  /* Borders */
  --border-0: var(--grey-5);
@ -101,6 +100,7 @@ body.dark_mode {
  --background-blur: rgba(0, 0, 0, 0.9);
  --background-success: var(--black-5);
  --background-error: var(--black-5);
  --background-pending: var(--black-5);

  /* Borders */
  --border-0: var(--black-5);
@ -0,0 +1,41 @@
@use "styles/Radius.module.scss";
@use "styles/Spacings.module.scss";
@use "styles/Typography.module.scss";

.assistant_tab_wrapper {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: Spacings.$spacing05;
  border-radius: Radius.$normal;
  border: 1px solid var(--border-0);
  padding: Spacings.$spacing05;
  width: 250px;
  cursor: pointer;
  height: 100%;

  &.disabled {
    pointer-events: none;
    opacity: 0.3;
  }

  .header {
    display: flex;
    align-self: flex-start;
    align-items: center;
    gap: Spacings.$spacing03;

    .title {
      @include Typography.H3;
    }
  }

  .description {
    font-size: Typography.$small;
    font-style: italic;
  }

  &:hover {
    background-color: var(--background-3);
  }
}
@ -0,0 +1,29 @@
"use client";

import { Icon } from "@/lib/components/ui/Icon/Icon";

import styles from "./AssistantCard.module.scss";

import { Assistant } from "../../types/assistant";

interface AssistantCardProps {
  assistant: Assistant;
}

const AssistantCard = ({ assistant }: AssistantCardProps): JSX.Element => {
  return (
    <div
      className={`${styles.assistant_tab_wrapper} ${
        assistant.tags.includes("Disabled") ? styles.disabled : ""
      }`}
    >
      <div className={styles.header}>
        <Icon name="assistant" color="black" size="normal" />
        <span className={styles.title}>{assistant.name}</span>
      </div>
      <span className={styles.description}>{assistant.description}</span>
    </div>
  );
};

export default AssistantCard;
@ -0,0 +1,62 @@
@use "styles/Spacings.module.scss";
@use "styles/Typography.module.scss";

.assistant_tab_wrapper {
  height: 100%;
  display: flex;
  flex-direction: column;
  justify-content: space-between;

  .content_section {
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing06;

    .title {
      @include Typography.H2;
    }

    .assistant_choice_wrapper {
      display: flex;
      gap: Spacings.$spacing05;
      align-items: stretch;
      flex-wrap: wrap;
    }
  }

  .form_wrapper {
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing06;

    .title {
      @include Typography.H2;
    }

    .file_inputs_wrapper {
      display: flex;
      justify-content: space-between;
      width: 100%;
      gap: Spacings.$spacing05;

      .file_input_wrapper {
        width: 100%;
        display: flex;
        flex-direction: column;
        gap: Spacings.$spacing03;

        .file_header {
          display: flex;
          align-items: center;
          gap: Spacings.$spacing03;
          font-size: Typography.$small;
        }
      }
    }
  }

  .buttons_wrapper {
    display: flex;
    justify-content: space-between;
  }
}
267 frontend/app/quality-assistant/AssistantTab/AssistantTab.tsx Normal file
@ -0,0 +1,267 @@
"use client";

import { useEffect, useState } from "react";

import { useAssistants } from "@/lib/api/assistants/useAssistants";
import { FileInput } from "@/lib/components/ui/FileInput/FileInput";
import { Icon } from "@/lib/components/ui/Icon/Icon";
import QuivrButton from "@/lib/components/ui/QuivrButton/QuivrButton";

import AssistantCard from "./AssistantCard/AssistantCard";
import styles from "./AssistantTab.module.scss";
import BooleansInputs from "./BooleansInputs/BooleansInputs";
import SelectorsInputs from "./SelectorsInput/SelectorsInputs";

import { Assistant, ProcessAssistantData } from "../types/assistant";

export interface ProcessAssistantInput {
  input: ProcessAssistantData;
  files: File[];
}

interface AssistantTabProps {
  setSelectedTab: (tab: string) => void;
}

const FILE_TYPES = ["pdf", "docx", "doc", "txt"];

const useAssistantData = () => {
  const [assistants, setAssistants] = useState<Assistant[]>([]);
  const [assistantChoosed, setAssistantChoosed] = useState<
    Assistant | undefined
  >(undefined);
  const { getAssistants } = useAssistants();

  useEffect(() => {
    void (async () => {
      try {
        const res = await getAssistants();
        setAssistants(res);
      } catch (error) {
        console.error(error);
      }
    })();
  }, []);

  return { assistants, assistantChoosed, setAssistantChoosed };
};
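// useFormStates seeds every checkbox to false and every selector to its first
// option whenever the chosen assistant changes.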
const useFormStates = (assistantChoosed: Assistant | undefined) => {
  const [booleanStates, setBooleanStates] = useState<{
    [key: string]: boolean | null;
  }>({});
  const [selectTextStates, setSelectTextStates] = useState<{
    [key: string]: string | null;
  }>({});
  const [fileStates, setFileStates] = useState<{ [key: string]: File }>({});
  const [isFormValid, setIsFormValid] = useState<boolean>(false);

  useEffect(() => {
    if (assistantChoosed?.inputs.booleans) {
      const initialBooleanStates = assistantChoosed.inputs.booleans.reduce(
        (acc, input) => ({ ...acc, [input.key]: false }),
        {}
      );
      setBooleanStates(initialBooleanStates);
    }
    if (assistantChoosed?.inputs.select_texts) {
      const initialSelectTextStates =
        assistantChoosed.inputs.select_texts.reduce(
          (acc, input) => ({ ...acc, [input.key]: input.options[0] }),
          {}
        );
      setSelectTextStates(initialSelectTextStates);
    }
  }, [assistantChoosed]);

  return {
    booleanStates,
    setBooleanStates,
    selectTextStates,
    setSelectTextStates,
    fileStates,
    setFileStates,
    isFormValid,
    setIsFormValid,
  };
};

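// validateForm: optional input groups (booleans, select_texts) count as valid
// via "?? true" when the assistant does not declare them; files are mandatory.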
const validateForm = (
  assistantChoosed: Assistant | undefined,
  booleanStates: { [x: string]: boolean | null },
  fileStates: { [x: string]: File | undefined },
  selectTextStates: { [x: string]: string | null }
) => {
  if (!assistantChoosed) {
    return false;
  }

  const allBooleansSet =
    assistantChoosed.inputs.booleans?.every(
      (input) =>
        booleanStates[input.key] !== undefined &&
        booleanStates[input.key] !== null
    ) ?? true;

  const allFilesSet = assistantChoosed.inputs.files.every(
    (input) => fileStates[input.key] !== undefined
  );

  const allSelectTextsSet =
    assistantChoosed.inputs.select_texts?.every(
      (input) =>
        selectTextStates[input.key] !== undefined &&
        selectTextStates[input.key] !== null
    ) ?? true;

  return allBooleansSet && allFilesSet && allSelectTextsSet;
};

const AssistantTab = ({ setSelectedTab }: AssistantTabProps): JSX.Element => {
  const { assistants, assistantChoosed, setAssistantChoosed } =
    useAssistantData();
  const {
    booleanStates,
    setBooleanStates,
    selectTextStates,
    setSelectTextStates,
    fileStates,
    setFileStates,
    isFormValid,
    setIsFormValid,
  } = useFormStates(assistantChoosed);
  const { processTask } = useAssistants();
  const [loading, setLoading] = useState<boolean>(false);

  const handleFileChange = (key: string, file: File) => {
    setFileStates((prevState) => ({
      ...prevState,
      [key]: file,
    }));
  };

  useEffect(() => {
    setIsFormValid(
      validateForm(
        assistantChoosed,
        booleanStates,
        fileStates,
        selectTextStates
      )
    );
  }, [booleanStates, fileStates, selectTextStates, assistantChoosed]);

  const handleSubmit = async () => {
    if (assistantChoosed) {
      const processAssistantData: ProcessAssistantData = {
        id: assistantChoosed.id,
        name: assistantChoosed.name,
        inputs: {
          files: Object.keys(fileStates).map((key) => ({
            key,
            value: fileStates[key].name,
          })),
          booleans: Object.keys(booleanStates).map((key) => ({
            key,
            value: booleanStates[key] ?? null,
          })),
          select_texts: Object.keys(selectTextStates).map((key) => ({
            key,
            value: selectTextStates[key],
          })),
        },
      };

      const processAssistantInput: ProcessAssistantInput = {
        input: processAssistantData,
        files: Object.values(fileStates),
      };

      setLoading(true);
      await processTask(processAssistantInput);
      setSelectedTab("Process");
      setLoading(false);
    }
  };

  const resetForm = () => {
    setBooleanStates({});
    setSelectTextStates({});
    setFileStates({});
    setIsFormValid(false);
  };

  const handleBack = () => {
    resetForm();
    setAssistantChoosed(undefined);
  };

  return (
    <div className={styles.assistant_tab_wrapper}>
      {!assistantChoosed ? (
        <div className={styles.content_section}>
          <span className={styles.title}>Choose an assistant</span>
          <div className={styles.assistant_choice_wrapper}>
            {assistants.map((assistant, index) => (
              <div key={index} onClick={() => setAssistantChoosed(assistant)}>
                <AssistantCard assistant={assistant} />
              </div>
            ))}
          </div>
        </div>
      ) : (
        <div className={styles.form_wrapper}>
          <span className={styles.title}>{assistantChoosed.name}</span>
          <div className={styles.file_inputs_wrapper}>
            {assistantChoosed.inputs.files.map((input, index) => (
              <div className={styles.file_input_wrapper} key={index}>
                <div className={styles.file_header}>
                  <Icon name="file" color="black" size="small" />
                  <span>{input.key}</span>
                </div>
                <FileInput
                  label={input.key}
                  onFileChange={(file) => handleFileChange(input.key, file)}
                  acceptedFileTypes={FILE_TYPES}
                />
              </div>
            ))}
          </div>
          <SelectorsInputs
            selectTexts={assistantChoosed.inputs.select_texts ?? []}
            selectTextStates={selectTextStates}
            setSelectTextStates={setSelectTextStates}
          />
          <BooleansInputs
            booleans={assistantChoosed.inputs.booleans ?? []}
            conditionalInputs={assistantChoosed.inputs.conditional_inputs}
            booleanStates={booleanStates}
            setBooleanStates={setBooleanStates}
            selectTextStates={selectTextStates}
          />
        </div>
      )}
      {assistantChoosed && (
        <div className={styles.buttons_wrapper}>
          <QuivrButton
            iconName="chevronLeft"
            label="Back"
            color="primary"
            onClick={() => handleBack()}
          />
          <QuivrButton
            iconName="chevronRight"
            label="EXECUTE"
            color="primary"
            important={true}
            onClick={handleSubmit}
            isLoading={loading}
            disabled={!isFormValid}
          />
        </div>
      )}
    </div>
  );
};

export default AssistantTab;
@ -0,0 +1,5 @@
@use "styles/Variables.module.scss";

.boolean_inputs_wrapper {
  width: Variables.$assistantInputWidth;
}
@ -0,0 +1,74 @@
"use client";

import { Checkbox } from "@/lib/components/ui/Checkbox/Checkbox";

import styles from "./BooleansInputs.module.scss";

import { ConditionalInput } from "../../types/assistant";

interface BooleansInputsProps {
  booleans: { key: string; description: string }[];
  conditionalInputs?: ConditionalInput[];
  booleanStates: { [key: string]: boolean | null };
  setBooleanStates: React.Dispatch<
    React.SetStateAction<{ [key: string]: boolean | null }>
  >;
  selectTextStates: { [key: string]: string | null };
}

const BooleansInputs = ({
  booleans,
  conditionalInputs,
  booleanStates,
  setBooleanStates,
  selectTextStates,
}: BooleansInputsProps): JSX.Element => {
  const handleCheckboxChange = (key: string, checked: boolean) => {
    setBooleanStates((prevState: { [key: string]: boolean | null }) => ({
      ...prevState,
      [key]: checked,
    }));
  };
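  // A conditional input gates a checkbox on another input's current value,
  // read from either the boolean or the select-text state maps.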
  const checkCondition = (conditionalInput: ConditionalInput): boolean => {
    const { key, condition, value } = conditionalInput;
    const targetValue =
      booleanStates[key]?.toString() ?? selectTextStates[key] ?? "";

    if (condition === "equals") {
      return targetValue === value;
    } else {
      return targetValue !== value;
    }
  };

  return (
    <div className={styles.boolean_inputs_wrapper}>
      {booleans.map((input, index) => {
        const shouldShow = !!conditionalInputs?.every((conditionalInput) => {
          if (conditionalInput.conditional_key === input.key) {
            return checkCondition(conditionalInput);
          }

          return true;
        });

        if (!shouldShow) {
          return null;
        }

        return (
          <div key={index} className={styles.boolean_input}>
            <Checkbox
              label={input.key}
              checked={!!booleanStates[input.key]}
              setChecked={(checked) => handleCheckboxChange(input.key, checked)}
            />
          </div>
        );
      })}
    </div>
  );
};

export default BooleansInputs;
@ -0,0 +1,5 @@
@use "styles/Variables.module.scss";

.select_texts_wrapper {
  width: Variables.$assistantInputWidth;
}
@ -0,0 +1,49 @@
import React from "react";

import { SingleSelector } from "@/lib/components/ui/SingleSelector/SingleSelector";

import styles from "./SelectorsInputs.module.scss";

interface SelectorsInputsProps {
  selectTexts: { key: string; options: string[] }[];
  selectTextStates: { [key: string]: string | null };
  setSelectTextStates: React.Dispatch<
    React.SetStateAction<{ [key: string]: string | null }>
  >;
}

const SelectorsInputs = ({
  selectTexts,
  selectTextStates,
  setSelectTextStates,
}: SelectorsInputsProps): JSX.Element => {
  const handleSelectTextChange = (key: string, value: string) => {
    setSelectTextStates((prevState) => ({
      ...prevState,
      [key]: value,
    }));
  };

  return (
    <div className={styles.select_texts_wrapper}>
      {selectTexts.map((input, index) => (
        <div key={index} className={styles.select_text}>
          <SingleSelector
            iconName="brain"
            placeholder={input.key}
            options={input.options.map((option) => {
              return { label: option, value: option };
            })}
            onChange={(value) => handleSelectTextChange(input.key, value)}
            selectedOption={{
              label: selectTextStates[input.key] ?? input.options[0],
              value: selectTextStates[input.key] ?? input.options[0],
            }}
          />
        </div>
      ))}
    </div>
  );
};

export default SelectorsInputs;
@ -0,0 +1,193 @@
@use "styles/Radius.module.scss";
@use "styles/Spacings.module.scss";
@use "styles/Typography.module.scss";
@use "styles/Variables.module.scss";

.process_wrapper {
  padding-inline: Spacings.$spacing06;
  overflow: hidden;
  display: flex;
  gap: Spacings.$spacing02;
  justify-content: space-between;
  align-items: center;
  border: 1px solid var(--border-0);
  padding-block: Spacings.$spacing03;
  position: relative;
  overflow: visible;
  font-size: Typography.$small;
  border-bottom: none;

  &.last {
    border-radius: 0 0 Radius.$normal Radius.$normal;
    border-bottom: 1px solid var(--border-0);
  }

  &.clickable {
    cursor: pointer;

    &:hover {
      background-color: var(--background-1);
    }
  }

  .left {
    display: flex;
    align-items: center;
    gap: calc(Spacings.$spacing06 + 6px);
    overflow: hidden;

    .left_fields {
      display: flex;
      align-items: center;
      overflow: hidden;

      .assistant {
        font-size: Typography.$small;
        min-width: Variables.$menuSectionWidth;
        max-width: Variables.$menuSectionWidth;
      }

      .files {
        font-size: Typography.$tiny;
        color: var(--text-4);
        overflow: hidden;

        .filename {
          @include Typography.EllipsisOverflow;
        }
      }
    }
  }

  .right {
    display: flex;
    gap: Spacings.$spacing05;
    align-items: center;

    .date {
      font-size: Typography.$very_tiny;
      width: 150px;
      display: flex;
      align-items: center;
      justify-content: center;
      @include Typography.EllipsisOverflow;
    }

    .status {
      width: 100px;
      display: flex;
      align-items: center;
      justify-content: center;
      @include Typography.EllipsisOverflow;
    }
  }
}

.markdown {
  p {
    margin: 0;
    padding-block: Spacings.$spacing06;
    align-items: center;
  }

  ul {
    list-style-type: disc;
    margin-top: 0;
    padding: 0;
    margin-left: Spacings.$spacing05;
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing03;

    li {
      white-space-collapse: collapse;
    }
  }

  ol {
    list-style-type: decimal;
    padding-left: Spacings.$spacing05;
    list-style-position: outside;

    li {
      white-space-collapse: collapse;
    }
  }

  h1 {
    @include Typography.H1;
  }

  h2 {
    @include Typography.H2;
  }

  h3 {
    @include Typography.H3;
  }

  table {
    width: 100%;
    border-collapse: collapse;
    margin: Spacings.$spacing05 0;
  }

  thead {
    background-color: var(--background-1);
  }

  tr {
    border-bottom: 1px solid var(--border-0);
  }

  th,
  td {
    padding: Spacings.$spacing03;
    text-align: left;
  }

  th {
    font-weight: bold;
  }

  pre[class*="language-"] {
    background: var(--background-5);
    color: var(--white-0);
    padding: Spacings.$spacing05;
    border-radius: Radius.$normal;
    overflow: auto;
    margin: 0 0 Spacings.$spacing05 0;
    white-space: pre-wrap;
    font-size: Typography.$small;
    font-family: "Courier New", Courier, monospace;
  }

  code[class*="language-"] {
    background: none;
    color: inherit;
    border-radius: Radius.$normal;
    font-family: "Courier New", Courier, monospace;
    font-size: Typography.$small;
    white-space: pre-wrap;
  }

  code {
    background: var(--background-5);
    color: var(--white-0);
    padding: Spacings.$spacing01;
    border-radius: Radius.$normal;
    font-family: "Courier New", Courier, monospace;
    font-size: Typography.$medium;
  }

  .code_block {
    .icon {
      position: absolute;
      right: 0;
      padding: Spacings.$spacing05;
    }
    code {
      white-space: pre-wrap;
    }
  }
}
@ -0,0 +1,173 @@
"use client";

import { capitalCase } from "change-case";
import format from "date-fns/format";
import { fr } from "date-fns/locale";
import { saveAs } from "file-saver";
import { useState } from "react";
import ReactMarkdown from "react-markdown";
import gfm from "remark-gfm";

import { useAssistants } from "@/lib/api/assistants/useAssistants";
import { Checkbox } from "@/lib/components/ui/Checkbox/Checkbox";
import { Icon } from "@/lib/components/ui/Icon/Icon";
import { LoaderIcon } from "@/lib/components/ui/LoaderIcon/LoaderIcon";
import { Modal } from "@/lib/components/ui/Modal/Modal";
import { Tag } from "@/lib/components/ui/Tag/Tag";
import { useDevice } from "@/lib/hooks/useDevice";

import styles from "./ProcessLine.module.scss";

import { Process } from "../../types/process";

interface ProcessLineProps {
  process: Process;
  last?: boolean;
  selected: boolean;
  setSelected: (selected: boolean, event: React.MouseEvent) => void;
}

const ProcessLine = ({
  process,
  last,
  selected,
  setSelected,
}: ProcessLineProps): JSX.Element => {
  const [showResult, setShowResult] = useState(false);
  const [downloadUrl, setDownloadUrl] = useState<string | null>(null);
  const { isMobile } = useDevice();
  const { downloadTaskResult } = useAssistants();

  const handleMouseEnter = async () => {
    if (process.status === "completed" && !downloadUrl) {
      const res: string = await downloadTaskResult(process.id);
      setDownloadUrl(res);
    }
  };

  const handleDownload = async () => {
    if (downloadUrl) {
      const response = await fetch(
        downloadUrl.replace("host.docker.internal", "localhost")
      );
      const blob = await response.blob();
      const formattedDate = format(
        new Date(process.creation_time),
        "yyyy-MM-dd",
        { locale: fr }
      );
      const fileName = `${process.assistant_name}_${formattedDate}.pdf`;
      saveAs(blob, fileName);
    }
  };

  return (
    <>
      <div
        className={`${styles.process_wrapper} ${last ? styles.last : ""} ${
          process.status === "completed" ? styles.clickable : ""
        }`}
        onClick={() => {
          if (process.status === "completed") {
            setShowResult(!showResult);
          }
        }}
        onMouseEnter={() => void handleMouseEnter()}
      >
        <div className={styles.left}>
          <Checkbox
            checked={selected}
            setChecked={(checked, event) => setSelected(checked, event)}
          />
          <div className={styles.left_fields}>
            <span className={styles.assistant}>{process.assistant_name}</span>
            <span className={styles.files}>
              {process.task_metadata.input_files.map((file, index) => (
                <div className={styles.filename} key={index}>
                  <span>{file}</span>
                </div>
              ))}
            </span>
          </div>
        </div>
        <div className={styles.right}>
          {!isMobile && (
            <>
              <span className={styles.date}>
                {format(
                  new Date(process.creation_time),
                  "d MMMM yyyy '-' HH:mm:ss",
                  {
                    locale: fr,
                  }
                )}
              </span>
              <div className={styles.status}>
                <Tag
                  name={capitalCase(process.status)}
                  color={
                    process.status === "error"
                      ? "dangerous"
                      : process.status === "processing"
                      ? "primary"
                      : process.status === "completed"
                      ? "success"
                      : "grey"
                  }
                />
              </div>
            </>
          )}
          <div
            onClick={(event: React.MouseEvent<HTMLDivElement>) => {
              event.stopPropagation();
            }}
          >
            {process.status === "processing" ? (
              <LoaderIcon size="normal" color="primary" />
            ) : downloadUrl ? (
              <div onClick={() => void handleDownload()}>
                <Icon
                  name="download"
                  size="normal"
                  color="black"
                  handleHover={process.status === "completed"}
                />
              </div>
            ) : (
              <Icon
                name={
                  process.status === "completed"
                    ? "download"
                    : process.status === "error"
                    ? "warning"
                    : "waiting"
                }
                size="normal"
                color="black"
                handleHover={process.status === "completed"}
              />
            )}
          </div>
        </div>
      </div>

      <Modal
        size="big"
        isOpen={showResult}
        setOpen={setShowResult}
        CloseTrigger={<div />}
      >
        {process.answer && (
          <div className={styles.markdown}>
            <ReactMarkdown remarkPlugins={[gfm]}>
              {/* Convert literal "\n" escape sequences into real newlines;
                  the original replace(/\n/g, "\n") was a no-op. */}
              {process.answer.replace(/\\n/g, "\n")}
            </ReactMarkdown>
          </div>
        )}
      </Modal>
    </>
  );
};

export default ProcessLine;
122 frontend/app/quality-assistant/ProcessTab/ProcessTab.module.scss Normal file
@ -0,0 +1,122 @@
@use "styles/Radius.module.scss";
@use "styles/ScreenSizes.module.scss";
@use "styles/Spacings.module.scss";
@use "styles/Typography.module.scss";
@use "styles/Variables.module.scss";

.process_tab_wrapper {
  display: flex;
  flex-direction: column;
  gap: Spacings.$spacing05;
  padding-bottom: Spacings.$spacing10;
  border-radius: Radius.$normal;

  @media screen and (max-width: ScreenSizes.$small) {
    overflow-x: auto;
  }

  .title {
    @include Typography.H2;
  }

  .table_header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    gap: Spacings.$spacing03;

    .search {
      width: 250px;
    }
  }

  .first_line {
    display: flex;
    justify-content: space-between;
    padding-left: calc(Spacings.$spacing06);
    padding-right: calc(Spacings.$spacing11 + 6px);
    padding-block: Spacings.$spacing02;
    font-weight: 500;
    background-color: var(--background-1);
    font-size: Typography.$small;
    border: 1px solid var(--border-0);
    border-radius: Radius.$normal Radius.$normal 0 0;
    border-bottom: none;

    &.empty {
      border: 1px solid var(--border-0);
      border-radius: Radius.$normal;
    }

    .left {
      display: flex;
      align-items: center;
      gap: calc(Spacings.$spacing06 + 6px);

      .left_fields {
        display: flex;
        align-items: center;

        .field {
          display: flex;
          align-items: center;
          gap: Spacings.$spacing02;
          cursor: pointer;

          .icon {
            visibility: hidden;
          }

          &:hover {
            .icon {
              visibility: visible;
            }
          }

          &.assistant {
            width: Variables.$menuSectionWidth;
          }
        }
      }
    }

    .right {
      display: flex;
      gap: calc(Spacings.$spacing12 + Spacings.$spacing06 + 2px);

      .status {
        display: flex;
        align-items: center;
        gap: Spacings.$spacing02;
        cursor: pointer;

        .icon {
          visibility: hidden;
        }

        &:hover {
          .icon {
            visibility: visible;
          }
        }
      }

      .date {
        display: flex;
        align-items: center;
        gap: Spacings.$spacing02;
        cursor: pointer;

        .icon {
          visibility: hidden;
        }

        &:hover {
          .icon {
            visibility: visible;
          }
        }
      }
    }
  }
}
239 frontend/app/quality-assistant/ProcessTab/ProcessTab.tsx Normal file
@ -0,0 +1,239 @@
"use client";

import { useEffect, useState } from "react";

import { useAssistants } from "@/lib/api/assistants/useAssistants";
import { Checkbox } from "@/lib/components/ui/Checkbox/Checkbox";
import { Icon } from "@/lib/components/ui/Icon/Icon";
import { QuivrButton } from "@/lib/components/ui/QuivrButton/QuivrButton";
import { TextInput } from "@/lib/components/ui/TextInput/TextInput";
import { useSupabase } from "@/lib/context/SupabaseProvider";
import { filterAndSort, updateSelectedItems } from "@/lib/helpers/table";
import { useDevice } from "@/lib/hooks/useDevice";

import ProcessLine from "./Process/ProcessLine";
import styles from "./ProcessTab.module.scss";

import { Process } from "../types/process";

const ProcessTab = (): JSX.Element => {
  const [processes, setProcesses] = useState<Process[]>([]);
  const [searchQuery, setSearchQuery] = useState<string>("");
  const [selectedProcess, setSelectedProcess] = useState<Process[]>([]);
  const [allChecked, setAllChecked] = useState<boolean>(false);
  const [sortConfig, setSortConfig] = useState<{
    key: keyof Process;
    direction: "ascending" | "descending";
  }>({ key: "creation_time", direction: "descending" });
  const [filteredProcess, setFilteredProcess] = useState<Process[]>([]);
  const [lastSelectedIndex, setLastSelectedIndex] = useState<number | null>(
    null
  );
  const [loading, setLoading] = useState<boolean>(false);

  const { getTasks, deleteTask } = useAssistants();
  const { supabase } = useSupabase();
  const { isMobile } = useDevice();

  const loadTasks = async () => {
    try {
      const res = await getTasks();
      setProcesses(res);
      setFilteredProcess(res);
    } catch (error) {
      console.error(error);
    }
  };

  const handleStatusChange = () => {
    void loadTasks();
  };

  useEffect(() => {
    void loadTasks();
  }, []);

  useEffect(() => {
    const channel = supabase
      .channel("tasks")
      .on(
        "postgres_changes",
        { event: "UPDATE", schema: "public", table: "tasks" },
        handleStatusChange
      )
      .subscribe();

    return () => {
      void supabase.removeChannel(channel);
    };
  }, []);

  useEffect(() => {
    setFilteredProcess(
      filterAndSort(
        processes,
        searchQuery,
        sortConfig,
        (process) => process[sortConfig.key]
      )
    );
  }, [processes, searchQuery, sortConfig]);

  const handleDelete = async () => {
    setLoading(true);
    await Promise.all(
      selectedProcess.map(async (process) => await deleteTask(process.id))
    );

    const remainingProcesses = processes.filter(
      (process) =>
        !selectedProcess.some((selected) => selected.id === process.id)
    );

    setProcesses(remainingProcesses);
    setFilteredProcess(
      filterAndSort(
        remainingProcesses,
        searchQuery,
        sortConfig,
        (process) => process[sortConfig.key]
      )
    );

    setSelectedProcess([]);
    setAllChecked(false);
    setLoading(false);
  };

  const handleSelect = (
    process: Process,
    index: number,
    event: React.MouseEvent
  ) => {
    const newSelectedProcess = updateSelectedItems<Process>({
      item: process,
      index,
      event,
      lastSelectedIndex,
      filteredList: filteredProcess,
      selectedItems: selectedProcess,
    });
    setSelectedProcess(newSelectedProcess.selectedItems);
    setLastSelectedIndex(newSelectedProcess.lastSelectedIndex);
  };

  const handleSort = (key: keyof Process) => {
    setSortConfig((prevSortConfig) => {
      let direction: "ascending" | "descending" = "ascending";
      if (
        prevSortConfig.key === key &&
        prevSortConfig.direction === "ascending"
      ) {
        direction = "descending";
      }

      return { key, direction };
    });
  };

  return (
    <div className={styles.process_tab_wrapper}>
      <span className={styles.title}>My Results</span>
      <div className={styles.table_header}>
        <div className={styles.search}>
          <TextInput
            iconName="search"
            label="Search"
            inputValue={searchQuery}
            setInputValue={setSearchQuery}
            small={true}
          />
        </div>
        <QuivrButton
          label="Delete"
          iconName="delete"
          color="dangerous"
          disabled={selectedProcess.length === 0}
          onClick={handleDelete}
          isLoading={loading}
        />
      </div>
      <div>
        <div
          className={`${styles.first_line} ${
            !filteredProcess.length ? styles.empty : ""
          }`}
        >
          <div className={styles.left}>
            <Checkbox
              checked={allChecked}
              setChecked={(checked) => {
                setAllChecked(checked);
                setSelectedProcess(checked ? filteredProcess : []);
              }}
            />
            <div className={styles.left_fields}>
              <div
                className={`${styles.field} ${styles.assistant}`}
                onClick={() => handleSort("assistant_name")}
              >
                Assistant
                <div className={styles.icon}>
                  <Icon name="sort" size="small" color="black" />
                </div>
              </div>
              <div className={styles.field} onClick={() => handleSort("name")}>
                Files
                <div className={styles.icon}>
                  <Icon name="sort" size="small" color="black" />
                </div>
              </div>
            </div>
          </div>
          <div className={styles.right}>
            {!isMobile && (
              <>
                <div
                  className={styles.date}
                  onClick={() => handleSort("creation_time")}
                >
                  Date
                  <div className={styles.icon}>
                    <Icon name="sort" size="small" color="black" />
                  </div>
                </div>
                <div
                  className={styles.status}
                  onClick={() => handleSort("status")}
                >
                  Status
                  <div className={styles.icon}>
                    <Icon name="sort" size="small" color="black" />
                  </div>
                </div>
              </>
            )}
          </div>
        </div>
        <div className={styles.process_list}>
          {filteredProcess.map((process, index) => (
            <div key={process.id} className={styles.process_line}>
              <ProcessLine
                process={process}
                last={index === filteredProcess.length - 1}
                selected={selectedProcess.some(
                  (item) => item.id === process.id
                )}
                setSelected={(_selected, event) =>
                  handleSelect(process, index, event)
                }
              />
            </div>
          ))}
        </div>
      </div>
    </div>
  );
};

export default ProcessTab;
20 frontend/app/quality-assistant/page.module.scss Normal file
@ -0,0 +1,20 @@
@use "styles/Spacings.module.scss";

.page_wrapper {
  display: flex;
  flex-direction: column;
  gap: Spacings.$spacing05;
  width: 100%;
  height: 100vh;
  overflow: hidden;

  .content_wrapper {
    padding-inline: Spacings.$spacing09;
    padding-block: Spacings.$spacing05;
    overflow-y: auto;
    display: flex;
    flex-direction: column;
    gap: Spacings.$spacing05;
    height: 100%;
  }
}
47 frontend/app/quality-assistant/page.tsx Normal file
@ -0,0 +1,47 @@
"use client";

import { useState } from "react";

import PageHeader from "@/lib/components/PageHeader/PageHeader";
import { Tabs } from "@/lib/components/ui/Tabs/Tabs";
import { Tab } from "@/lib/types/Tab";

import AssistantTab from "./AssistantTab/AssistantTab";
import ProcessTab from "./ProcessTab/ProcessTab";
import styles from "./page.module.scss";

const QualityAssistant = (): JSX.Element => {
  const [selectedTab, setSelectedTab] = useState("Assistants");

  const qualityAssistantTab: Tab[] = [
    {
      label: "Assistants",
      isSelected: selectedTab === "Assistants",
      onClick: () => setSelectedTab("Assistants"),
      iconName: "assistant",
    },
    {
      label: "Process",
      isSelected: selectedTab === "Process",
      onClick: () => setSelectedTab("Process"),
      iconName: "waiting",
    },
  ];

  return (
    <div className={styles.page_wrapper}>
      <div className={styles.page_header}>
        <PageHeader iconName="assistant" label="Assistants" buttons={[]} />
      </div>
      <div className={styles.content_wrapper}>
        <Tabs tabList={qualityAssistantTab} />
        {selectedTab === "Assistants" && (
          <AssistantTab setSelectedTab={setSelectedTab} />
        )}
        {selectedTab === "Process" && <ProcessTab />}
      </div>
    </div>
  );
};

export default QualityAssistant;
123 frontend/app/quality-assistant/types/assistant.ts Normal file
@ -0,0 +1,123 @@
interface Pricing {
  cost: number;
  description: string;
}

interface InputFile {
  key: string;
  allowed_extensions: string[];
  required: boolean;
  description: string;
}

interface InputUrl {
  key: string;
  required: boolean;
  description: string;
}

interface InputText {
  key: string;
  required: boolean;
  description: string;
  validation_regex: string;
}

interface InputBoolean {
  key: string;
  required: boolean;
  description: string;
}

interface InputNumber {
  key: string;
  required: boolean;
  description: string;
  min: number;
  max: number;
  increment: number;
  default: number;
}

interface SelectText {
  key: string;
  required: boolean;
  description: string;
  options: string[];
  default: string;
}

interface SelectNumber {
  key: string;
  required: boolean;
  description: string;
  options: number[];
  default: number;
}

interface Brain {
  required: boolean;
  description: string;
  type: string;
}

interface Inputs {
  files: InputFile[];
  urls: InputUrl[];
  texts: InputText[];
  booleans?: InputBoolean[];
  numbers: InputNumber[];
  select_texts?: SelectText[];
  select_numbers: SelectNumber[];
  brain: Brain;
  conditional_inputs?: ConditionalInput[];
}

export interface Assistant {
  id: number;
  name: string;
  description: string;
  pricing: Pricing;
  tags: string[];
  input_description: string;
  output_description: string;
  inputs: Inputs;
  icon_url: string;
}

interface ProcessAssistantInputFile {
  key: string;
  value: string;
}

export interface ConditionalInput {
  key: string;
  conditional_key: string;
  condition: "equals" | "not_equals";
  value: string;
}

export interface ProcessAssistantData {
  id: number;
  name: string;
  inputs: {
    files?: ProcessAssistantInputFile[];
    urls?: { key: string; value: string }[];
    texts?: { key: string; value: string }[];
    booleans?: { key: string; value: boolean | null }[];
    numbers?: { key: string; value: number }[];
    select_texts?: { key: string; value: string | null }[];
    select_numbers?: { key: string; value: number }[];
    brain?: { value: string };
    conditional_inputs?: ConditionalInput[];
  };
}

export interface ProcessAssistantInput {
  input: ProcessAssistantData;
  files: File[];
}

export interface ResultDownload {
  data: string;
}
13 frontend/app/quality-assistant/types/process.ts Normal file
@ -0,0 +1,13 @@
export interface ProcessMetadata {
  input_files: string[];
}

export interface Process {
  answer: string;
  id: number;
  name: string;
  creation_time: string;
  status: "pending" | "processing" | "completed" | "error";
  assistant_name: string;
  task_metadata: ProcessMetadata;
}
@ -14,6 +14,7 @@
  margin-left: -(Spacings.$spacing05 + Spacings.$spacing03);
  gap: Spacings.$spacing03;
  align-items: center;
  padding-top: Spacings.$spacing05;

  .tabs {
    width: 100%;

@ -4,6 +4,7 @@ import { Checkbox } from "@/lib/components/ui/Checkbox/Checkbox";
import { Icon } from "@/lib/components/ui/Icon/Icon";
import { QuivrButton } from "@/lib/components/ui/QuivrButton/QuivrButton";
import { TextInput } from "@/lib/components/ui/TextInput/TextInput";
import { updateSelectedItems } from "@/lib/helpers/table";
import { useDevice } from "@/lib/hooks/useDevice";
import { isUploadedKnowledge, Knowledge } from "@/lib/types/Knowledge";

@ -58,51 +59,6 @@ const filterAndSortKnowledge = (
  return filteredList;
};

const updateSelectedKnowledge = ({
  knowledge,
  index,
  event,
  lastSelectedIndex,
  filteredKnowledgeList,
  selectedKnowledge,
}: {
  knowledge: Knowledge;
  index: number;
  event: React.MouseEvent;
  lastSelectedIndex: number | null;
  filteredKnowledgeList: Knowledge[];
  selectedKnowledge: Knowledge[];
}): { selectedKnowledge: Knowledge[]; lastSelectedIndex: number | null } => {
  if (event.shiftKey && lastSelectedIndex !== null) {
    const start = Math.min(lastSelectedIndex, index);
    const end = Math.max(lastSelectedIndex, index);
    const range = filteredKnowledgeList.slice(start, end + 1);

    const newSelected = [...selectedKnowledge];
    range.forEach((item) => {
      if (!newSelected.some((selectedItem) => selectedItem.id === item.id)) {
        newSelected.push(item);
      }
    });

    return { selectedKnowledge: newSelected, lastSelectedIndex: index };
  } else {
    const isSelected = selectedKnowledge.some(
      (item) => item.id === knowledge.id
    );
    const newSelectedKnowledge = isSelected
      ? selectedKnowledge.filter(
          (selectedItem) => selectedItem.id !== knowledge.id
        )
      : [...selectedKnowledge, knowledge];

    return {
      selectedKnowledge: newSelectedKnowledge,
      lastSelectedIndex: isSelected ? null : index,
    };
  }
};

const KnowledgeTable = React.forwardRef<HTMLDivElement, KnowledgeTableProps>(
  ({ knowledgeList }, ref) => {
    const [selectedKnowledge, setSelectedKnowledge] = useState<Knowledge[]>([]);
@ -131,15 +87,15 @@ const KnowledgeTable = React.forwardRef<HTMLDivElement, KnowledgeTableProps>(
      index: number,
      event: React.MouseEvent
    ) => {
      const newSelectedKnowledge = updateSelectedKnowledge({
        knowledge,
      const newSelectedKnowledge = updateSelectedItems<Knowledge>({
        item: knowledge,
        index,
        event,
        lastSelectedIndex,
        filteredKnowledgeList,
        selectedKnowledge,
        filteredList: filteredKnowledgeList,
        selectedItems: selectedKnowledge,
      });
      setSelectedKnowledge(newSelectedKnowledge.selectedKnowledge);
      setSelectedKnowledge(newSelectedKnowledge.selectedItems);
      setLastSelectedIndex(newSelectedKnowledge.lastSelectedIndex);
    };

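The two hunks above replace the knowledge-specific updateSelectedKnowledge helper with a generic `updateSelectedItems<T>` imported from `@/lib/helpers/table`, so ProcessTab.tsx and KnowledgeTable can share the shift-click range-selection logic. The helper's implementation is not shown in this diff; a minimal sketch of what it presumably looks like, generalized directly from the removed function and the call sites, would be:

import type { MouseEvent } from "react";

// Hypothetical sketch of updateSelectedItems<T>; the real implementation lives
// in @/lib/helpers/table and is not part of this diff. Shape inferred from the
// removed updateSelectedKnowledge and the call sites above.
export const updateSelectedItems = <T extends { id: string | number }>({
  item,
  index,
  event,
  lastSelectedIndex,
  filteredList,
  selectedItems,
}: {
  item: T;
  index: number;
  event: MouseEvent;
  lastSelectedIndex: number | null;
  filteredList: T[];
  selectedItems: T[];
}): { selectedItems: T[]; lastSelectedIndex: number | null } => {
  if (event.shiftKey && lastSelectedIndex !== null) {
    // Shift-click: add the whole range between the two clicks to the selection.
    const start = Math.min(lastSelectedIndex, index);
    const end = Math.max(lastSelectedIndex, index);
    const range = filteredList.slice(start, end + 1);
    const newSelected = [...selectedItems];
    range.forEach((candidate) => {
      if (!newSelected.some((selected) => selected.id === candidate.id)) {
        newSelected.push(candidate);
      }
    });

    return { selectedItems: newSelected, lastSelectedIndex: index };
  }

  // Plain click: toggle a single item in or out of the selection.
  const isSelected = selectedItems.some((selected) => selected.id === item.id);

  return {
    selectedItems: isSelected
      ? selectedItems.filter((selected) => selected.id !== item.id)
      : [...selectedItems, item],
    lastSelectedIndex: isSelected ? null : index,
  };
};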
@ -1,38 +1,63 @@
import { AxiosInstance } from "axios";

import { Assistant, ProcessAssistantRequest } from "./types";
import {
  Assistant,
  ProcessAssistantInput,
} from "@/app/quality-assistant/types/assistant";
import { Process } from "@/app/quality-assistant/types/process";

export const getAssistants = async (
  axiosInstance: AxiosInstance
): Promise<Assistant[] | undefined> => {
  return (await axiosInstance.get<Assistant[] | undefined>("/assistants")).data;
): Promise<Assistant[]> => {
  return (await axiosInstance.get<Assistant[]>(`/assistants`)).data;
};

export const processAssistant = async (
export const getTasks = async (
  axiosInstance: AxiosInstance
): Promise<Process[]> => {
  return (await axiosInstance.get<Process[]>(`/assistants/tasks`)).data;
};

export const processTask = async (
  axiosInstance: AxiosInstance,
  input: ProcessAssistantRequest,
  files: File[]
): Promise<string | undefined> => {
  processAssistantInput: ProcessAssistantInput
): Promise<string> => {
  const formData = new FormData();

  formData.append(
    "input",
    JSON.stringify({
      name: input.name,
      inputs: {
        files: input.inputs.files,
        urls: input.inputs.urls,
        texts: input.inputs.texts,
      },
      outputs: input.outputs,
    })
  );
  formData.append("input", JSON.stringify(processAssistantInput.input));

  files.forEach((file) => {
    formData.append("files", file);
  processAssistantInput.files.forEach((file) => {
    if (file instanceof File) {
      formData.append("files", file);
    } else {
      console.error("The element is not a valid file", file);
    }
  });

  return (
    await axiosInstance.post<string | undefined>("/assistant/process", formData)
  ).data;
  const response = await axiosInstance.post<string>(
    `/assistants/task`,
    formData,
    {
      headers: {
        "Content-Type": "multipart/form-data",
      },
    }
  );

  return response.data;
};

export const deleteTask = async (
  axiosInstance: AxiosInstance,
  taskId: number
): Promise<void> => {
  await axiosInstance.delete(`/assistants/task/${taskId}`);
};

export const downloadTaskResult = async (
  axiosInstance: AxiosInstance,
  taskId: number
): Promise<string> => {
  return (await axiosInstance<string>(`/assistants/task/${taskId}/download`))
    .data;
};
@ -1,7 +1,13 @@
import { ProcessAssistantInput } from "@/app/quality-assistant/types/assistant";
import { useAxios } from "@/lib/hooks";

import { getAssistants, processAssistant } from "./assistants";
import { ProcessAssistantRequest } from "./types";
import {
  deleteTask,
  downloadTaskResult,
  getAssistants,
  getTasks,
  processTask,
} from "./assistants";

// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export const useAssistants = () => {
@ -9,7 +15,11 @@ export const useAssistants = () => {
  return {
    getAssistants: async () => getAssistants(axiosInstance),
    processAssistant: async (input: ProcessAssistantRequest, files: File[]) =>
      processAssistant(axiosInstance, input, files),
    getTasks: async () => getTasks(axiosInstance),
    processTask: async (processAssistantInput: ProcessAssistantInput) =>
      processTask(axiosInstance, processAssistantInput),
    deleteTask: async (taskId: number) => deleteTask(axiosInstance, taskId),
    downloadTaskResult: async (taskId: number) =>
      downloadTaskResult(axiosInstance, taskId),
  };
};
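The reworked hook now exposes the full task lifecycle (create, list, download, delete). A hypothetical usage sketch of the new `processTask` entry point; the assistant id, name, input key, and component name below are placeholder values, not part of the diff:

import { useAssistants } from "@/lib/api/assistants/useAssistants";

const RunAssistantButton = ({ files }: { files: File[] }): JSX.Element => {
  const { processTask } = useAssistants();

  const handleRun = async () => {
    // `input` follows the ProcessAssistantData shape from types/assistant.ts;
    // the File objects ride along as multipart form parts.
    const taskId = await processTask({
      input: {
        id: 1, // placeholder assistant id
        name: "example-assistant", // placeholder assistant name
        inputs: {
          files: files.map((file) => ({ key: "file_1", value: file.name })),
        },
      },
      files,
    });
    console.log("created task:", taskId);
  };

  return <button onClick={() => void handleRun()}>Run assistant</button>;
};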
@ -1,5 +1,6 @@
import { MotionConfig } from "framer-motion";
import { usePathname, useRouter } from "next/navigation";
import { useFeatureFlagEnabled } from "posthog-js/react";
import { useState } from "react";

import { MenuControlButton } from "@/app/chat/[chatId]/components/ActionsBar/components/ChatInput/components/MenuControlButton/MenuControlButton";
@ -17,11 +18,13 @@ import { HomeButton } from "./components/HomeButton/HomeButton";
import { Notifications } from "./components/Notifications/Notifications";
import { NotificationsButton } from "./components/NotificationsButton/NotificationsButton";
import { ProfileButton } from "./components/ProfileButton/ProfileButton";
import { QualityAssistantButton } from "./components/QualityAssistantButton/QualityAssistantButton";
import { SocialsButtons } from "./components/SocialsButtons/SocialsButtons";
import { StudioButton } from "./components/StudioButton/StudioButton";
import { ThreadsButton } from "./components/ThreadsButton/ThreadsButton";
import { UpgradeToPlusButton } from "./components/UpgradeToPlusButton/UpgradeToPlusButton";

const showUpgradeButton = process.env.NEXT_PUBLIC_SHOW_TOKENS === "true";

export const Menu = (): JSX.Element => {
@ -31,6 +34,8 @@ export const Menu = (): JSX.Element => {
  const pathname = usePathname() ?? "";
  const [isLogoHovered, setIsLogoHovered] = useState<boolean>(false);
  const { isDarkMode } = useUserSettingsContext();
  const flagEnabled = useFeatureFlagEnabled("show-quality-assistant");

  useChatsList();
@ -44,6 +49,7 @@ export const Menu = (): JSX.Element => {
    "/library",
    "/search",
    "studio",
    "/quality-assistant",
    "/user",
  ];
@ -59,9 +65,8 @@ export const Menu = (): JSX.Element => {
    <div>
      <MotionConfig transition={{ mass: 1, damping: 10, duration: 0.1 }}>
        <div
          className={`${styles.menu_container} ${
            !isOpened ? styles.hidden : ""
          }`}
          className={`${styles.menu_container} ${!isOpened ? styles.hidden : ""
            }`}
        >
          <AnimatedDiv>
            <div className={styles.menu_wrapper}>
@ -83,6 +88,8 @@ export const Menu = (): JSX.Element => {
              <div className={styles.block}>
                <DiscussionButton />
                <HomeButton />

                {flagEnabled && <QualityAssistantButton />}
                <StudioButton />
                <NotificationsButton />
                <ThreadsButton />
Some files were not shown because too many files have changed in this diff.