mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-14 17:03:29 +03:00
feat: 🎸 ocr (#2187)
added ocr

# Description

Please include a summary of the changes and the related issue. Please also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
parent
d4b40b3b42
commit
2ba3bc1f07
@ -4,8 +4,8 @@
|
||||
{
|
||||
"name": "quivr-chat",
|
||||
"image": "253053805092.dkr.ecr.eu-west-3.amazonaws.com/quivr:600ff1ede02741c66853cc3e4e7f5001aaba3bc2",
|
||||
"cpu": "1024",
|
||||
"memory": "2048",
|
||||
"cpu": "4096",
|
||||
"memory": "8192",
|
||||
"essential": true,
|
||||
"command": ["celery", "-A", "celery_worker", "worker", "-l", "info"],
|
||||
"environment": [],
|
||||
@ -70,8 +70,8 @@
|
||||
"placementConstraints": [],
|
||||
"compatibilities": ["EC2", "FARGATE"],
|
||||
"requiresCompatibilities": ["FARGATE"],
|
||||
"cpu": "1024",
|
||||
"memory": "2048",
|
||||
"cpu": "4096",
|
||||
"memory": "8192",
|
||||
"runtimePlatform": {
|
||||
"cpuArchitecture": "X86_64",
|
||||
"operatingSystemFamily": "LINUX"
|
||||
|
@ -4,9 +4,9 @@
|
||||
{
|
||||
"name": "quivr-chat",
|
||||
"image": "253053805092.dkr.ecr.eu-west-3.amazonaws.com/quivr:35bd4727c67790d295a474dd81dfbef8469365e8",
|
||||
"cpu": 2048,
|
||||
"memory": 4096,
|
||||
"memoryReservation": 4096,
|
||||
"cpu": "4096",
|
||||
"memory": "8192",
|
||||
"memoryReservation": 8192,
|
||||
"portMappings": [],
|
||||
"essential": true,
|
||||
"command": ["celery", "-A", "celery_worker", "worker", "-l", "info"],
|
||||
@ -76,8 +76,8 @@
|
||||
"placementConstraints": [],
|
||||
"compatibilities": ["EC2", "FARGATE"],
|
||||
"requiresCompatibilities": ["FARGATE"],
|
||||
"cpu": "2048",
|
||||
"memory": "4096",
|
||||
"cpu": "4096",
|
||||
"memory": "8192",
|
||||
"runtimePlatform": {
|
||||
"cpuArchitecture": "X86_64",
|
||||
"operatingSystemFamily": "LINUX"
|
||||
|
2
Pipfile
2
Pipfile
@ -13,7 +13,6 @@ nest-asyncio = "==1.5.6"
|
||||
pypdf = "==3.9.0"
|
||||
supabase = "==1.1.0"
|
||||
tiktoken = "==0.4.0"
|
||||
unstructured = "==0.6.7"
|
||||
fastapi = "==0.95.2"
|
||||
python-multipart = "==0.0.6"
|
||||
uvicorn = "==0.22.0"
|
||||
@ -46,6 +45,7 @@ pytest = "*"
|
||||
ddtrace = "*"
|
||||
watchdog = "*"
|
||||
langchain-community = "*"
|
||||
unstructured = {extras = ["all-docs"], version = "*"}
|
||||
|
||||
[dev-packages]
|
||||
black = "*"
|
||||
|
1957
Pipfile.lock
generated
1957
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
@ -20,7 +20,13 @@ RUN apt-get clean && apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
libtool \
|
||||
python-dev \
|
||||
build-essential && \
|
||||
build-essential \
|
||||
# Additional dependencies for document handling
|
||||
libmagic-dev \
|
||||
poppler-utils \
|
||||
tesseract-ocr \
|
||||
libreoffice \
|
||||
pandoc && \
|
||||
rm -rf /var/lib/apt/lists/* && apt-get clean
|
||||
|
||||
# Add Rust binaries to the PATH
|
||||
|
@ -17,7 +17,13 @@ RUN apt-get clean && apt-get update && apt-get install -y \
|
||||
automake \
|
||||
libtool \
|
||||
python-dev \
|
||||
build-essential && \
|
||||
build-essential \
|
||||
# Additional dependencies for document handling
|
||||
libmagic-dev \
|
||||
poppler-utils \
|
||||
tesseract-ocr \
|
||||
libreoffice \
|
||||
pandoc && \
|
||||
rm -rf /var/lib/apt/lists/* && apt-get clean
|
||||
|
||||
# Add Rust binaries to the PATH
|
||||
|
@ -2,8 +2,8 @@
|
||||
aiohttp==3.9.3; python_version >= '3.8'
|
||||
aiosignal==1.3.1; python_version >= '3.7'
|
||||
amqp==5.2.0; python_version >= '3.6'
|
||||
antlr4-python3-runtime==4.9.3
|
||||
anyio==3.7.1; python_version >= '3.7'
|
||||
argilla==1.23.0; python_version < '3.12' and python_version >= '3.8'
|
||||
async-generator==1.10; python_version >= '3.5'
|
||||
async-timeout==4.0.3; python_full_version <= '3.11.2'
|
||||
asyncpg==0.27.0; python_full_version >= '3.7.0'
|
||||
@ -11,7 +11,7 @@ attrs==23.2.0; python_version >= '3.7'
|
||||
backoff==2.2.1; python_version >= '3.7' and python_version < '4.0'
|
||||
beautifulsoup4==4.12.3; python_full_version >= '3.6.0'
|
||||
billiard==4.2.0; python_version >= '3.7'
|
||||
black==24.1.1; python_version >= '3.8'
|
||||
black==24.2.0; python_version >= '3.8'
|
||||
boto3==1.33.7; python_version >= '3.7'
|
||||
botocore==1.33.7; python_version >= '3.7'
|
||||
bytecode==0.15.1; python_version >= '3.8'
|
||||
@ -19,30 +19,40 @@ cattrs==23.2.3; python_version >= '3.8'
|
||||
celery[sqs]==5.3.6; python_version >= '3.8'
|
||||
certifi==2024.2.2; python_version >= '3.6'
|
||||
cffi==1.16.0; platform_python_implementation != 'PyPy'
|
||||
chardet==5.2.0; python_version >= '3.7'
|
||||
charset-normalizer==3.3.2; python_full_version >= '3.7.0'
|
||||
click==8.1.7; python_version >= '3.7'
|
||||
click-didyoumean==0.3.0; python_full_version >= '3.6.2' and python_full_version < '4.0.0'
|
||||
click-plugins==1.1.1
|
||||
click-repl==0.3.0; python_version >= '3.6'
|
||||
coloredlogs==15.0.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
contourpy==1.2.0; python_version >= '3.9'
|
||||
cryptography==42.0.2; python_version >= '3.7'
|
||||
cssselect==1.2.0; python_version >= '3.7'
|
||||
cycler==0.12.1; python_version >= '3.8'
|
||||
dataclasses-json==0.6.4; python_version >= '3.7' and python_version < '4.0'
|
||||
dataclasses-json-speakeasy==0.5.11; python_version >= '3.7' and python_version < '4.0'
|
||||
ddsketch==2.0.4; python_version >= '2.7'
|
||||
ddtrace==2.5.2; python_version >= '3.7'
|
||||
ddtrace==2.6.0; python_version >= '3.7'
|
||||
deprecated==1.2.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
deprecation==2.1.0
|
||||
distro==1.9.0; python_version >= '3.6'
|
||||
docx2txt==0.8
|
||||
ecdsa==0.18.0; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
effdet==0.4.1
|
||||
emoji==2.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
envier==0.5.1; python_version >= '3.7'
|
||||
et-xmlfile==1.1.0; python_version >= '3.6'
|
||||
fastapi==0.95.2; python_version >= '3.7'
|
||||
feedfinder2==0.0.4
|
||||
feedparser==6.0.11; python_version >= '3.6'
|
||||
filelock==3.13.1; python_version >= '3.8'
|
||||
filetype==1.2.0
|
||||
flake8==6.0.0; python_full_version >= '3.8.1'
|
||||
flake8-black==0.3.6; python_version >= '3.7'
|
||||
flatbuffers==23.5.26
|
||||
flower==2.0.1; python_version >= '3.7'
|
||||
fonttools==4.48.1; python_version >= '3.8'
|
||||
frozenlist==1.4.1; python_version >= '3.8'
|
||||
fsspec==2024.2.0; python_version >= '3.8'
|
||||
gitdb==4.0.11; python_version >= '3.7'
|
||||
@ -53,65 +63,82 @@ html5lib==1.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
|
||||
httpcore==0.17.3; python_version >= '3.7'
|
||||
httpx==0.24.1; python_version >= '3.7'
|
||||
huggingface-hub==0.20.3; python_full_version >= '3.8.0'
|
||||
humanfriendly==10.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
humanize==4.9.0; python_version >= '3.8'
|
||||
idna==3.6; python_version >= '3.5'
|
||||
importlib-metadata==6.11.0; python_version >= '3.8'
|
||||
iniconfig==2.0.0; python_version >= '3.7'
|
||||
iopath==0.1.10; python_version >= '3.6'
|
||||
jieba3k==0.35.1
|
||||
jinja2==3.1.3; python_version >= '3.7'
|
||||
jmespath==1.0.1; python_version >= '3.7'
|
||||
joblib==1.3.2; python_version >= '3.7'
|
||||
jq==1.6.0; python_version >= '3.5'
|
||||
jsonpatch==1.33; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
|
||||
jsonpath-python==1.0.6; python_version >= '3.6'
|
||||
jsonpointer==2.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
|
||||
kiwisolver==1.4.5; python_version >= '3.7'
|
||||
kombu[sqs]==5.3.5; python_version >= '3.8'
|
||||
langchain==0.1.5; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langchain-community==0.0.17; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langchain-core==0.1.18; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langsmith==0.0.86; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
litellm==1.22.5; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'
|
||||
langchain==0.1.6; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langchain-community==0.0.19; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langchain-core==0.1.22; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
langdetect==1.0.9
|
||||
langsmith==0.0.87; python_version < '4.0' and python_full_version >= '3.8.1'
|
||||
layoutparser[layoutmodels,tesseract]==0.3.4; python_version >= '3.6'
|
||||
litellm==1.23.10; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'
|
||||
lxml==5.1.0; python_version >= '3.6'
|
||||
markdown==3.5.2; python_version >= '3.8'
|
||||
markdown-it-py==3.0.0; python_version >= '3.8'
|
||||
markupsafe==2.1.5; python_version >= '3.7'
|
||||
marshmallow==3.20.2; python_version >= '3.8'
|
||||
matplotlib==3.8.2; python_version >= '3.9'
|
||||
mccabe==0.7.0; python_version >= '3.6'
|
||||
mdurl==0.1.2; python_version >= '3.7'
|
||||
monotonic==1.6
|
||||
mpmath==1.3.0
|
||||
msg-parser==1.2.0; python_version >= '3.4'
|
||||
multidict==6.0.5; python_version >= '3.7'
|
||||
mypy-extensions==1.0.0; python_version >= '3.5'
|
||||
nest-asyncio==1.5.6; python_version >= '3.5'
|
||||
networkx==3.2.1
|
||||
newspaper3k==0.2.8
|
||||
nltk==3.8.1; python_version >= '3.7'
|
||||
nodeenv==1.8.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
|
||||
numpy==1.23.5; python_version >= '3.8'
|
||||
numpy==1.26.4; python_version >= '3.9'
|
||||
olefile==0.47; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
omegaconf==2.3.0; python_version >= '3.6'
|
||||
onnx==1.15.0
|
||||
onnxruntime==1.15.1
|
||||
openai==1.1.1; python_full_version >= '3.7.1'
|
||||
opencv-python==4.9.0.80; python_version >= '3.6'
|
||||
openpyxl==3.1.2; python_version >= '3.6'
|
||||
opentelemetry-api==1.22.0; python_version >= '3.7'
|
||||
packaging==23.2; python_version >= '3.7'
|
||||
pandas==1.5.3; python_version >= '3.8'
|
||||
pandas==2.2.0; python_version >= '3.8'
|
||||
pathspec==0.12.1; python_version >= '3.8'
|
||||
pdf2image==1.16.3
|
||||
pdfminer.six==20231228; python_version >= '3.6'
|
||||
pdfminer.six==20221105; python_version >= '3.6'
|
||||
pdfplumber==0.10.4; python_version >= '3.8'
|
||||
pikepdf==8.12.0
|
||||
pillow==10.2.0; python_version >= '3.8'
|
||||
pillow-heif==0.15.0
|
||||
platformdirs==4.2.0; python_version >= '3.8'
|
||||
pluggy==1.4.0; python_version >= '3.8'
|
||||
portalocker==2.8.2; python_version >= '3.8'
|
||||
postgrest==0.11.0; python_version >= '3.8' and python_version < '4.0'
|
||||
posthog==3.1.0
|
||||
prometheus-client==0.19.0; python_version >= '3.8'
|
||||
prompt-toolkit==3.0.43; python_full_version >= '3.7.0'
|
||||
protobuf==4.25.2; python_version >= '3.8'
|
||||
pyasn1==0.5.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
pycocotools==2.0.7; python_version >= '3.5'
|
||||
pycodestyle==2.10.0; python_version >= '3.6'
|
||||
pycparser==2.21
|
||||
pycurl==7.45.2
|
||||
pydantic==1.10.14; python_version >= '3.7'
|
||||
pyflakes==3.0.1; python_version >= '3.6'
|
||||
pygments==2.17.2; python_version >= '3.7'
|
||||
pypandoc==1.11; python_version >= '3.6'
|
||||
pyparsing==3.1.1; python_full_version >= '3.6.8'
|
||||
pypdf==3.9.0; python_version >= '3.6'
|
||||
pypdfium2==4.27.0; python_version >= '3.6'
|
||||
pyright==1.1.316; python_version >= '3.7'
|
||||
pytesseract==0.3.10; python_version >= '3.7'
|
||||
pytest==8.0.0; python_version >= '3.8'
|
||||
@ -120,54 +147,65 @@ pytest-mock==3.12.0; python_version >= '3.8'
|
||||
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
python-docx==1.1.0; python_version >= '3.7'
|
||||
python-dotenv==1.0.1; python_version >= '3.8'
|
||||
python-iso639==2024.2.7; python_version >= '3.8'
|
||||
python-jose==3.3.0
|
||||
python-magic==0.4.27; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
python-multipart==0.0.6; python_version >= '3.7'
|
||||
python-pptx==0.6.23
|
||||
pytz==2024.1
|
||||
pyyaml==6.0.1; python_version >= '3.6'
|
||||
rapidfuzz==3.6.1; python_version >= '3.8'
|
||||
realtime==1.0.2; python_version >= '3.8' and python_version < '4.0'
|
||||
redis==4.5.4; python_version >= '3.7'
|
||||
regex==2023.12.25; python_version >= '3.7'
|
||||
requests==2.31.0; python_version >= '3.7'
|
||||
requests-file==2.0.0
|
||||
resend==0.5.1; python_version >= '3.7'
|
||||
rich==13.7.0; python_full_version >= '3.7.0'
|
||||
rsa==4.9; python_version >= '3.6' and python_version < '4'
|
||||
s3transfer==0.8.2; python_version >= '3.7'
|
||||
sentry-sdk[fastapi]==1.40.0
|
||||
setuptools==69.0.3; python_version >= '3.8'
|
||||
safetensors==0.4.2; python_version >= '3.7'
|
||||
scipy==1.12.0; python_version >= '3.9'
|
||||
sentry-sdk[fastapi]==1.40.3
|
||||
setuptools==69.1.0; python_version >= '3.8'
|
||||
sgmllib3k==1.0.0
|
||||
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
smmap==5.0.1; python_version >= '3.7'
|
||||
sniffio==1.3.0; python_version >= '3.7'
|
||||
soupsieve==2.5; python_version >= '3.8'
|
||||
sqlalchemy==2.0.25; python_version >= '3.7'
|
||||
sqlalchemy==2.0.26; python_version >= '3.7'
|
||||
starlette==0.27.0; python_version >= '3.7'
|
||||
storage3==0.6.1; python_version >= '3.8' and python_version < '4.0'
|
||||
strenum==0.4.15
|
||||
supabase==1.1.0; python_version >= '3.8' and python_version < '4.0'
|
||||
supafunc==0.2.3; python_version >= '3.8' and python_version < '4.0'
|
||||
sympy==1.12; python_version >= '3.8'
|
||||
tabulate==0.9.0; python_version >= '3.7'
|
||||
tenacity==8.2.3; python_version >= '3.7'
|
||||
tiktoken==0.4.0; python_version >= '3.8'
|
||||
timm==0.9.12; python_version >= '3.7'
|
||||
tinysegmenter==0.3
|
||||
tldextract==5.1.1; python_version >= '3.8'
|
||||
tokenizers==0.15.1; python_version >= '3.7'
|
||||
tokenizers==0.15.2; python_version >= '3.7'
|
||||
torch==2.2.0
|
||||
torchvision==0.17.0
|
||||
tornado==6.4; python_version >= '3.8'
|
||||
tqdm==4.66.1; python_version >= '3.7'
|
||||
typer==0.9.0; python_version >= '3.6'
|
||||
tqdm==4.66.2; python_version >= '3.7'
|
||||
transformers==4.37.2; python_full_version >= '3.8.0'
|
||||
typing-extensions==4.9.0; python_version >= '3.8'
|
||||
typing-inspect==0.9.0
|
||||
tzdata==2023.4; python_version >= '2'
|
||||
unstructured==0.6.7; python_full_version >= '3.7.0'
|
||||
tzdata==2024.1; python_version >= '2'
|
||||
unstructured[all-docs]==0.12.4; python_version < '3.12' and python_full_version >= '3.9.0'
|
||||
unstructured-client==0.18.0; python_version >= '3.8'
|
||||
unstructured-inference==0.7.23
|
||||
unstructured.pytesseract==0.3.12
|
||||
urllib3==2.0.7; python_version >= '3.10'
|
||||
uvicorn==0.22.0; python_version >= '3.7'
|
||||
vine==5.1.0; python_version >= '3.6'
|
||||
watchdog==3.0.0; python_version >= '3.7'
|
||||
watchdog==4.0.0; python_version >= '3.8'
|
||||
wcwidth==0.2.13
|
||||
webencodings==0.5.1
|
||||
websockets==11.0.3; python_version >= '3.7'
|
||||
wrapt==1.14.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
wrapt==1.16.0; python_version >= '3.6'
|
||||
xlrd==1.0.0
|
||||
xlsxwriter==3.1.9; python_version >= '3.6'
|
||||
xmltodict==0.13.0; python_version >= '3.4'
|
||||
|
@ -71,7 +71,7 @@ site_url = "http://localhost:3000"
|
||||
# A list of *exact* URLs that auth providers are permitted to redirect to post authentication.
|
||||
additional_redirect_urls = ["https://localhost:3000"]
|
||||
# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week).
|
||||
jwt_expiry = 3600
|
||||
jwt_expiry = 604800
|
||||
# If disabled, the refresh token will never expire.
|
||||
enable_refresh_token_rotation = true
|
||||
# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds.
|
||||
|
Loading…
Reference in New Issue
Block a user