mirror of
https://github.com/StanGirard/quivr.git
synced 2024-12-26 12:52:05 +03:00
Better envs
This commit is contained in:
parent
d39efcddab
commit
eaed176f0a
@ -1,4 +1,4 @@
|
|||||||
SUPABASE_URL="XXXXX"
|
SUPABASE_URL="XXXXX"
|
||||||
SUPABASE_SERVICE_KEY="eyXXXXX"
|
SUPABASE_SERVICE_KEY="eyXXXXX"
|
||||||
OPENAI_API_KEY="sk-XXXXXX"
|
OPENAI_API_KEY="sk-XXXXXX"
|
||||||
anthropic_api_key="XXXXXX"
|
ANTHROPIC_API_KEY="XXXXXX"
|
@ -1 +1,2 @@
|
|||||||
ENV=local
|
ENV=local
|
||||||
|
NEXT_PUBLIC_BACKEND_URL=http://localhost:5050
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,6 +3,7 @@ secondbrain/
|
|||||||
.streamlit/secrets.toml
|
.streamlit/secrets.toml
|
||||||
**/*.pyc
|
**/*.pyc
|
||||||
toto.txt
|
toto.txt
|
||||||
|
*.ipynb
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,11 +81,11 @@ Additionally, you'll need a [Supabase](https://supabase.com/) account for:
|
|||||||
- **Step 2**: Copy the `.XXXXX_env` files
|
- **Step 2**: Copy the `.XXXXX_env.example` files to `backend/.env` and `frontend/.env`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp .backend_env.example .backend_env
|
cp .backend_env.example backend/.env
|
||||||
cp .frontend_env.example .frontend_env
|
cp .frontend_env.example frontend/.env
|
||||||
```
|
```
|
||||||
|
|
||||||
- **Step 3**: Update the `.backend_env` file
|
- **Step 3**: Update the `backend/.env` file
|
||||||
|
|
||||||
> _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
|
> _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
|
||||||
|
|
||||||
@ -95,6 +95,8 @@ cp .frontend_env.example .frontend_env
|
|||||||
|
|
||||||
[Migration Script 2](scripts/supabase_usage_table.sql)
|
[Migration Script 2](scripts/supabase_usage_table.sql)
|
||||||
|
|
||||||
|
[Migration Script 3](scripts/supabase_vector_store_document.sql)
|
||||||
|
|
||||||
- **Step 5**: Launch the app
|
- **Step 5**: Launch the app
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
|
|||||||
|
|
||||||
COPY . /code/
|
COPY . /code/
|
||||||
|
|
||||||
CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]
|
CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
|
||||||
|
@ -67,14 +67,12 @@ memory = ConversationBufferMemory(
|
|||||||
class ChatMessage(BaseModel):
|
class ChatMessage(BaseModel):
|
||||||
model: str = "gpt-3.5-turbo"
|
model: str = "gpt-3.5-turbo"
|
||||||
question: str
|
question: str
|
||||||
history: List[Tuple[str, str]] # A list of tuples where each tuple is (speaker, text)
|
# A list of tuples where each tuple is (speaker, text)
|
||||||
|
history: List[Tuple[str, str]]
|
||||||
temperature: float = 0.0
|
temperature: float = 0.0
|
||||||
max_tokens: int = 256
|
max_tokens: int = 256
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
file_processors = {
|
file_processors = {
|
||||||
".txt": process_txt,
|
".txt": process_txt,
|
||||||
".csv": process_csv,
|
".csv": process_csv,
|
||||||
@ -95,6 +93,7 @@ file_processors = {
|
|||||||
".ipynb": process_ipnyb,
|
".ipynb": process_ipnyb,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
|
async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
|
||||||
if await file_already_exists(supabase, file):
|
if await file_already_exists(supabase, file):
|
||||||
return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
|
return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
|
||||||
@ -108,17 +107,19 @@ async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
|
|||||||
else:
|
else:
|
||||||
return {"message": f"❌ {file.filename} is not supported.", "type": "error"}
|
return {"message": f"❌ {file.filename} is not supported.", "type": "error"}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/upload")
|
@app.post("/upload")
|
||||||
async def upload_file(file: UploadFile):
|
async def upload_file(file: UploadFile):
|
||||||
message = await filter_file(file, supabase, vector_store, stats_db=None)
|
message = await filter_file(file, supabase, vector_store, stats_db=None)
|
||||||
return message
|
return message
|
||||||
|
|
||||||
|
|
||||||
@app.post("/chat/")
|
@app.post("/chat/")
|
||||||
async def chat_endpoint(chat_message: ChatMessage):
|
async def chat_endpoint(chat_message: ChatMessage):
|
||||||
history = chat_message.history
|
history = chat_message.history
|
||||||
# Logic from your Streamlit app goes here. For example:
|
# Logic from your Streamlit app goes here. For example:
|
||||||
|
|
||||||
#this overwrites the built-in prompt of the ConversationalRetrievalChain
|
# this overwrites the built-in prompt of the ConversationalRetrievalChain
|
||||||
ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT
|
ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT
|
||||||
|
|
||||||
qa = None
|
qa = None
|
||||||
@ -137,9 +138,10 @@ async def chat_endpoint(chat_message: ChatMessage):
|
|||||||
|
|
||||||
return {"history": history}
|
return {"history": history}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/crawl/")
|
@app.post("/crawl/")
|
||||||
async def crawl_endpoint(crawl_website: CrawlWebsite):
|
async def crawl_endpoint(crawl_website: CrawlWebsite):
|
||||||
|
|
||||||
file_path, file_name = crawl_website.process()
|
file_path, file_name = crawl_website.process()
|
||||||
|
|
||||||
# Create a SpooledTemporaryFile from the file_path
|
# Create a SpooledTemporaryFile from the file_path
|
||||||
@ -152,9 +154,11 @@ async def crawl_endpoint(crawl_website: CrawlWebsite):
|
|||||||
message = await filter_file(file, supabase, vector_store, stats_db=None)
|
message = await filter_file(file, supabase, vector_store, stats_db=None)
|
||||||
return message
|
return message
|
||||||
|
|
||||||
|
|
||||||
@app.get("/explore")
|
@app.get("/explore")
|
||||||
async def explore_endpoint():
|
async def explore_endpoint():
|
||||||
response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
|
response = supabase.table("documents").select(
|
||||||
|
"name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
|
||||||
documents = response.data # Access the data from the response
|
documents = response.data # Access the data from the response
|
||||||
# Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
|
# Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
|
||||||
unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
||||||
@ -163,22 +167,23 @@ async def explore_endpoint():
|
|||||||
|
|
||||||
return {"documents": unique_data}
|
return {"documents": unique_data}
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/explore/{file_name}")
|
@app.delete("/explore/{file_name}")
|
||||||
async def delete_endpoint(file_name: str):
|
async def delete_endpoint(file_name: str):
|
||||||
response = supabase.table("documents").delete().match({"metadata->>file_name": file_name}).execute()
|
response = supabase.table("documents").delete().match(
|
||||||
|
{"metadata->>file_name": file_name}).execute()
|
||||||
return {"message": f"{file_name} has been deleted."}
|
return {"message": f"{file_name} has been deleted."}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/explore/{file_name}")
|
@app.get("/explore/{file_name}")
|
||||||
async def download_endpoint(file_name: str):
|
async def download_endpoint(file_name: str):
|
||||||
response = supabase.table("documents").select("metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
|
response = supabase.table("documents").select(
|
||||||
|
"metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
|
||||||
documents = response.data
|
documents = response.data
|
||||||
### Returns all documents with the same file name
|
# Returns all documents with the same file name
|
||||||
return {"documents": documents}
|
return {"documents": documents}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
return {"message": "Hello World"}
|
return {"message": "Hello World"}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ version: "3"
|
|||||||
services:
|
services:
|
||||||
frontend:
|
frontend:
|
||||||
env_file:
|
env_file:
|
||||||
- .frontend_env
|
- ./frontend/.env
|
||||||
build:
|
build:
|
||||||
context: frontend
|
context: frontend
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
@ -17,9 +17,7 @@ services:
|
|||||||
- 3000:3000
|
- 3000:3000
|
||||||
backend:
|
backend:
|
||||||
env_file:
|
env_file:
|
||||||
- .backend_env
|
- ./backend/.env
|
||||||
environment:
|
|
||||||
- supabase_url="totot"
|
|
||||||
build:
|
build:
|
||||||
context: backend
|
context: backend
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
@ -28,4 +26,4 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./backend/:/code/
|
- ./backend/:/code/
|
||||||
ports:
|
ports:
|
||||||
- 5000:5000
|
- 5050:5050
|
@ -1 +1,2 @@
|
|||||||
ENV=local
|
ENV=local
|
||||||
|
BACKEND_URL="http://localhost:5050"
|
@ -28,7 +28,7 @@ export default function ChatPage() {
|
|||||||
const askQuestion = async () => {
|
const askQuestion = async () => {
|
||||||
setHistory((hist) => [...hist, ["user", question]]);
|
setHistory((hist) => [...hist, ["user", question]]);
|
||||||
setIsPending(true);
|
setIsPending(true);
|
||||||
const response = await axios.post("http://localhost:5000/chat/", {
|
const response = await axios.post(`${process.env.NEXT_PUBLIC_BACKEND_URL}/chat/`, {
|
||||||
model,
|
model,
|
||||||
question,
|
question,
|
||||||
history,
|
history,
|
||||||
|
@ -18,7 +18,8 @@ export default function ExplorePage() {
|
|||||||
|
|
||||||
const fetchDocuments = async () => {
|
const fetchDocuments = async () => {
|
||||||
try {
|
try {
|
||||||
const response = await axios.get<{ documents: Document[] }>('http://localhost:5000/explore');
|
console.log(`Fetching documents from ${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
|
||||||
|
const response = await axios.get<{ documents: Document[] }>(`${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
|
||||||
setDocuments(response.data.documents);
|
setDocuments(response.data.documents);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching documents', error);
|
console.error('Error fetching documents', error);
|
||||||
|
@ -61,7 +61,7 @@ export default function UploadPage() {
|
|||||||
formData.append("file", file);
|
formData.append("file", file);
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(
|
const response = await axios.post(
|
||||||
"http://localhost:5000/upload",
|
`${process.env.NEXT_PUBLIC_BACKEND_URL}/upload`,
|
||||||
formData
|
formData
|
||||||
);
|
);
|
||||||
|
|
||||||
|
1627
frontend/package-lock.json
generated
1627
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -34,6 +34,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/typography": "^0.5.9",
|
"@tailwindcss/typography": "^0.5.9",
|
||||||
|
"@types/next": "^9.0.0",
|
||||||
"react-icons": "^4.8.0"
|
"react-icons": "^4.8.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
create extension vector;
|
create extension vector;
|
||||||
|
|
||||||
-- Create a table to store your documents
|
-- Create a table to store your documents
|
||||||
create table documents (
|
create table if not exists documents (
|
||||||
id bigserial primary key,
|
id bigserial primary key,
|
||||||
content text, -- corresponds to Document.pageContent
|
content text, -- corresponds to Document.pageContent
|
||||||
metadata jsonb, -- corresponds to Document.metadata
|
metadata jsonb, -- corresponds to Document.metadata
|
||||||
|
38
scripts/supabase_vector_store_summary.sql
Normal file
38
scripts/supabase_vector_store_summary.sql
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
-- Create a table to store your summaries
|
||||||
|
create table if not exists summaries (
|
||||||
|
id bigserial primary key,
|
||||||
|
document_id bigint references documents(id),
|
||||||
|
content text, -- corresponds to the summarized content
|
||||||
|
metadata jsonb, -- corresponds to Document.metadata
|
||||||
|
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float)
|
||||||
|
RETURNS TABLE(
|
||||||
|
id bigint,
|
||||||
|
document_id bigint,
|
||||||
|
content text,
|
||||||
|
metadata jsonb,
|
||||||
|
-- we return matched vectors to enable maximal marginal relevance searches
|
||||||
|
embedding vector(1536),
|
||||||
|
similarity float)
|
||||||
|
LANGUAGE plpgsql
|
||||||
|
AS $$
|
||||||
|
# variable_conflict use_column
|
||||||
|
BEGIN
|
||||||
|
RETURN query
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
document_id,
|
||||||
|
content,
|
||||||
|
metadata,
|
||||||
|
embedding,
|
||||||
|
1 -(summaries.embedding <=> query_embedding) AS similarity
|
||||||
|
FROM
|
||||||
|
summaries
|
||||||
|
WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
|
||||||
|
ORDER BY
|
||||||
|
summaries.embedding <=> query_embedding
|
||||||
|
LIMIT match_count;
|
||||||
|
END;
|
||||||
|
$$;
|
Loading…
Reference in New Issue
Block a user