Better envs

This commit is contained in:
shaun 2023-05-20 23:32:22 -07:00
parent d39efcddab
commit eaed176f0a
15 changed files with 1704 additions and 29 deletions

View File

@ -1,4 +1,4 @@
SUPABASE_URL="XXXXX" SUPABASE_URL="XXXXX"
SUPABASE_SERVICE_KEY="eyXXXXX" SUPABASE_SERVICE_KEY="eyXXXXX"
OPENAI_API_KEY="sk-XXXXXX" OPENAI_API_KEY="sk-XXXXXX"
anthropic_api_key="XXXXXX" ANTHROPIC_API_KEY="XXXXXX"

View File

@ -1 +1,2 @@
ENV=local ENV=local
NEXT_PUBLIC_BACKEND_URL=http://localhost:5000

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ secondbrain/
.streamlit/secrets.toml .streamlit/secrets.toml
**/*.pyc **/*.pyc
toto.txt toto.txt
*.ipynb

View File

@ -81,11 +81,11 @@ Additionally, you'll need a [Supabase](https://supabase.com/) account for:
- **Step 2**: Copy the `.XXXXX_env` files - **Step 2**: Copy the `.XXXXX_env` files
```bash ```bash
cp .backend_env.example .backend_env cp .backend_env.example backend/.env
cp .frontend_env.example .frontend_env cp .frontend_env.example frontend/.env
``` ```
- **Step 3**: Update the `.backend_env` file - **Step 3**: Update the `backend/.env` file
> _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._ > _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
@ -95,6 +95,8 @@ cp .frontend_env.example .frontend_env
[Migration Script 2](scripts/supabase_usage_table.sql) [Migration Script 2](scripts/supabase_usage_table.sql)
[Migration Script 3](scripts/supabase_vector_store_document.sql)
- **Step 5**: Launch the app - **Step 5**: Launch the app
```bash ```bash

View File

@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
COPY . /code/ COPY . /code/
CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"] CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]

View File

@ -67,14 +67,12 @@ memory = ConversationBufferMemory(
class ChatMessage(BaseModel): class ChatMessage(BaseModel):
model: str = "gpt-3.5-turbo" model: str = "gpt-3.5-turbo"
question: str question: str
history: List[Tuple[str, str]] # A list of tuples where each tuple is (speaker, text) # A list of tuples where each tuple is (speaker, text)
history: List[Tuple[str, str]]
temperature: float = 0.0 temperature: float = 0.0
max_tokens: int = 256 max_tokens: int = 256
file_processors = { file_processors = {
".txt": process_txt, ".txt": process_txt,
".csv": process_csv, ".csv": process_csv,
@ -95,6 +93,7 @@ file_processors = {
".ipynb": process_ipnyb, ".ipynb": process_ipnyb,
} }
async def filter_file(file: UploadFile, supabase, vector_store, stats_db): async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
if await file_already_exists(supabase, file): if await file_already_exists(supabase, file):
return {"message": f"🤔 {file.filename} already exists.", "type": "warning"} return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
@ -108,17 +107,19 @@ async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
else: else:
return {"message": f"{file.filename} is not supported.", "type": "error"} return {"message": f"{file.filename} is not supported.", "type": "error"}
@app.post("/upload") @app.post("/upload")
async def upload_file(file: UploadFile): async def upload_file(file: UploadFile):
message = await filter_file(file, supabase, vector_store, stats_db=None) message = await filter_file(file, supabase, vector_store, stats_db=None)
return message return message
@app.post("/chat/") @app.post("/chat/")
async def chat_endpoint(chat_message: ChatMessage): async def chat_endpoint(chat_message: ChatMessage):
history = chat_message.history history = chat_message.history
# Logic from your Streamlit app goes here. For example: # Logic from your Streamlit app goes here. For example:
#this overwrites the built-in prompt of the ConversationalRetrievalChain # this overwrites the built-in prompt of the ConversationalRetrievalChain
ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT
qa = None qa = None
@ -137,9 +138,10 @@ async def chat_endpoint(chat_message: ChatMessage):
return {"history": history} return {"history": history}
@app.post("/crawl/") @app.post("/crawl/")
async def crawl_endpoint(crawl_website: CrawlWebsite): async def crawl_endpoint(crawl_website: CrawlWebsite):
file_path, file_name = crawl_website.process() file_path, file_name = crawl_website.process()
# Create a SpooledTemporaryFile from the file_path # Create a SpooledTemporaryFile from the file_path
@ -152,9 +154,11 @@ async def crawl_endpoint(crawl_website: CrawlWebsite):
message = await filter_file(file, supabase, vector_store, stats_db=None) message = await filter_file(file, supabase, vector_store, stats_db=None)
return message return message
@app.get("/explore") @app.get("/explore")
async def explore_endpoint(): async def explore_endpoint():
response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute() response = supabase.table("documents").select(
"name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
documents = response.data # Access the data from the response documents = response.data # Access the data from the response
# Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary # Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)] unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
@ -163,22 +167,23 @@ async def explore_endpoint():
return {"documents": unique_data} return {"documents": unique_data}
@app.delete("/explore/{file_name}") @app.delete("/explore/{file_name}")
async def delete_endpoint(file_name: str): async def delete_endpoint(file_name: str):
response = supabase.table("documents").delete().match({"metadata->>file_name": file_name}).execute() response = supabase.table("documents").delete().match(
{"metadata->>file_name": file_name}).execute()
return {"message": f"{file_name} has been deleted."} return {"message": f"{file_name} has been deleted."}
@app.get("/explore/{file_name}") @app.get("/explore/{file_name}")
async def download_endpoint(file_name: str): async def download_endpoint(file_name: str):
response = supabase.table("documents").select("metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute() response = supabase.table("documents").select(
"metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
documents = response.data documents = response.data
### Returns all documents with the same file name # Returns all documents with the same file name
return {"documents": documents} return {"documents": documents}
@app.get("/") @app.get("/")
async def root(): async def root():
return {"message": "Hello World"} return {"message": "Hello World"}

View File

@ -3,7 +3,7 @@ version: "3"
services: services:
frontend: frontend:
env_file: env_file:
- .frontend_env - ./frontend/.env
build: build:
context: frontend context: frontend
dockerfile: Dockerfile dockerfile: Dockerfile
@ -17,9 +17,7 @@ services:
- 3000:3000 - 3000:3000
backend: backend:
env_file: env_file:
- .backend_env - ./backend/.env
environment:
- supabase_url="totot"
build: build:
context: backend context: backend
dockerfile: Dockerfile dockerfile: Dockerfile
@ -28,4 +26,4 @@ services:
volumes: volumes:
- ./backend/:/code/ - ./backend/:/code/
ports: ports:
- 5000:5000 - 5050:5050

View File

@ -1 +1,2 @@
ENV=local ENV=local
BACKEND_URL="http://localhost:5050"

View File

@ -28,7 +28,7 @@ export default function ChatPage() {
const askQuestion = async () => { const askQuestion = async () => {
setHistory((hist) => [...hist, ["user", question]]); setHistory((hist) => [...hist, ["user", question]]);
setIsPending(true); setIsPending(true);
const response = await axios.post("http://localhost:5000/chat/", { const response = await axios.post(`${process.env.NEXT_PUBLIC_BACKEND_URL}/chat/`, {
model, model,
question, question,
history, history,

View File

@ -18,7 +18,8 @@ export default function ExplorePage() {
const fetchDocuments = async () => { const fetchDocuments = async () => {
try { try {
const response = await axios.get<{ documents: Document[] }>('http://localhost:5000/explore'); console.log(`Fetching documents from ${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
const response = await axios.get<{ documents: Document[] }>(`${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
setDocuments(response.data.documents); setDocuments(response.data.documents);
} catch (error) { } catch (error) {
console.error('Error fetching documents', error); console.error('Error fetching documents', error);

View File

@ -61,7 +61,7 @@ export default function UploadPage() {
formData.append("file", file); formData.append("file", file);
try { try {
const response = await axios.post( const response = await axios.post(
"http://localhost:5000/upload", `${process.env.NEXT_PUBLIC_BACKEND_URL}/upload`,
formData formData
); );

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,7 @@
}, },
"devDependencies": { "devDependencies": {
"@tailwindcss/typography": "^0.5.9", "@tailwindcss/typography": "^0.5.9",
"@types/next": "^9.0.0",
"react-icons": "^4.8.0" "react-icons": "^4.8.0"
} }
} }

View File

@ -1,7 +1,7 @@
create extension vector; create extension vector;
-- Create a table to store your documents -- Create a table to store your documents
create table documents ( create table if not exists documents (
id bigserial primary key, id bigserial primary key,
content text, -- corresponds to Document.pageContent content text, -- corresponds to Document.pageContent
metadata jsonb, -- corresponds to Document.metadata metadata jsonb, -- corresponds to Document.metadata

View File

@ -0,0 +1,38 @@
-- Create a table to store your summaries.
-- document_id points at the source row in documents. The foreign key uses
-- ON DELETE CASCADE so that deleting a document also removes its summaries;
-- with the default (NO ACTION) the delete on documents would be rejected with
-- a foreign-key violation as soon as a summary references it.
-- NOTE: because of IF NOT EXISTS, this definition has no effect on a summaries
-- table that already exists; alter the constraint separately in that case.
create table if not exists summaries (
    id bigserial primary key,
    document_id bigint references documents(id) on delete cascade,
    content text, -- corresponds to the summarized content
    metadata jsonb, -- corresponds to Document.metadata
    embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
);
-- Similarity search over the summaries table.
--
-- Parameters:
--   query_embedding  vector to compare each stored summary embedding against
--   match_count      maximum number of rows to return
--   match_threshold  only rows whose similarity is strictly greater than this
--                    value are returned
--
-- Returns one row per matching summary, closest first, with
-- similarity = 1 - (embedding <=> query_embedding), where <=> is the pgvector
-- distance operator.
CREATE OR REPLACE FUNCTION match_summaries(
    query_embedding vector(1536),
    match_count int,
    match_threshold float
)
RETURNS TABLE (
    id bigint,
    document_id bigint,
    content text,
    metadata jsonb,
    -- the matched vector is returned so callers can run maximal marginal
    -- relevance searches on the result
    embedding vector(1536),
    similarity float
)
LANGUAGE plpgsql
AS $$
-- The output columns above share their names with columns of summaries;
-- resolve any such name to the table column rather than the output variable.
#variable_conflict use_column
BEGIN
    RETURN QUERY
    SELECT
        s.id,
        s.document_id,
        s.content,
        s.metadata,
        s.embedding,
        1 - (s.embedding <=> query_embedding) AS similarity
    FROM summaries AS s
    WHERE 1 - (s.embedding <=> query_embedding) > match_threshold
    -- order by raw distance (ascending) so the most similar rows come first
    ORDER BY s.embedding <=> query_embedding
    LIMIT match_count;
END;
$$;