Better envs

This commit is contained in:
shaun 2023-05-20 23:32:22 -07:00
parent d39efcddab
commit eaed176f0a
15 changed files with 1704 additions and 29 deletions

View File

@ -1,4 +1,4 @@
SUPABASE_URL="XXXXX"
SUPABASE_SERVICE_KEY="eyXXXXX"
OPENAI_API_KEY="sk-XXXXXX"
anthropic_api_key="XXXXXX"
ANTHROPIC_API_KEY="XXXXXX"

View File

@ -1 +1,2 @@
ENV=local
ENV=local
NEXT_PUBLIC_BACKEND_URL=http://localhost:5000

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ secondbrain/
.streamlit/secrets.toml
**/*.pyc
toto.txt
*.ipynb

View File

@ -81,11 +81,11 @@ Additionally, you'll need a [Supabase](https://supabase.com/) account for:
- **Step 2**: Copy the `.XXXXX_env` files
```bash
cp .backend_env.example .backend_env
cp .frontend_env.example .frontend_env
cp .backend_env.example backend/.env
cp .frontend_env.example frontend/.env
```
- **Step 3**: Update the `.backend_env` file
- **Step 3**: Update the `backend/.env` file
> _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
@ -95,6 +95,8 @@ cp .frontend_env.example .frontend_env
[Migration Script 2](scripts/supabase_usage_table.sql)
[Migration Script 3](scripts/supabase_vector_store_document.sql)
- **Step 5**: Launch the app
```bash

View File

@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
COPY . /code/
CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]
CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]

View File

@ -67,14 +67,12 @@ memory = ConversationBufferMemory(
class ChatMessage(BaseModel):
model: str = "gpt-3.5-turbo"
question: str
history: List[Tuple[str, str]] # A list of tuples where each tuple is (speaker, text)
# A list of tuples where each tuple is (speaker, text)
history: List[Tuple[str, str]]
temperature: float = 0.0
max_tokens: int = 256
file_processors = {
".txt": process_txt,
".csv": process_csv,
@ -95,6 +93,7 @@ file_processors = {
".ipynb": process_ipnyb,
}
async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
if await file_already_exists(supabase, file):
return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
@ -108,17 +107,19 @@ async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
else:
return {"message": f"{file.filename} is not supported.", "type": "error"}
@app.post("/upload")
async def upload_file(file: UploadFile):
message = await filter_file(file, supabase, vector_store, stats_db=None)
return message
@app.post("/chat/")
async def chat_endpoint(chat_message: ChatMessage):
history = chat_message.history
# Logic from your Streamlit app goes here. For example:
#this overwrites the built-in prompt of the ConversationalRetrievalChain
# this overwrites the built-in prompt of the ConversationalRetrievalChain
ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT
qa = None
@ -137,9 +138,10 @@ async def chat_endpoint(chat_message: ChatMessage):
return {"history": history}
@app.post("/crawl/")
async def crawl_endpoint(crawl_website: CrawlWebsite):
file_path, file_name = crawl_website.process()
# Create a SpooledTemporaryFile from the file_path
@ -152,9 +154,11 @@ async def crawl_endpoint(crawl_website: CrawlWebsite):
message = await filter_file(file, supabase, vector_store, stats_db=None)
return message
@app.get("/explore")
async def explore_endpoint():
response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
response = supabase.table("documents").select(
"name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
documents = response.data # Access the data from the response
# Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
@ -163,22 +167,23 @@ async def explore_endpoint():
return {"documents": unique_data}
@app.delete("/explore/{file_name}")
async def delete_endpoint(file_name: str):
response = supabase.table("documents").delete().match({"metadata->>file_name": file_name}).execute()
response = supabase.table("documents").delete().match(
{"metadata->>file_name": file_name}).execute()
return {"message": f"{file_name} has been deleted."}
@app.get("/explore/{file_name}")
async def download_endpoint(file_name: str):
response = supabase.table("documents").select("metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
response = supabase.table("documents").select(
"metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
documents = response.data
### Returns all documents with the same file name
# Returns all documents with the same file name
return {"documents": documents}
@app.get("/")
async def root():
return {"message": "Hello World"}

View File

@ -3,7 +3,7 @@ version: "3"
services:
frontend:
env_file:
- .frontend_env
- ./frontend/.env
build:
context: frontend
dockerfile: Dockerfile
@ -17,9 +17,7 @@ services:
- 3000:3000
backend:
env_file:
- .backend_env
environment:
- supabase_url="totot"
- ./backend/.env
build:
context: backend
dockerfile: Dockerfile
@ -28,4 +26,4 @@ services:
volumes:
- ./backend/:/code/
ports:
- 5000:5000
- 5050:5050

View File

@ -1 +1,2 @@
ENV=local
ENV=local
BACKEND_URL="http://localhost:5050"

View File

@ -28,7 +28,7 @@ export default function ChatPage() {
const askQuestion = async () => {
setHistory((hist) => [...hist, ["user", question]]);
setIsPending(true);
const response = await axios.post("http://localhost:5000/chat/", {
const response = await axios.post(`${process.env.NEXT_PUBLIC_BACKEND_URL}/chat/`, {
model,
question,
history,

View File

@ -18,7 +18,8 @@ export default function ExplorePage() {
const fetchDocuments = async () => {
try {
const response = await axios.get<{ documents: Document[] }>('http://localhost:5000/explore');
console.log(`Fetching documents from ${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
const response = await axios.get<{ documents: Document[] }>(`${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
setDocuments(response.data.documents);
} catch (error) {
console.error('Error fetching documents', error);

View File

@ -61,7 +61,7 @@ export default function UploadPage() {
formData.append("file", file);
try {
const response = await axios.post(
"http://localhost:5000/upload",
`${process.env.NEXT_PUBLIC_BACKEND_URL}/upload`,
formData
);

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,7 @@
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.9",
"@types/next": "^9.0.0",
"react-icons": "^4.8.0"
}
}

View File

@ -1,7 +1,7 @@
create extension vector;
-- Create a table to store your documents
create table documents (
create table if not exists documents (
id bigserial primary key,
content text, -- corresponds to Document.pageContent
metadata jsonb, -- corresponds to Document.metadata

View File

@ -0,0 +1,38 @@
-- Summaries table: one row per stored summary.
-- document_id is a nullable foreign key to documents(id), so a summary may
-- (but need not) point at a row of the documents table.
CREATE TABLE IF NOT EXISTS summaries (
    id          bigserial PRIMARY KEY,
    document_id bigint REFERENCES documents (id),
    content     text,          -- corresponds to the summarized content
    metadata    jsonb,         -- corresponds to Document.metadata
    embedding   vector(1536)   -- 1536 works for OpenAI embeddings, change if needed
);
-- match_summaries: similarity search over the summaries table.
--
-- Parameters:
--   query_embedding  vector to compare every stored embedding against
--   match_count      maximum number of rows to return
--   match_threshold  rows are kept only when similarity > match_threshold
--
-- Returns the matching rows ordered by ascending `<=>` distance (closest
-- first), each with a computed `similarity` = 1 - (embedding <=> query_embedding).
-- `<=>` is the pgvector distance operator (cosine distance per pgvector docs).
CREATE OR REPLACE FUNCTION match_summaries(
    query_embedding vector(1536),
    match_count     int,
    match_threshold float
)
RETURNS TABLE (
    id          bigint,
    document_id bigint,
    content     text,
    metadata    jsonb,
    -- we return matched vectors to enable maximal marginal relevance searches
    embedding   vector(1536),
    similarity  float
)
LANGUAGE plpgsql
AS $$
-- The output column names above shadow the table's column names; this
-- directive tells PL/pgSQL to resolve any ambiguous name as the table column.
#variable_conflict use_column
BEGIN
    RETURN QUERY
    SELECT
        s.id,
        s.document_id,
        s.content,
        s.metadata,
        s.embedding,
        1 - (s.embedding <=> query_embedding) AS similarity
    FROM summaries AS s
    WHERE 1 - (s.embedding <=> query_embedding) > match_threshold
    ORDER BY s.embedding <=> query_embedding
    LIMIT match_count;
END;
$$;