Better envs

2024-09-11 14:36:35 +03:00 · 2023-05-20 23:32:22 -07:00 · 2023-05-20 23:32:22 -07:00 · eaed176f0a
commit eaed176f0a
parent d39efcddab
15 changed files with 1704 additions and 29 deletions
--- a/.backend_env.example
+++ b/.backend_env.example
@ -1,4 +1,4 @@
 SUPABASE_URL="XXXXX"
 SUPABASE_SERVICE_KEY="eyXXXXX"
 OPENAI_API_KEY="sk-XXXXXX"
-anthropic_api_key="XXXXXX"
+ANTHROPIC_API_KEY="XXXXXX"
--- a/.frontend_env.example
+++ b/.frontend_env.example
@ -1 +1,2 @@
-ENV=local
+ENV=local
+NEXT_PUBLIC_BACKEND_URL=http://localhost:5050
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@ secondbrain/
 .streamlit/secrets.toml
 **/*.pyc
 toto.txt
+*.ipynb



--- a/README.md
+++ b/README.md
@ -81,11 +81,11 @@ Additionally, you'll need a [Supabase](https://supabase.com/) account for:
 - **Step 2**: Copy the `.XXXXX_env` files

 ```bash
-cp .backend_env.example .backend_env
-cp .frontend_env.example .frontend_env
+cp .backend_env.example backend/.env
+cp .frontend_env.example frontend/.env
 ```

- **Step 3**: Update the `.backend_env` file 
+- **Step 3**: Update the `backend/.env` file 

 > _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._

@ -95,6 +95,8 @@ cp .frontend_env.example .frontend_env

 [Migration Script 2](scripts/supabase_usage_table.sql)

+[Migration Script 3](scripts/supabase_vector_store_document.sql)
+
 - **Step 5**: Launch the app

 ```bash
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100

 COPY . /code/

-CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]
+CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
--- a/backend/api.py
+++ b/backend/api.py
@ -67,14 +67,12 @@ memory = ConversationBufferMemory(
 class ChatMessage(BaseModel):
    model: str = "gpt-3.5-turbo"
    question: str
-    history: List[Tuple[str, str]]  # A list of tuples where each tuple is (speaker, text)
+    # A list of tuples where each tuple is (speaker, text)
+    history: List[Tuple[str, str]]
    temperature: float = 0.0
    max_tokens: int = 256


-
-
-
 file_processors = {
    ".txt": process_txt,
    ".csv": process_csv,
@ -95,6 +93,7 @@ file_processors = {
    ".ipynb": process_ipnyb,
 }

+
 async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
    if await file_already_exists(supabase, file):
        return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
@ -108,17 +107,19 @@ async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
        else:
            return {"message": f"❌ {file.filename} is not supported.", "type": "error"}

+
@app.post("/upload")
 async def upload_file(file: UploadFile):
    message = await filter_file(file, supabase, vector_store, stats_db=None)
    return message

+
@app.post("/chat/")
 async def chat_endpoint(chat_message: ChatMessage):
    history = chat_message.history
    # Logic from your Streamlit app goes here. For example:

-    #this overwrites the built-in prompt of the ConversationalRetrievalChain
+    # this overwrites the built-in prompt of the ConversationalRetrievalChain
    ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT

    qa = None
@ -137,9 +138,10 @@ async def chat_endpoint(chat_message: ChatMessage):

    return {"history": history}

+
@app.post("/crawl/")
 async def crawl_endpoint(crawl_website: CrawlWebsite):
-    
+
    file_path, file_name = crawl_website.process()

    # Create a SpooledTemporaryFile from the file_path
@ -152,9 +154,11 @@ async def crawl_endpoint(crawl_website: CrawlWebsite):
    message = await filter_file(file, supabase, vector_store, stats_db=None)
    return message

+
@app.get("/explore")
 async def explore_endpoint():
-    response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
+    response = supabase.table("documents").select(
+        "name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
    documents = response.data  # Access the data from the response
    # Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
    unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
@ -163,22 +167,23 @@ async def explore_endpoint():

    return {"documents": unique_data}

+
@app.delete("/explore/{file_name}")
 async def delete_endpoint(file_name: str):
-    response = supabase.table("documents").delete().match({"metadata->>file_name": file_name}).execute()
+    response = supabase.table("documents").delete().match(
+        {"metadata->>file_name": file_name}).execute()
    return {"message": f"{file_name} has been deleted."}

+
@app.get("/explore/{file_name}")
 async def download_endpoint(file_name: str):
-    response = supabase.table("documents").select("metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
+    response = supabase.table("documents").select(
+        "metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
    documents = response.data
-    ### Returns all documents with the same file name
+    # Returns all documents with the same file name
    return {"documents": documents}


-
@app.get("/")
 async def root():
    return {"message": "Hello World"}
-
-
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -3,7 +3,7 @@ version: "3"
 services:
  frontend:
    env_file:
-      - .frontend_env
+      - ./frontend/.env
    build:
      context: frontend
      dockerfile: Dockerfile
@ -17,9 +17,7 @@ services:
      - 3000:3000
  backend:
    env_file:
-      - .backend_env
-    environment:
-      - supabase_url="totot"
+      - ./backend/.env
    build:
      context: backend
      dockerfile: Dockerfile
@ -28,4 +26,4 @@ services:
    volumes:
      - ./backend/:/code/
    ports:
-      - 5000:5000
+      - 5050:5050
--- a/frontend/.env.example
+++ b/frontend/.env.example
@ -1 +1,2 @@
-ENV=local
+ENV=local
+BACKEND_URL="http://localhost:5050"
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@ -28,7 +28,7 @@ export default function ChatPage() {
  const askQuestion = async () => {
    setHistory((hist) => [...hist, ["user", question]]);
    setIsPending(true);
-    const response = await axios.post("http://localhost:5000/chat/", {
+    const response = await axios.post(`${process.env.NEXT_PUBLIC_BACKEND_URL}/chat/`, {
      model,
      question,
      history,
--- a/frontend/app/explore/page.tsx
+++ b/frontend/app/explore/page.tsx
@ -18,7 +18,8 @@ export default function ExplorePage() {

    const fetchDocuments = async () => {
        try {
-            const response = await axios.get<{ documents: Document[] }>('http://localhost:5000/explore');
+            console.log(`Fetching documents from ${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
+            const response = await axios.get<{ documents: Document[] }>(`${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
            setDocuments(response.data.documents);
        } catch (error) {
            console.error('Error fetching documents', error);
--- a/frontend/app/upload/page.tsx
+++ b/frontend/app/upload/page.tsx
@ -61,7 +61,7 @@ export default function UploadPage() {
    formData.append("file", file);
    try {
      const response = await axios.post(
-        "http://localhost:5000/upload",
+        `${process.env.NEXT_PUBLIC_BACKEND_URL}/upload`,
        formData
      );

--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@ -34,6 +34,7 @@
  },
  "devDependencies": {
    "@tailwindcss/typography": "^0.5.9",
+    "@types/next": "^9.0.0",
    "react-icons": "^4.8.0"
  }
 }
--- a/scripts/supabase_vector_store_document.sql
+++ b/scripts/supabase_vector_store_document.sql
@ -1,7 +1,7 @@
 create extension vector;

 -- Create a table to store your documents
-create table documents (
+create table if not exists documents (
 id bigserial primary key,
 content text, -- corresponds to Document.pageContent
 metadata jsonb, -- corresponds to Document.metadata
--- a/scripts/supabase_vector_store_summary.sql
+++ b/scripts/supabase_vector_store_summary.sql
@ -0,0 +1,38 @@
+-- Create a table to store your summaries
+create table if not exists summaries (
+    id bigserial primary key, -- auto-incrementing surrogate key
+    document_id bigint references documents(id), -- foreign key into documents(id)
+    content text, -- corresponds to the summarized content
+    metadata jsonb, -- corresponds to Document.metadata
+    embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
+);
+
+CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float) -- returns at most match_count summaries whose similarity to query_embedding exceeds match_threshold
+    RETURNS TABLE(
+        id bigint,
+        document_id bigint,
+        content text,
+        metadata jsonb,
+        -- we return matched vectors to enable maximal marginal relevance searches
+        embedding vector(1536),
+        similarity float) -- 1 - distance to query_embedding, as computed in the SELECT below
+    LANGUAGE plpgsql
+    AS $$
+    # variable_conflict use_column
+BEGIN
+    RETURN query
+    SELECT
+        id, -- unqualified names resolve to summaries columns (variable_conflict use_column), not the same-named output columns
+        document_id,
+        content,
+        metadata,
+        embedding,
+        1 -(summaries.embedding <=> query_embedding) AS similarity -- <=> is pgvector's cosine distance operator, so this is cosine similarity
+    FROM
+        summaries
+    WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold -- strict inequality: rows exactly at the threshold are excluded
+    ORDER BY
+        summaries.embedding <=> query_embedding -- ascending distance, i.e. most similar first
+    LIMIT match_count;
+END;
+$$;