Better envs

2024-12-26 12:52:05 +03:00 · 2023-05-20 23:32:22 -07:00 · 2023-05-20 23:32:22 -07:00 · eaed176f0a
commit eaed176f0a
parent d39efcddab
15 changed files with 1704 additions and 29 deletions
--- a/.backend_env.example
+++ b/.backend_env.example
@ -1,4 +1,4 @@
 SUPABASE_URL="XXXXX"
 SUPABASE_SERVICE_KEY="eyXXXXX"
 OPENAI_API_KEY="sk-XXXXXX"
-anthropic_api_key="XXXXXX"
+ANTHROPIC_API_KEY="XXXXXX"
--- a/.frontend_env.example
+++ b/.frontend_env.example
@ -1 +1,2 @@
-ENV=local
+ENV=local
 NEXT_PUBLIC_BACKEND_URL=http://localhost:5000
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@ secondbrain/
 .streamlit/secrets.toml
 **/*.pyc
 toto.txt
 *.ipynb
--- a/README.md
+++ b/README.md
@ -81,11 +81,11 @@ Additionally, you'll need a [Supabase](https://supabase.com/) account for:
 - **Step 2**: Copy the `.XXXXX_env` files
 ```bash
-cp .backend_env.example .backend_env
+cp .backend_env.example backend/.env
-cp .frontend_env.example .frontend_env
+cp .frontend_env.example frontend/.env
 ```
- **Step 3**: Update the `.backend_env` file 
+- **Step 3**: Update the `backend/.env` file 
 > _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
@ -95,6 +95,8 @@ cp .frontend_env.example .frontend_env
 [Migration Script 2](scripts/supabase_usage_table.sql)
 [Migration Script 3](scripts/supabase_vector_store_document.sql)
 - **Step 5**: Launch the app
 ```bash
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
 COPY . /code/
-CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]
+CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
--- a/backend/api.py
+++ b/backend/api.py
@ -67,14 +67,12 @@ memory = ConversationBufferMemory(
 class ChatMessage(BaseModel):
    model: str = "gpt-3.5-turbo"
    question: str
-    history: List[Tuple[str, str]]  # A list of tuples where each tuple is (speaker, text)
+    # A list of tuples where each tuple is (speaker, text)
    history: List[Tuple[str, str]]
    temperature: float = 0.0
    max_tokens: int = 256
 file_processors = {
    ".txt": process_txt,
    ".csv": process_csv,
@ -95,6 +93,7 @@ file_processors = {
    ".ipynb": process_ipnyb,
 }
 async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
    if await file_already_exists(supabase, file):
        return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
@ -108,17 +107,19 @@ async def filter_file(file: UploadFile, supabase, vector_store, stats_db):
        else:
            return {"message": f"❌ {file.filename} is not supported.", "type": "error"}
@app.post("/upload")
 async def upload_file(file: UploadFile):
    message = await filter_file(file, supabase, vector_store, stats_db=None)
    return message
@app.post("/chat/")
 async def chat_endpoint(chat_message: ChatMessage):
    history = chat_message.history
    # Logic from your Streamlit app goes here. For example:
-    #this overwrites the built-in prompt of the ConversationalRetrievalChain
+    # this overwrites the built-in prompt of the ConversationalRetrievalChain
    ConversationalRetrievalChain.prompts = LANGUAGE_PROMPT
    qa = None
@ -137,9 +138,10 @@ async def chat_endpoint(chat_message: ChatMessage):
    return {"history": history}
@app.post("/crawl/")
 async def crawl_endpoint(crawl_website: CrawlWebsite):
-    
+
    file_path, file_name = crawl_website.process()
    # Create a SpooledTemporaryFile from the file_path
@ -152,9 +154,11 @@ async def crawl_endpoint(crawl_website: CrawlWebsite):
    message = await filter_file(file, supabase, vector_store, stats_db=None)
    return message
@app.get("/explore")
 async def explore_endpoint():
-    response = supabase.table("documents").select("name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
+    response = supabase.table("documents").select(
        "name:metadata->>file_name, size:metadata->>file_size", count="exact").execute()
    documents = response.data  # Access the data from the response
    # Convert each dictionary to a tuple of items, then to a set to remove duplicates, and then back to a dictionary
    unique_data = [dict(t) for t in set(tuple(d.items()) for d in documents)]
@ -163,22 +167,23 @@ async def explore_endpoint():
    return {"documents": unique_data}
@app.delete("/explore/{file_name}")
 async def delete_endpoint(file_name: str):
-    response = supabase.table("documents").delete().match({"metadata->>file_name": file_name}).execute()
+    response = supabase.table("documents").delete().match(
        {"metadata->>file_name": file_name}).execute()
    return {"message": f"{file_name} has been deleted."}
@app.get("/explore/{file_name}")
 async def download_endpoint(file_name: str):
-    response = supabase.table("documents").select("metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
+    response = supabase.table("documents").select(
        "metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url").match({"metadata->>file_name": file_name}).execute()
    documents = response.data
-    ### Returns all documents with the same file name
+    # Returns all documents with the same file name
    return {"documents": documents}
@app.get("/")
 async def root():
    return {"message": "Hello World"}
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -3,7 +3,7 @@ version: "3"
 services:
  frontend:
    env_file:
-      - .frontend_env
+      - ./frontend/.env
    build:
      context: frontend
      dockerfile: Dockerfile
@ -17,9 +17,7 @@ services:
      - 3000:3000
  backend:
    env_file:
-      - .backend_env
+      - ./backend/.env
    environment:
      - supabase_url="totot"
    build:
      context: backend
      dockerfile: Dockerfile
@ -28,4 +26,4 @@ services:
    volumes:
      - ./backend/:/code/
    ports:
-      - 5000:5000
+      - 5050:5050
--- a/frontend/.env.example
+++ b/frontend/.env.example
@ -1 +1,2 @@
-ENV=local
+ENV=local
 BACKEND_URL="http://localhost:5050"
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@ -28,7 +28,7 @@ export default function ChatPage() {
  const askQuestion = async () => {
    setHistory((hist) => [...hist, ["user", question]]);
    setIsPending(true);
-    const response = await axios.post("http://localhost:5000/chat/", {
+    const response = await axios.post(`${process.env.NEXT_PUBLIC_BACKEND_URL}/chat/`, {
      model,
      question,
      history,
--- a/frontend/app/explore/page.tsx
+++ b/frontend/app/explore/page.tsx
@ -18,7 +18,8 @@ export default function ExplorePage() {
    const fetchDocuments = async () => {
        try {
-            const response = await axios.get<{ documents: Document[] }>('http://localhost:5000/explore');
+            console.log(`Fetching documents from ${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
            const response = await axios.get<{ documents: Document[] }>(`${process.env.NEXT_PUBLIC_BACKEND_URL}/explore`);
            setDocuments(response.data.documents);
        } catch (error) {
            console.error('Error fetching documents', error);
--- a/frontend/app/upload/page.tsx
+++ b/frontend/app/upload/page.tsx
@ -61,7 +61,7 @@ export default function UploadPage() {
    formData.append("file", file);
    try {
      const response = await axios.post(
-        "http://localhost:5000/upload",
+        `${process.env.NEXT_PUBLIC_BACKEND_URL}/upload`,
        formData
      );
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@ -34,6 +34,7 @@
  },
  "devDependencies": {
    "@tailwindcss/typography": "^0.5.9",
    "@types/next": "^9.0.0",
    "react-icons": "^4.8.0"
  }
 }
--- a/scripts/supabase_vector_store_document.sql
+++ b/scripts/supabase_vector_store_document.sql
@ -1,7 +1,7 @@
 create extension vector;
 -- Create a table to store your documents
-create table documents (
+create table if not exists documents (
 id bigserial primary key,
 content text, -- corresponds to Document.pageContent
 metadata jsonb, -- corresponds to Document.metadata
--- a/scripts/supabase_vector_store_summary.sql
+++ b/scripts/supabase_vector_store_summary.sql
@ -0,0 +1,38 @@
 -- Create a table to store your summaries
 -- `if not exists` lets this script be re-run without failing on an existing table.
 -- The `vector` column type comes from the `vector` extension, which is created in
 -- scripts/supabase_vector_store_document.sql; that script also creates `documents`,
 -- so it has to run before this one.
 create table if not exists summaries (
    id bigserial primary key,
    document_id bigint references documents(id), -- foreign key to the summarized row in documents
    content text, -- corresponds to the summarized content
    metadata jsonb, -- corresponds to Document.metadata
    embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
    -- NOTE: match_summaries below also hard-codes vector(1536); keep the two in sync.
 );
 -- match_summaries: similarity search over the `summaries` table.
 --
 -- Parameters:
 --   query_embedding  vector(1536) to compare against summaries.embedding
 --   match_count      maximum number of rows to return
 --   match_threshold  rows are kept only when similarity is strictly greater than this
 --
 -- Returns one row per match with the summary's columns plus `similarity`,
 -- computed as 1 - (summaries.embedding <=> query_embedding). `<=>` is the pgvector
 -- cosine-distance operator, so a higher similarity means a closer match.
 -- Rows are ordered by ascending distance (closest first).
 --
 -- The output column names (id, document_id, content, metadata, embedding) are the
 -- same as the table's column names. The `variable_conflict use_column` directive at
 -- the top of the body tells PL/pgSQL to resolve such ambiguous names to the table
 -- columns, which is why the unqualified names in the SELECT list work.
 CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float)
    RETURNS TABLE(
        id bigint,
        document_id bigint,
        content text,
        metadata jsonb,
        -- we return matched vectors to enable maximal marginal relevance searches
        embedding vector(1536),
        similarity float)
    LANGUAGE plpgsql
    AS $$
    # variable_conflict use_column
 BEGIN
    RETURN query
    SELECT
        -- unqualified names below refer to columns of `summaries` (see use_column above)
        id,
        document_id,
        content,
        metadata,
        embedding,
        1 -(summaries.embedding <=> query_embedding) AS similarity
    FROM
        summaries
    -- strict comparison: a similarity exactly equal to match_threshold is excluded
    WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
    ORDER BY
        -- smallest distance first, i.e. most similar summaries first
        summaries.embedding <=> query_embedding
    LIMIT match_count;
 END;
 $$;
`@ -1 +1,2 @@`
	`ENV=local`	`ENV=local`
		`NEXT_PUBLIC_BACKEND_URL=http://localhost:5000`
`@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100`

	`COPY . /code/`	`COPY . /code/`

	`CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]`	`CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]`