From 92ac5e8dfc5920942570179416d59c6ed49585e4 Mon Sep 17 00:00:00 2001 From: shaun Date: Sun, 14 May 2023 01:22:49 -0700 Subject: [PATCH] Support for Anthropic Models This update enhances the "Second Brain" application by adding support for Anthropic's AI models. Users can now use not only OpenAI's GPT-3/4, but also Anthropic's Claude models to store and query their knowledge. Key changes include: Added an anthropic_api_key field in the secrets configuration file. Introduced a selection for different AI models including GPT-3, GPT-4, and various versions of Claude. Updated question handling to be model-agnostic, and added support for Anthropic's Claude models in the question processing workflow. Modified the Streamlit interface to allow users to input their choice of model, control the "temperature" of the model's responses, and set the max tokens limit. Updated the requirements.txt file to add the Anthropic library. This update empowers users to leverage different AI models based on their needs, providing a more flexible and robust tool for knowledge management. 
--- .streamlit/secrets.toml.example | 3 +- .vscode/settings.json | 6 ++ diff.txt | 163 ++++++++++++++++++++++++++++++++ main.py | 50 ++++++---- question.py | 33 +++++-- requirements.txt | 2 +- 6 files changed, 231 insertions(+), 26 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 diff.txt diff --git a/.streamlit/secrets.toml.example b/.streamlit/secrets.toml.example index 093c5bf9b..4c405dc3b 100644 --- a/.streamlit/secrets.toml.example +++ b/.streamlit/secrets.toml.example @@ -1,3 +1,4 @@ supabase_url = "https://lalalala.supabase.co" supabase_service_key = "lalalala" -openai_api_key = "sk-lalalala" \ No newline at end of file +openai_api_key = "sk-lalalala" +anthropic_api_key = "" diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..3ecb8fcd5 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.autopep8" + }, + "python.formatting.provider": "none" +} diff --git a/diff.txt b/diff.txt new file mode 100644 index 000000000..b1edd2c65 --- /dev/null +++ b/diff.txt @@ -0,0 +1,163 @@ +diff --git a/.streamlit/secrets.toml.example b/.streamlit/secrets.toml.example +index 093c5bf..4c405dc 100644 +--- a/.streamlit/secrets.toml.example ++++ b/.streamlit/secrets.toml.example +@@ -1,3 +1,4 @@ + supabase_url = "https://lalalala.supabase.co" + supabase_service_key = "lalalala" +-openai_api_key = "sk-lalalala" +\ No newline at end of file ++openai_api_key = "sk-lalalala" ++anthropic_api_key = "" +diff --git a/main.py b/main.py +index 6ed2560..88af1c5 100644 +--- a/main.py ++++ b/main.py +@@ -10,25 +10,30 @@ from langchain.embeddings.openai import OpenAIEmbeddings + from langchain.vectorstores import SupabaseVectorStore + from supabase import Client, create_client + +-# supabase_url = "https://fqgpcifsfmamprzldyiv.supabase.co" + supabase_url = st.secrets.supabase_url + supabase_key = st.secrets.supabase_service_key + openai_api_key = 
st.secrets.openai_api_key ++anthropic_api_key = st.secrets.anthropic_api_key + supabase: Client = create_client(supabase_url, supabase_key) + + embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) +-vector_store = SupabaseVectorStore(supabase, embeddings, table_name="documents") ++vector_store = SupabaseVectorStore( ++ supabase, embeddings, table_name="documents") ++models = ["gpt-3.5-turbo", "gpt-4"] ++if anthropic_api_key: ++ models += ["claude-v1", "claude-v1.3", ++ "claude-instant-v1-100k", "claude-instant-v1.1-100k"] + + # Set the theme + st.set_page_config( + page_title="Second Brain", + layout="wide", + initial_sidebar_state="expanded", +- + ) + + st.title("🧠 Second Brain 🧠") +-st.markdown("Store your knowledge in a vector store and query it with OpenAI's GPT-3/4.") ++st.markdown( ++ "Store your knowledge in a vector store and query it with OpenAI's GPT-3/4.") + st.markdown("---\n\n") + + # Initialize session state variables +@@ -40,31 +45,40 @@ if 'chunk_size' not in st.session_state: + st.session_state['chunk_size'] = 500 + if 'chunk_overlap' not in st.session_state: + st.session_state['chunk_overlap'] = 0 ++if 'max_tokens' not in st.session_state: ++ st.session_state['max_tokens'] = 256 + + # Create a radio button for user to choose between adding knowledge or asking a question +-user_choice = st.radio("Choose an action", ('Add Knowledge', 'Chat with your Brain','Forget' )) ++user_choice = st.radio( ++ "Choose an action", ('Add Knowledge', 'Chat with your Brain', 'Forget')) + + st.markdown("---\n\n") + +- +- + if user_choice == 'Add Knowledge': + # Display chunk size and overlap selection only when adding knowledge +- st.sidebar.title("Configuration") +- st.sidebar.markdown("Choose your chunk size and overlap for adding knowledge.") +- st.session_state['chunk_size'] = st.sidebar.slider("Select Chunk Size", 100, 1000, st.session_state['chunk_size'], 50) +- st.session_state['chunk_overlap'] = st.sidebar.slider("Select Chunk Overlap", 0, 100, 
st.session_state['chunk_overlap'], 10) ++ st.sidebar.title("Configuration") ++ st.sidebar.markdown( ++ "Choose your chunk size and overlap for adding knowledge.") ++ st.session_state['chunk_size'] = st.sidebar.slider( ++ "Select Chunk Size", 100, 1000, st.session_state['chunk_size'], 50) ++ st.session_state['chunk_overlap'] = st.sidebar.slider( ++ "Select Chunk Overlap", 0, 100, st.session_state['chunk_overlap'], 10) + file_uploader(supabase, openai_api_key, vector_store) + elif user_choice == 'Chat with your Brain': + # Display model and temperature selection only when asking questions +- st.sidebar.title("Configuration") +- st.sidebar.markdown("Choose your model and temperature for asking questions.") +- st.session_state['model'] = st.sidebar.selectbox("Select Model", ["gpt-3.5-turbo", "gpt-4"], index=("gpt-3.5-turbo", "gpt-4").index(st.session_state['model'])) +- st.session_state['temperature'] = st.sidebar.slider("Select Temperature", 0.0, 1.0, st.session_state['temperature'], 0.1) +- chat_with_doc(openai_api_key, vector_store) ++ st.sidebar.title("Configuration") ++ st.sidebar.markdown( ++ "Choose your model and temperature for asking questions.") ++ st.session_state['model'] = st.sidebar.selectbox( ++ "Select Model", models, index=(models).index(st.session_state['model'])) ++ st.session_state['temperature'] = st.sidebar.slider( ++ "Select Temperature", 0.0, 1.0, st.session_state['temperature'], 0.1) ++ st.session_state['max_tokens'] = st.sidebar.slider( ++ "Select Max Tokens", 256, 2048, st.session_state['max_tokens'], 2048) ++ chat_with_doc(st.session_state['model'], vector_store) + elif user_choice == 'Forget': + st.sidebar.title("Configuration") +- ++ + brain(supabase) + +-st.markdown("---\n\n") +\ No newline at end of file ++st.markdown("---\n\n") +diff --git a/question.py b/question.py +index 8e875f6..6f8e9d3 100644 +--- a/question.py ++++ b/question.py +@@ -1,14 +1,35 @@ + import streamlit as st ++from streamlit.logger import get_logger + from 
langchain.chains import ConversationalRetrievalChain + from langchain.memory import ConversationBufferMemory + from langchain.llms import OpenAI ++from langchain.chat_models import ChatAnthropic ++from langchain.vectorstores import SupabaseVectorStore + +-memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) ++memory = ConversationBufferMemory( ++ memory_key="chat_history", return_messages=True) ++openai_api_key = st.secrets.openai_api_key ++anthropic_api_key = st.secrets.anthropic_api_key ++logger = get_logger(__name__) + +-def chat_with_doc(openai_api_key, vector_store): +- question = st.text_input("## Ask a question") ++ ++def chat_with_doc(model, vector_store: SupabaseVectorStore): ++ question = st.text_area("## Ask a question") + button = st.button("Ask") + if button: +- qa = ConversationalRetrievalChain.from_llm(OpenAI(model_name=st.session_state['model'], openai_api_key=openai_api_key, temperature=st.session_state['temperature']), vector_store.as_retriever(), memory=memory) +- result = qa({"question": question}) +- st.write(result["answer"]) +\ No newline at end of file ++ if model.startswith("gpt"): ++ logger.info('Using OpenAI model %s', model) ++ qa = ConversationalRetrievalChain.from_llm( ++ OpenAI( ++ model_name=st.session_state['model'], openai_api_key=openai_api_key, temperature=st.session_state['temperature'], max_tokens=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True) ++ result = qa({"question": question}) ++ logger.info('Result: %s', result) ++ st.write(result["answer"]) ++ elif anthropic_api_key and model.startswith("claude"): ++ logger.info('Using Anthropics model %s', model) ++ qa = ConversationalRetrievalChain.from_llm( ++ ChatAnthropic( ++ model=st.session_state['model'], anthropic_api_key=anthropic_api_key, temperature=st.session_state['temperature'], max_tokens_to_sample=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True, 
max_tokens_limit=102400) ++ result = qa({"question": question}) ++ logger.info('Result: %s', result) ++ st.write(result["answer"]) +diff --git a/requirements.txt b/requirements.txt +index 981d00b..ea8f1f6 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -8,4 +8,4 @@ StrEnum==0.4.10 + supabase==1.0.3 + tiktoken==0.4.0 + unstructured==0.6.5 +- ++anthropic=0.2.8 diff --git a/main.py b/main.py index 6ed2560ea..88af1c595 100644 --- a/main.py +++ b/main.py @@ -10,25 +10,30 @@ from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import SupabaseVectorStore from supabase import Client, create_client -# supabase_url = "https://fqgpcifsfmamprzldyiv.supabase.co" supabase_url = st.secrets.supabase_url supabase_key = st.secrets.supabase_service_key openai_api_key = st.secrets.openai_api_key +anthropic_api_key = st.secrets.anthropic_api_key supabase: Client = create_client(supabase_url, supabase_key) embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) -vector_store = SupabaseVectorStore(supabase, embeddings, table_name="documents") +vector_store = SupabaseVectorStore( + supabase, embeddings, table_name="documents") +models = ["gpt-3.5-turbo", "gpt-4"] +if anthropic_api_key: + models += ["claude-v1", "claude-v1.3", + "claude-instant-v1-100k", "claude-instant-v1.1-100k"] # Set the theme st.set_page_config( page_title="Second Brain", layout="wide", initial_sidebar_state="expanded", - ) st.title("🧠 Second Brain 🧠") -st.markdown("Store your knowledge in a vector store and query it with OpenAI's GPT-3/4.") +st.markdown( + "Store your knowledge in a vector store and query it with OpenAI's GPT-3/4.") st.markdown("---\n\n") # Initialize session state variables @@ -40,31 +45,40 @@ if 'chunk_size' not in st.session_state: st.session_state['chunk_size'] = 500 if 'chunk_overlap' not in st.session_state: st.session_state['chunk_overlap'] = 0 +if 'max_tokens' not in st.session_state: + st.session_state['max_tokens'] = 256 # Create a radio 
button for user to choose between adding knowledge or asking a question -user_choice = st.radio("Choose an action", ('Add Knowledge', 'Chat with your Brain','Forget' )) +user_choice = st.radio( + "Choose an action", ('Add Knowledge', 'Chat with your Brain', 'Forget')) st.markdown("---\n\n") - - if user_choice == 'Add Knowledge': # Display chunk size and overlap selection only when adding knowledge - st.sidebar.title("Configuration") - st.sidebar.markdown("Choose your chunk size and overlap for adding knowledge.") - st.session_state['chunk_size'] = st.sidebar.slider("Select Chunk Size", 100, 1000, st.session_state['chunk_size'], 50) - st.session_state['chunk_overlap'] = st.sidebar.slider("Select Chunk Overlap", 0, 100, st.session_state['chunk_overlap'], 10) + st.sidebar.title("Configuration") + st.sidebar.markdown( + "Choose your chunk size and overlap for adding knowledge.") + st.session_state['chunk_size'] = st.sidebar.slider( + "Select Chunk Size", 100, 1000, st.session_state['chunk_size'], 50) + st.session_state['chunk_overlap'] = st.sidebar.slider( + "Select Chunk Overlap", 0, 100, st.session_state['chunk_overlap'], 10) file_uploader(supabase, openai_api_key, vector_store) elif user_choice == 'Chat with your Brain': # Display model and temperature selection only when asking questions - st.sidebar.title("Configuration") - st.sidebar.markdown("Choose your model and temperature for asking questions.") - st.session_state['model'] = st.sidebar.selectbox("Select Model", ["gpt-3.5-turbo", "gpt-4"], index=("gpt-3.5-turbo", "gpt-4").index(st.session_state['model'])) - st.session_state['temperature'] = st.sidebar.slider("Select Temperature", 0.0, 1.0, st.session_state['temperature'], 0.1) - chat_with_doc(openai_api_key, vector_store) + st.sidebar.title("Configuration") + st.sidebar.markdown( + "Choose your model and temperature for asking questions.") + st.session_state['model'] = st.sidebar.selectbox( + "Select Model", models, 
index=(models).index(st.session_state['model'])) + st.session_state['temperature'] = st.sidebar.slider( + "Select Temperature", 0.0, 1.0, st.session_state['temperature'], 0.1) + st.session_state['max_tokens'] = st.sidebar.slider( + "Select Max Tokens", 256, 2048, st.session_state['max_tokens'], 2048) + chat_with_doc(st.session_state['model'], vector_store) elif user_choice == 'Forget': st.sidebar.title("Configuration") - + brain(supabase) -st.markdown("---\n\n") \ No newline at end of file +st.markdown("---\n\n") diff --git a/question.py b/question.py index 8e875f67e..6f8e9d340 100644 --- a/question.py +++ b/question.py @@ -1,14 +1,35 @@ import streamlit as st +from streamlit.logger import get_logger from langchain.chains import ConversationalRetrievalChain from langchain.memory import ConversationBufferMemory from langchain.llms import OpenAI +from langchain.chat_models import ChatAnthropic +from langchain.vectorstores import SupabaseVectorStore -memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) +memory = ConversationBufferMemory( + memory_key="chat_history", return_messages=True) +openai_api_key = st.secrets.openai_api_key +anthropic_api_key = st.secrets.anthropic_api_key +logger = get_logger(__name__) -def chat_with_doc(openai_api_key, vector_store): - question = st.text_input("## Ask a question") + +def chat_with_doc(model, vector_store: SupabaseVectorStore): + question = st.text_area("## Ask a question") button = st.button("Ask") if button: - qa = ConversationalRetrievalChain.from_llm(OpenAI(model_name=st.session_state['model'], openai_api_key=openai_api_key, temperature=st.session_state['temperature']), vector_store.as_retriever(), memory=memory) - result = qa({"question": question}) - st.write(result["answer"]) \ No newline at end of file + if model.startswith("gpt"): + logger.info('Using OpenAI model %s', model) + qa = ConversationalRetrievalChain.from_llm( + OpenAI( + model_name=st.session_state['model'], 
openai_api_key=openai_api_key, temperature=st.session_state['temperature'], max_tokens=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True) + result = qa({"question": question}) + logger.info('Result: %s', result) + st.write(result["answer"]) + elif anthropic_api_key and model.startswith("claude"): + logger.info('Using Anthropics model %s', model) + qa = ConversationalRetrievalChain.from_llm( + ChatAnthropic( + model=st.session_state['model'], anthropic_api_key=anthropic_api_key, temperature=st.session_state['temperature'], max_tokens_to_sample=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True, max_tokens_limit=102400) + result = qa({"question": question}) + logger.info('Result: %s', result) + st.write(result["answer"]) diff --git a/requirements.txt b/requirements.txt index 981d00b21..f20890a8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ StrEnum==0.4.10 supabase==1.0.3 tiktoken==0.4.0 unstructured==0.6.5 - +anthropic==0.2.8