diff --git a/.backend_env.example b/.backend_env.example
index 9881cea08..e0c66a81c 100644
--- a/.backend_env.example
+++ b/.backend_env.example
@@ -8,3 +8,9 @@ GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
 GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
 MAX_BRAIN_SIZE=52428800
 MAX_REQUESTS_NUMBER=200
+
+#Private LLM Variables
+PRIVATE=False
+MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
+MODEL_N_CTX=1000
+MODEL_N_BATCH=8
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1b80652eb..a0eb89fb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,6 @@ streamlit-demo/.streamlit/secrets.toml
 backend/pandoc-*
 **/.pandoc-*
 backend/application_default_credentials.json
+
+#local models
+backend/local_models/*
\ No newline at end of file
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 01c652f49..a4a7d322e 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11-buster
+FROM python:3.11-bullseye
 
 # Install GEOS library
 RUN apt-get update && apt-get install -y libgeos-dev
@@ -9,6 +9,17 @@ COPY ./requirements.txt /code/requirements.txt
 
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
 
+# Install additional dependencies
+RUN apt-get install -y liblzma-dev cmake
+
+# Build GPT4All from source (required for GPT4All langchain bindings)
+RUN cd /tmp && git clone --recurse-submodules https://github.com/nomic-ai/gpt4all && \
+    cd gpt4all/gpt4all-backend/ && \
+    mkdir build && cd build && \
+    cmake .. && cmake --build . --parallel && \
+    cd ../../gpt4all-bindings/python && \
+    pip3 install -e .
+
 COPY . /code/
 
-CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
\ No newline at end of file
+CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
diff --git a/backend/llm/brainpicking.py b/backend/llm/brainpicking.py
index f6a313410..d7c1e6422 100644
--- a/backend/llm/brainpicking.py
+++ b/backend/llm/brainpicking.py
@@ -1,33 +1,38 @@
 import os  # A module to interact with the OS
-from typing import Any, Dict, List  # For type hinting
+from typing import Any, Dict, List
+from models.settings import LLMSettings  # For type hinting
 
 # Importing various modules and classes from a custom library 'langchain' likely used for natural language processing
 from langchain.chains import ConversationalRetrievalChain, LLMChain
 from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.router.llm_router import (LLMRouterChain,
-                                                RouterOutputParser)
-from langchain.chains.router.multi_prompt_prompt import \
-    MULTI_PROMPT_ROUTER_TEMPLATE
+from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
+from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
 from langchain.chat_models import ChatOpenAI, ChatVertexAI
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.llms import GPT4All
+from langchain.llms.base import LLM
 from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import SupabaseVectorStore
 from llm.prompt import LANGUAGE_PROMPT
 from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
-from models.chats import \
-    ChatMessage  # Importing a custom ChatMessage class for handling chat messages
-from models.settings import \
-    BrainSettings  # Importing settings related to the 'brain'
-from pydantic import (BaseModel,  # For data validation and settings management
-                      BaseSettings)
-from supabase import (Client,  # For interacting with Supabase database
-                      create_client)
-from vectorstore.supabase import \
-    CustomSupabaseVectorStore  # Custom class for handling vector storage with Supabase
+from models.chats import (
+    ChatMessage,
+)  # Importing a custom ChatMessage class for handling chat messages
+from models.settings import BrainSettings  # Importing settings related to the 'brain'
+from pydantic import BaseModel  # For data validation and settings management
+from pydantic import BaseSettings
+from supabase import Client  # For interacting with Supabase database
+from supabase import create_client
+from vectorstore.supabase import (
+    CustomSupabaseVectorStore,
+)  # Custom class for handling vector storage with Supabase
+from logger import get_logger
+logger = get_logger(__name__)
 
 
 class AnswerConversationBufferMemory(ConversationBufferMemory):
     """
@@ -35,11 +40,12 @@ class AnswerConversationBufferMemory(ConversationBufferMemory):
     It overrides the save_context method to save the response using the 'answer' key in the outputs.
     Reference to some issue comment is given in the docstring.
     """
-    
+
     def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
         # Overriding the save_context method of the parent class
         return super(AnswerConversationBufferMemory, self).save_context(
-            inputs, {'response': outputs['answer']})
+            inputs, {"response": outputs["answer"]}
+        )
 
 
 def get_chat_history(inputs) -> str:
@@ -59,21 +65,22 @@ class BrainPicking(BaseModel):
     Main class for the Brain Picking functionality.
     It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
     """
-    
+
     # Default class attributes
     llm_name: str = "gpt-3.5-turbo"
     settings = BrainSettings()
+    llm_config = LLMSettings()
     embeddings: OpenAIEmbeddings = None
     supabase_client: Client = None
     vector_store: CustomSupabaseVectorStore = None
-    llm: ChatOpenAI = None
+    llm: LLM = None
     question_generator: LLMChain = None
     doc_chain: ConversationalRetrievalChain = None
-    
+
     class Config:
         # Allowing arbitrary types for class validation
         arbitrary_types_allowed = True
-    
+
     def init(self, model: str, user_id: str) -> "BrainPicking":
         """
         Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
@@ -82,15 +89,61 @@ class BrainPicking(BaseModel):
         :return: BrainPicking instance
         """
         self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
-        self.supabase_client = create_client(self.settings.supabase_url, self.settings.supabase_service_key)
+        self.supabase_client = create_client(
+            self.settings.supabase_url, self.settings.supabase_service_key
+        )
         self.vector_store = CustomSupabaseVectorStore(
-            self.supabase_client, self.embeddings, table_name="vectors", user_id=user_id)
-        self.llm = ChatOpenAI(temperature=0, model_name=model)
-        self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT)
+            self.supabase_client,
+            self.embeddings,
+            table_name="vectors",
+            user_id=user_id,
+        )
+
+        self.llm = self._determine_llm(
+            private_model_args={
+                "model_path": self.llm_config.model_path,
+                "n_ctx": self.llm_config.model_n_ctx,
+                "n_batch": self.llm_config.model_n_batch,
+            },
+            private=self.llm_config.private,
+            model_name=self.llm_name,
+        )
+        self.question_generator = LLMChain(
+            llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT
+        )
         self.doc_chain = load_qa_chain(self.llm, chain_type="stuff")
         return self
-    
-    def _get_qa(self, chat_message: ChatMessage, user_openai_api_key) -> ConversationalRetrievalChain:
+
+    def _determine_llm(
+        self, private_model_args: dict, private: bool = False, model_name: str = None
+    ) -> LLM:
+        """
+        Determine the language model to be used.
+        :param model_name: Language model name to be used.
+        :param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
+        :param private: Boolean value to determine if private model is to be used.
+        :return: Language model instance
+        """
+        if private:
+            model_path = private_model_args["model_path"]
+            model_n_ctx = private_model_args["n_ctx"]
+            model_n_batch = private_model_args["n_batch"]
+
+            logger.info("Using private model: %s", model_path)
+
+            return GPT4All(
+                model=model_path,
+                n_ctx=model_n_ctx,
+                n_batch=model_n_batch,
+                backend="gptj",
+                verbose=True,
+            )
+        else:
+            return ChatOpenAI(temperature=0, model_name=model_name)
+
+    def _get_qa(
+        self, chat_message: ChatMessage, user_openai_api_key
+    ) -> ConversationalRetrievalChain:
         """
         Retrieves a QA chain for the given chat message and API key.
         :param chat_message: The chat message containing history.
@@ -100,12 +153,15 @@ class BrainPicking(BaseModel):
         # If user provided an API key, update the settings
         if user_openai_api_key is not None and user_openai_api_key != "":
             self.settings.openai_api_key = user_openai_api_key
-        
+
         # Initialize and return a ConversationalRetrievalChain
         qa = ConversationalRetrievalChain(
-            retriever=self.vector_store.as_retriever(),
-            max_tokens_limit=chat_message.max_tokens, question_generator=self.question_generator,
-            combine_docs_chain=self.doc_chain, get_chat_history=get_chat_history)
+            retriever=self.vector_store.as_retriever(),
+            max_tokens_limit=chat_message.max_tokens,
+            question_generator=self.question_generator,
+            combine_docs_chain=self.doc_chain,
+            get_chat_history=get_chat_history,
+        )
         return qa
 
     def generate_answer(self, chat_message: ChatMessage, user_openai_api_key) -> str:
@@ -119,15 +175,17 @@ class BrainPicking(BaseModel):
 
         # Get the QA chain
         qa = self._get_qa(chat_message, user_openai_api_key)
-        
+
         # Transform the chat history into a list of tuples
         for i in range(0, len(chat_message.history) - 1, 2):
             user_message = chat_message.history[i][1]
             assistant_message = chat_message.history[i + 1][1]
             transformed_history.append((user_message, assistant_message))
-        
+
         # Generate the model response using the QA chain
-        model_response = qa({"question": chat_message.question, "chat_history": transformed_history})
-        answer = model_response['answer']
+        model_response = qa(
+            {"question": chat_message.question, "chat_history": transformed_history}
+        )
+        answer = model_response["answer"]
 
         return answer
diff --git a/backend/models/settings.py b/backend/models/settings.py
index b65297a3b..8f9192614 100644
--- a/backend/models/settings.py
+++ b/backend/models/settings.py
@@ -13,6 +13,11 @@ class BrainSettings(BaseSettings):
     supabase_url: str
     supabase_service_key: str
 
+class LLMSettings(BaseSettings):
+    private: bool
+    model_path: str
+    model_n_ctx: int
+    model_n_batch: int
 
 def common_dependencies() -> dict:
     settings = BrainSettings()
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 5b8ed2623..4a9817bdb 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,3 +22,4 @@ transformers==4.30.1
 asyncpg==0.27.0
 flake8==6.0.0
 flake8-black==0.3.6
+sentence_transformers>=2.0.0
diff --git a/backend/vectorstore/supabase.py b/backend/vectorstore/supabase.py
index 9f8c186af..0b2992cc9 100644
--- a/backend/vectorstore/supabase.py
+++ b/backend/vectorstore/supabase.py
@@ -1,15 +1,9 @@
 from typing import Any, List
 
-from langchain.chains import ConversationalRetrievalChain, LLMChain
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chat_models import ChatOpenAI, ChatVertexAI
-from langchain.client import arun_on_dataset
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
-from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores import SupabaseVectorStore
-from supabase import Client, create_client
+from supabase import Client
 
 
 class CustomSupabaseVectorStore(SupabaseVectorStore):
@@ -22,7 +16,6 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
     def similarity_search(
         self,
         query: str,
-        user_id: str = "none",
         table: str = "match_vectors",
         k: int = 6,
         threshold: float = 0.5,
diff --git a/docs/docs/backend/llm/_category_.json b/docs/docs/backend/llm/_category_.json
new file mode 100644
index 000000000..174c44918
--- /dev/null
+++ b/docs/docs/backend/llm/_category_.json
@@ -0,0 +1,8 @@
+{
"label": "LLM", + "position": 1, + "link": { + "type": "generated-index", + "description": "How does the LLM (Large Language Model Work)?" + } +} diff --git a/docs/docs/backend/llm/private-llm.md b/docs/docs/backend/llm/private-llm.md new file mode 100644 index 000000000..5283673ff --- /dev/null +++ b/docs/docs/backend/llm/private-llm.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +--- + +# Private LLM + +Quivr now has the capability to use a private LLM model powered by GPT4All (other open source models coming soon). + +This is simular to the functionality provided by the PrivateGPT project. + +This means that your data never leaves the server. The LLM is downloaded to the server and runs inference on your question locally. + +## How to use +Set the 'private' flag to True in the /backend/.env file. You can also set other model parameters in the .env file. + +Download the GPT4All model from [here]( + https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin) and place it in the /backend/local_models folder. + +## Future Plans +We are planning to add more models to the private LLM feature. We are also planning on using a local embedding model from Hugging Face to reduce our reliance on OpenAI's API. + +We will also be adding the ability to use a private LLM model from in the frontend and api. Currently it is only available if you self host the backend. +