diff --git a/community/boilerplates/observability/enterprise/grafana/dashboards/hasura/hasura-overview.json b/community/boilerplates/observability/enterprise/grafana/dashboards/hasura/hasura-overview.json index 64b9b67893f..b04842f3762 100644 --- a/community/boilerplates/observability/enterprise/grafana/dashboards/hasura/hasura-overview.json +++ b/community/boilerplates/observability/enterprise/grafana/dashboards/hasura/hasura-overview.json @@ -3453,7 +3453,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Total number of incoming requests for cache lookup", + "description": "Postgres connection errors from GraphQL Engine instances", "fieldConfig": { "defaults": { "color": { @@ -3501,6 +3501,10 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 1 } ] }, @@ -3514,7 +3518,7 @@ "x": 12, "y": 81 }, - "id": 57, + "id": 66, "options": { "legend": { "calcs": [], @@ -3534,25 +3538,13 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(hasura_cache_request_count{job=~\"$job\",instance=~\"$instance\",status=\"hit\"}[$__rate_interval]))", - "legendFormat": "Cache Hit", + "expr": "sum by (job, role,conn_info,source_name) (increase(hasura_postgres_connection_error_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) ", + "legendFormat": "__auto", "range": true, - "refId": "Hit" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(hasura_cache_request_count{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", - "hide": false, - "legendFormat": "Total", - "range": true, - "refId": "Total" + "refId": "A" } ], - "title": "Cache Request Rate", + "title": "Postgres Connectionr Errors", "type": "timeseries" }, { @@ -3748,6 +3740,113 @@ ], "title": "Postgres Pool Wait Time (P95)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Total number of incoming requests for cache lookup", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 95 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(hasura_cache_request_count{job=~\"$job\",instance=~\"$instance\",status=\"hit\"}[$__rate_interval]))", + "legendFormat": "Cache Hit", + "range": true, + "refId": "Hit" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(hasura_cache_request_count{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "Total" + } + ], + "title": "Cache Request Rate", + "type": "timeseries" } ], "refresh": "", @@ -3832,6 +3931,6 @@ "timezone": "", "title": "Hasura Overview", "uid": "Of9GFjr7z", - "version": 2, + "version": 1, "weekStart": "" } \ No newline at end of file diff --git a/docs/docs/observability/enterprise-edition/prometheus/metrics.mdx b/docs/docs/observability/enterprise-edition/prometheus/metrics.mdx index 7979050333b..881d0bb1a81 100644 --- a/docs/docs/observability/enterprise-edition/prometheus/metrics.mdx +++ b/docs/docs/observability/enterprise-edition/prometheus/metrics.mdx @@ -485,6 +485,16 @@ The time taken to acquire a connection from the pool. | Labels | `source_name`: name of the database
`conn_info`: connection url string (password omitted) or name of the connection url environment variable
`role`: primary \| replica | | Unit | seconds | +#### Hasura Postgres Connection Errors Total + +Total number of PostgreSQL connection errors. + +| | | +| ------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Name | `hasura_postgres_connection_error_total` | +| Type | Counter | +| Labels | `source_name`: name of the database
`conn_info`: connection url string (password omitted) or name of the connection url environment variable
`role`: primary \| replica | + ### Hasura source health Health check status of a particular data source, corresponding to the output of `/healthz/sources`, with possible values diff --git a/rfcs/v3/engine-plugins.md b/rfcs/v3/engine-plugins.md deleted file mode 100644 index 21a70f4aeb4..00000000000 --- a/rfcs/v3/engine-plugins.md +++ /dev/null @@ -1,346 +0,0 @@ -# Engine-plugins in Hasura V3 - -This document focuses on the implementation details for HTTP-based engine -plugins. - -## Pre-parse Hook - -For a pre-parse plugin, the request to the plugin is performed just after -receiving the request to the engine. - -### Configuration - -The pre-parse plugin can be configured using an OpenDD object of kind `LifecyclePluginHook`. It includes the following information: - -1. The engine-plugin URL -2. Request Includes (this can be used to optimize critical engine plugins): - 1. Request Headers - 2. Graphql request - 3. Variables - -Please note that the presence of `operationName` is not configurable, and -including/excluding operation name won't have much impact on the request size. - -An example of configuration JSON is: - -```json -{ - "kind": "LifecyclePluginHook", - "version": "v1", - "definition": { - "pre": "parse", - "name": "test", - "url": "http://localhost:8787", - "config": { - "request": { - "headers": { - "additional": { - "hasura-m-auth": { - "value": "zZkhKqFjqXR4g5MZCsJUZCnhCcoPyZ" - } - } - }, - "session": {}, - "rawRequest": { - "query": {}, - "variables": {} - } - } - } - } -} -``` - -### Request - -The request to the pre-parse hook should have sufficient information to cater to -the following planned use cases: - -1. Rate limits -2. Depth limits -3. Node limits -4. Caching (get-cache) - -The request should have the following: - -1. Headers: Include information for the uniqueness of the request (origin, - session variables, etc.), cache control information, etc. -2. Hasura’s session information: Role and session variables -3. Raw request: Raw request received by graphql-engine (including variables) - -```json -{ - "session": , - "rawRequest": -} -``` - -### Response - -The response of a pre-parse hook can be of three types: - -1. Return with a response: The engine-plugin has handled the request, and the - graphql-engine should return the response provided by the engine-plugin. - (Should we check if the response is valid according to the spec?) -2. Continue with the execution: The graphql-engine should proceed with the - request handling. -3. Error response: Abort the request with the error response. - -As suggested by @SamirTalwar, we can also use HTTP status codes to decide the -type of the response, i.e. - -1. 200s HTTP status code will mean either: - 1. 200: A successful response - 2. 204: Or continued execution -2. 400 HTTP status code will mean user error -3. 500 HTTP status code will mean an internal error - -#### Success response - -HTTP code: 200 - -``` - -``` - -#### Continue with execution - -HTTP code: 204 There should be no response body for this case - -#### Error - -A pre-plugin response can be of two types: - -1. User error: This will include errors that can be propagated to the user. - - HTTP code: 400 - - ``` - - ``` - -2. Internal error: Internal errors are encountered while handling the request. - The engine-plugin can dictate the engine to either abort the execution or - continue with the request. The internal errors will not be propagated to the - users; they will only be part of the traces. 
- - HTTP code: 500 - ```json - { - "details": , - "action": - } - ``` - -## Pre-response hook - -A pre-response hook is called just before returning a response to the user. For -now, we will have asynchronous pre-response hooks only. - -An asynchronous hook will be useful for the following use cases: - -1. Caching (cache-set) -2. Custom business logic: Send mail/slack notifications for mutations - -### Configuration - -Like a pre-parse hook, a pre-response hook can also be configured using a -configuration file. The request can be configured to omit a few fields if -needed. - -An example of configuration JSON is: - -```json -{ - "kind": "LifecyclePluginHook", - "version": "v1", - "definition": { - "pre": "response", - "name": "test", - "url": "http://localhost:8787", - "config": { - "request": { - "headers": { - "additional": { - "hasura-m-auth": { - "value": "zZkhKqFjqXR4g5MZCsJUZCnhCcoPyZ" - } - } - }, - "session": {}, - "rawRequest": { - "query": {}, - "variables": {} - }, - "response": {} - } - } - } -} -``` - -### Request - -A pre-response hook’s request can have the following fields: - -1. Raw request: The raw request for which the engine has generated the response. -2. Session: The role and session variables -3. Engine’s response: The response that we have generated after executing the - query. -4. Request headers: This can be important for caching engine plugins - -```json -{ - "session": , - "rawRequest": , - "response": -} -``` - -### Response - -For asynchronous pre-response hook, the request can be either of the two: - -1. Success -2. Error - -#### Async Success Response - -HTTP Code: 200s - -``` -There need not be any response body. -``` - -#### Async Error Response - -HTTP code 400s - -``` - -``` - -The error details will be part of the traces. - -## Multiple engine-plugins - -The engine can handle multiple engine plugins. - -### Pre-plugins - -For example, multiple pre-plugins can be thought of as a pipeline: - -``` - _____________________ ______________________ __________________ - | | | | | | - Request--->| Pre-parse Plugin 1 |---->| Pre-parse Plugin 2 |---->| Engine Execution |--... - |_____________________| |______________________| |__________________| -``` - -For plugin 2, we will do the following: - -- If plugin 1 responds successfully/error, we will NOT call plugin 2, and there - will be a short-circuit. -- Only for the continued execution case will we call plugin 2. -- The request to all the pre-plugin will be the same (the raw request and - session information are not going to change) - -### Pre-response - -Multiple pre-response engine plugins can also be handled. Since they are async -in nature, we can execute them in parallel: - -``` -Engine execution ------> To the user - | ________________ - | | Async | - |---->| Pre-response | - | | Plugin 1 | - | |________________| - | ________________ - | | Async | - |---->| Pre-response | - | | Plugin 2 | - | |________________| - ... -``` - -## How will this look in the metadata? - -Engine plugins will be part of the metadata (OpenDD). This will be more like the -`AuthConfig` and will be handled while building the artifacts. - -The engine-plugin artifacts will be similar to how we store `AuthConfig` -artifacts right now. We will have new artifacts (pre-parse and pre-response -plugin artifacts). - -Each artifact will have a list of engine plugins in the order of execution. 
For -example: - -``` - __________________ -| ______________ | -| | Pre-parse 1 | | ________________ ________________ __________________ -| |______________| | | | | | | | -| ______________ | =====> Request--->| Pre-Parse 1 |---->| Pre-Parse 2 |---->| Engine Execution |--... -| | Pre-parse 2 | | |________________| |________________| |__________________| -| |______________| | -|__________________| -``` - -For pre-response, the order doesn’t matter right now, but we will still maintain -an order (to future-proof for synchronous pre-response). - -There are a few caveats with the ordering of engine plugins for the multitenant -engine or DDN cloud: Auth plugin (once converted to an engine plugin, will -always be executed first). - -## Future plans - -### Synchronous pre-response hook - -A synchronous hook can be useful for response transformation using something -like kriti-lang. - -For synchronous pre-response hooks, the response can be similar to the pre-parse -hook. I.e., it can be one of the three: Return with a response: The engine -plugin has handled the request, and the graphql-engine should return the -response provided by the engine plugin (and ignore the response generated by the -engine). Return with engine’s response: The graphql-engine should proceed with -the engine’s response. Error response: Abort the request with the error -response. - -Synchronous pre-response engine-plugins will be daisy-chained with one another: - -``` - __________________ __________________ - | | | | -Engine execution --->| pre-response 1 |---->| pre-response 2 |----> ... - |__________________| |__________________| -``` - -For synchronous pre-response, the response will be the response from the -previous node (i.e., for response 1, the response will be generated by the -engine, but for pre-response 2, it will be dependent on pre-response 1). Here -also, in case of an error response, we will short-circuit the execution stack. - -#### Mixing synchronous and asynchronous pre-response - -In case there are multiple synchronous as well as asynchronous pre-response, the -execution stack will look like this: First, we will handle all the synchronous -pre-response. In the end, we will handle the asynchronous ones. - -``` - _________________ _________________ - | Sync | | Sync | -Engine execution --->| pre-response 1 |---->| pre-response 2 |-------> To the user - |_________________| |_________________| | _________________ - | | Async | - |---->| pre-response 1 | - | |_________________| - | _________________ - | | Async | - |---->| pre-response 2 | - | |_________________| - ... 
-```
diff --git a/server/lib/pg-client/src/Database/PG/Query/Pool.hs b/server/lib/pg-client/src/Database/PG/Query/Pool.hs
index ff11ad16dc0..1f6688acaf3 100644
--- a/server/lib/pg-client/src/Database/PG/Query/Pool.hs
+++ b/server/lib/pg-client/src/Database/PG/Query/Pool.hs
@@ -59,6 +59,8 @@ import Language.Haskell.TH.Quote (QuasiQuoter (..))
 import Language.Haskell.TH.Syntax (Exp, Q, lift, qAddDependentFile, runIO)
 import System.Metrics.Distribution (Distribution)
 import System.Metrics.Distribution qualified as EKG.Distribution
+import System.Metrics.Prometheus.Counter (Counter)
+import System.Metrics.Prometheus.Counter qualified as Counter
 import System.Metrics.Prometheus.Histogram (Histogram)
 import System.Metrics.Prometheus.Histogram qualified as Histogram
 import Prelude
@@ -92,7 +94,9 @@ data PGPoolMetrics = PGPoolMetrics
   { -- | time taken to establish and initialise a PostgreSQL connection
     _pgConnAcquireLatencyMetric :: !Histogram,
     -- | time taken to acquire a connection from the pool
-    _poolWaitTimeMetric :: !Histogram
+    _poolWaitTimeMetric :: !Histogram,
+    -- | total number of PostgreSQL connection errors
+    _pgErrorTotalMetric :: !Counter
   }
 
 getInUseConnections :: PGPool -> IO Int
@@ -129,6 +133,7 @@ initPGPoolMetrics :: IO PGPoolMetrics
 initPGPoolMetrics = do
   _pgConnAcquireLatencyMetric <- Histogram.new histogramBuckets
   _poolWaitTimeMetric <- Histogram.new histogramBuckets
+  _pgErrorTotalMetric <- Counter.new
   pure PGPoolMetrics {..}
   where
     histogramBuckets = [0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100]
@@ -151,7 +156,7 @@ initPGPool ci context cp logger = do
     retryP = mkPGRetryPolicy $ ciRetries ci
     creator stats metrics = do
      createdAt <- getCurrentTime
-      pqConn <- initPQConn ci logger
+      pqConn <- initPQConn ci logger `Exc.onException` Counter.inc (_pgErrorTotalMetric metrics)
      connAcquiredAt <- getCurrentTime
      let connAcquiredMicroseconds = realToFrac (1000000 * diffUTCTime connAcquiredAt createdAt)
          connAcquiredSeconds = realToFrac $ diffUTCTime connAcquiredAt createdAt
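
Note (illustration only, not part of the patch): the `Exc.onException` wrapper added above increments the new counter when `initPQConn` throws and then re-throws the original exception, so the pool's existing retry and error handling is unchanged; that counter is what backs `hasura_postgres_connection_error_total` and the new dashboard panel. A minimal standalone sketch of the pattern, reusing the same `System.Metrics.Prometheus.Counter` module the patch imports (the module name, `trackConnectionErrors`, and the `connect` argument are hypothetical):

```haskell
{-# LANGUAGE ImportQualifiedPost #-}

-- Sketch only: not the engine's actual code path.
module TrackConnectionErrors (trackConnectionErrors) where

import Control.Exception (onException)
import System.Metrics.Prometheus.Counter (Counter)
import System.Metrics.Prometheus.Counter qualified as Counter

-- | Run a connection action; if it throws, bump the error counter and
-- re-throw the original exception ('onException' never swallows it),
-- so callers still observe the failure exactly as before.
trackConnectionErrors :: Counter -> IO conn -> IO conn
trackConnectionErrors errorCounter connect =
  connect `onException` Counter.inc errorCounter
```

On the Prometheus side, the dashboard panel added above sums `increase(hasura_postgres_connection_error_total{...}[$__rate_interval])` by `job`, `role`, `conn_info`, and `source_name`, so any non-zero series identifies which database source and primary/replica role is failing to connect.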