From 8f85b110aa43c17694a103df81fdf9dc098f6c19 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Sun, 12 Feb 2023 17:50:57 +0100 Subject: [PATCH] Split Clickhouse pools into Read-Only and Read/Write (dedicated to writes) (#2661) * Configure ingest repo access/pool size If I'm not mistaken 3 is a sane default, the only inserts we're doing are: - session buffer dump - events buffer dump - GA import dump And all are serializable within their scopes? * Add IngestRepo * Start IngestRepo * Use IngestRepo for inserts * Annotate ClickhouseRepo as read_only So no insert* functions are expanded * Update moduledoc * rename alias * Fix default env var value so it can be cast * Use IngestRepo for migrations * Raise default ingest pool size from 3 to 5 in case conns are restarting or else... * Ensure all Repo prometheus metrics are collected --- config/config.exs | 2 +- config/runtime.exs | 17 ++++++++++++++++- lib/plausible/application.ex | 1 + lib/plausible/clickhouse_repo.ex | 3 ++- lib/plausible/event/write_buffer.ex | 8 +++++--- lib/plausible/google/buffer.ex | 2 +- lib/plausible/ingest_repo.ex | 17 +++++++++++++++++ lib/plausible/prom_ex.ex | 2 +- lib/plausible/session/write_buffer.ex | 8 +++++--- ...0200915070607_create_events_and_sessions.exs | 0 .../migrations/20200918075025_add_utm_tags.exs | 0 .../20201020083739_add_event_metadata.exs | 0 ...25234_add_browser_version_and_os_version.exs | 0 .../migrations/20210323130440_add_sample_by.exs | 0 ...20210712214034_add_more_location_details.exs | 0 .../20211017093035_add_utm_content_and_term.exs | 0 .../20211112130238_create_imported_tables.exs | 0 .../20220310104931_add_transferred_from.exs | 0 ...0220404123000_add_entry_props_to_session.exs | 0 .../20220421161259_remove_entry_props.exs | 0 .../20220422075510_add_entry_props.exs | 0 ...0348_add_city_name_to_imported_locations.exs | 0 test/plausible/imported/imported_test.exs | 2 +- test/support/test_utils.ex | 8 ++++---- 24 files changed, 54 insertions(+), 16 deletions(-) 
create mode 100644 lib/plausible/ingest_repo.ex rename priv/{clickhouse_repo => ingest_repo}/migrations/20200915070607_create_events_and_sessions.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20200918075025_add_utm_tags.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20201020083739_add_event_metadata.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20201106125234_add_browser_version_and_os_version.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20210323130440_add_sample_by.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20210712214034_add_more_location_details.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20211017093035_add_utm_content_and_term.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20211112130238_create_imported_tables.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20220310104931_add_transferred_from.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20220404123000_add_entry_props_to_session.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20220421161259_remove_entry_props.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20220422075510_add_entry_props.exs (100%) rename priv/{clickhouse_repo => ingest_repo}/migrations/20230124140348_add_city_name_to_imported_locations.exs (100%) diff --git a/config/config.exs b/config/config.exs index a514f79ad..a07f5c9a1 100644 --- a/config/config.exs +++ b/config/config.exs @@ -1,7 +1,7 @@ import Config config :plausible, - ecto_repos: [Plausible.Repo, Plausible.ClickhouseRepo] + ecto_repos: [Plausible.Repo, Plausible.IngestRepo] config :plausible, PlausibleWeb.Endpoint, pubsub_server: Plausible.PubSub, diff --git a/config/runtime.exs b/config/runtime.exs index 8e6df2e44..94db325cf 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -81,6 +81,14 @@ ch_db_url = 
"http://plausible_events_db:8123/plausible_events_db" ) +{ingest_pool_size, ""} = + get_var_from_path_or_env( + config_dir, + "CLICKHOUSE_INGEST_POOL_SIZE", + "5" + ) + |> Integer.parse() + {ch_flush_interval_ms, ""} = config_dir |> get_var_from_path_or_env("CLICKHOUSE_FLUSH_INTERVAL_MS", "5000") @@ -268,12 +276,19 @@ config :plausible, :google, max_buffer_size: get_int_from_path_or_env(config_dir, "GOOGLE_MAX_BUFFER_SIZE", 10_000) config :plausible, Plausible.ClickhouseRepo, + loggers: [Ecto.LogEntry], + queue_target: 500, + queue_interval: 2000, + url: ch_db_url + +config :plausible, Plausible.IngestRepo, loggers: [Ecto.LogEntry], queue_target: 500, queue_interval: 2000, url: ch_db_url, flush_interval_ms: ch_flush_interval_ms, - max_buffer_size: ch_max_buffer_size + max_buffer_size: ch_max_buffer_size, + pool_size: ingest_pool_size case mailer_adapter do "Bamboo.PostmarkAdapter" -> diff --git a/lib/plausible/application.ex b/lib/plausible/application.ex index 24e85432c..fa9660d93 100644 --- a/lib/plausible/application.ex +++ b/lib/plausible/application.ex @@ -9,6 +9,7 @@ defmodule Plausible.Application do children = [ Plausible.Repo, Plausible.ClickhouseRepo, + Plausible.IngestRepo, {Finch, name: Plausible.Finch, pools: finch_pool_config()}, {Phoenix.PubSub, name: Plausible.PubSub}, Plausible.Session.Salts, diff --git a/lib/plausible/clickhouse_repo.ex b/lib/plausible/clickhouse_repo.ex index 67cfa61bf..cc8f5f206 100644 --- a/lib/plausible/clickhouse_repo.ex +++ b/lib/plausible/clickhouse_repo.ex @@ -1,7 +1,8 @@ defmodule Plausible.ClickhouseRepo do use Ecto.Repo, otp_app: :plausible, - adapter: ClickhouseEcto + adapter: ClickhouseEcto, + read_only: true defmacro __using__(_) do quote do diff --git a/lib/plausible/event/write_buffer.ex b/lib/plausible/event/write_buffer.ex index 6dc3e7ed8..ccb3fcb25 100644 --- a/lib/plausible/event/write_buffer.ex +++ b/lib/plausible/event/write_buffer.ex @@ -2,6 +2,8 @@ defmodule Plausible.Event.WriteBuffer do use GenServer 
require Logger + alias Plausible.IngestRepo + def start_link(_opts) do GenServer.start_link(__MODULE__, [], name: __MODULE__) end @@ -62,15 +64,15 @@ defmodule Plausible.Event.WriteBuffer do events -> Logger.info("Flushing #{length(events)} events") events = Enum.map(events, &(Map.from_struct(&1) |> Map.delete(:__meta__))) - Plausible.ClickhouseRepo.insert_all(Plausible.ClickhouseEvent, events) + IngestRepo.insert_all(Plausible.ClickhouseEvent, events) end end defp flush_interval_ms() do - Keyword.fetch!(Application.get_env(:plausible, Plausible.ClickhouseRepo), :flush_interval_ms) + Keyword.fetch!(Application.get_env(:plausible, IngestRepo), :flush_interval_ms) end defp max_buffer_size() do - Keyword.fetch!(Application.get_env(:plausible, Plausible.ClickhouseRepo), :max_buffer_size) + Keyword.fetch!(Application.get_env(:plausible, IngestRepo), :max_buffer_size) end end diff --git a/lib/plausible/google/buffer.ex b/lib/plausible/google/buffer.ex index 9000c4909..b43160dfa 100644 --- a/lib/plausible/google/buffer.ex +++ b/lib/plausible/google/buffer.ex @@ -91,6 +91,6 @@ defmodule Plausible.Google.Buffer do Process.sleep(1000) Logger.info("Import: Flushing #{length(records)} from #{table_name} buffer") - Plausible.ClickhouseRepo.insert_all(table_name, records) + Plausible.IngestRepo.insert_all(table_name, records) end end diff --git a/lib/plausible/ingest_repo.ex b/lib/plausible/ingest_repo.ex new file mode 100644 index 000000000..7928bd901 --- /dev/null +++ b/lib/plausible/ingest_repo.ex @@ -0,0 +1,17 @@ +defmodule Plausible.IngestRepo do + @moduledoc """ + Write-centric Clickhouse access interface + """ + + use Ecto.Repo, + otp_app: :plausible, + adapter: ClickhouseEcto + + defmacro __using__(_) do + quote do + alias Plausible.IngestRepo + import Ecto + import Ecto.Query, only: [from: 1, from: 2] + end + end +end diff --git a/lib/plausible/prom_ex.ex b/lib/plausible/prom_ex.ex index 550b5c05a..cf903eb01 100644 --- a/lib/plausible/prom_ex.ex +++ 
b/lib/plausible/prom_ex.ex @@ -9,7 +9,7 @@ defmodule Plausible.PromEx do Plugins.Application, Plugins.Beam, {Plugins.Phoenix, router: PlausibleWeb.Router, endpoint: PlausibleWeb.Endpoint}, - Plugins.Ecto, + {Plugins.Ecto, repos: [Plausible.Repo, Plausible.ClickhouseRepo, Plausible.IngestRepo]}, Plugins.Oban, Plausible.PromEx.Plugins.PlausibleMetrics ] diff --git a/lib/plausible/session/write_buffer.ex b/lib/plausible/session/write_buffer.ex index 563b6d7a8..de23a6802 100644 --- a/lib/plausible/session/write_buffer.ex +++ b/lib/plausible/session/write_buffer.ex @@ -2,6 +2,8 @@ defmodule Plausible.Session.WriteBuffer do use GenServer require Logger + alias Plausible.IngestRepo + def start_link(_opts) do GenServer.start_link(__MODULE__, [], name: __MODULE__) end @@ -67,15 +69,15 @@ defmodule Plausible.Session.WriteBuffer do |> Enum.map(&(Map.from_struct(&1) |> Map.delete(:__meta__))) |> Enum.reverse() - Plausible.ClickhouseRepo.insert_all(Plausible.ClickhouseSession, sessions) + IngestRepo.insert_all(Plausible.ClickhouseSession, sessions) end end defp flush_interval_ms() do - Keyword.fetch!(Application.get_env(:plausible, Plausible.ClickhouseRepo), :flush_interval_ms) + Keyword.fetch!(Application.get_env(:plausible, IngestRepo), :flush_interval_ms) end defp max_buffer_size() do - Keyword.fetch!(Application.get_env(:plausible, Plausible.ClickhouseRepo), :max_buffer_size) + Keyword.fetch!(Application.get_env(:plausible, IngestRepo), :max_buffer_size) end end diff --git a/priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs b/priv/ingest_repo/migrations/20200915070607_create_events_and_sessions.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20200915070607_create_events_and_sessions.exs rename to priv/ingest_repo/migrations/20200915070607_create_events_and_sessions.exs diff --git a/priv/clickhouse_repo/migrations/20200918075025_add_utm_tags.exs b/priv/ingest_repo/migrations/20200918075025_add_utm_tags.exs similarity index 
100% rename from priv/clickhouse_repo/migrations/20200918075025_add_utm_tags.exs rename to priv/ingest_repo/migrations/20200918075025_add_utm_tags.exs diff --git a/priv/clickhouse_repo/migrations/20201020083739_add_event_metadata.exs b/priv/ingest_repo/migrations/20201020083739_add_event_metadata.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20201020083739_add_event_metadata.exs rename to priv/ingest_repo/migrations/20201020083739_add_event_metadata.exs diff --git a/priv/clickhouse_repo/migrations/20201106125234_add_browser_version_and_os_version.exs b/priv/ingest_repo/migrations/20201106125234_add_browser_version_and_os_version.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20201106125234_add_browser_version_and_os_version.exs rename to priv/ingest_repo/migrations/20201106125234_add_browser_version_and_os_version.exs diff --git a/priv/clickhouse_repo/migrations/20210323130440_add_sample_by.exs b/priv/ingest_repo/migrations/20210323130440_add_sample_by.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20210323130440_add_sample_by.exs rename to priv/ingest_repo/migrations/20210323130440_add_sample_by.exs diff --git a/priv/clickhouse_repo/migrations/20210712214034_add_more_location_details.exs b/priv/ingest_repo/migrations/20210712214034_add_more_location_details.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20210712214034_add_more_location_details.exs rename to priv/ingest_repo/migrations/20210712214034_add_more_location_details.exs diff --git a/priv/clickhouse_repo/migrations/20211017093035_add_utm_content_and_term.exs b/priv/ingest_repo/migrations/20211017093035_add_utm_content_and_term.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20211017093035_add_utm_content_and_term.exs rename to priv/ingest_repo/migrations/20211017093035_add_utm_content_and_term.exs diff --git a/priv/clickhouse_repo/migrations/20211112130238_create_imported_tables.exs 
b/priv/ingest_repo/migrations/20211112130238_create_imported_tables.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20211112130238_create_imported_tables.exs rename to priv/ingest_repo/migrations/20211112130238_create_imported_tables.exs diff --git a/priv/clickhouse_repo/migrations/20220310104931_add_transferred_from.exs b/priv/ingest_repo/migrations/20220310104931_add_transferred_from.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20220310104931_add_transferred_from.exs rename to priv/ingest_repo/migrations/20220310104931_add_transferred_from.exs diff --git a/priv/clickhouse_repo/migrations/20220404123000_add_entry_props_to_session.exs b/priv/ingest_repo/migrations/20220404123000_add_entry_props_to_session.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20220404123000_add_entry_props_to_session.exs rename to priv/ingest_repo/migrations/20220404123000_add_entry_props_to_session.exs diff --git a/priv/clickhouse_repo/migrations/20220421161259_remove_entry_props.exs b/priv/ingest_repo/migrations/20220421161259_remove_entry_props.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20220421161259_remove_entry_props.exs rename to priv/ingest_repo/migrations/20220421161259_remove_entry_props.exs diff --git a/priv/clickhouse_repo/migrations/20220422075510_add_entry_props.exs b/priv/ingest_repo/migrations/20220422075510_add_entry_props.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20220422075510_add_entry_props.exs rename to priv/ingest_repo/migrations/20220422075510_add_entry_props.exs diff --git a/priv/clickhouse_repo/migrations/20230124140348_add_city_name_to_imported_locations.exs b/priv/ingest_repo/migrations/20230124140348_add_city_name_to_imported_locations.exs similarity index 100% rename from priv/clickhouse_repo/migrations/20230124140348_add_city_name_to_imported_locations.exs rename to 
priv/ingest_repo/migrations/20230124140348_add_city_name_to_imported_locations.exs diff --git a/test/plausible/imported/imported_test.exs b/test/plausible/imported/imported_test.exs index 04cd02e7f..e8cefb330 100644 --- a/test/plausible/imported/imported_test.exs +++ b/test/plausible/imported/imported_test.exs @@ -7,7 +7,7 @@ defmodule Plausible.ImportedTest do defp import_data(ga_data, site_id, table_name) do ga_data |> Plausible.Imported.from_google_analytics(site_id, table_name) - |> then(&Plausible.ClickhouseRepo.insert_all(table_name, &1)) + |> then(&Plausible.IngestRepo.insert_all(table_name, &1)) end describe "Parse and import third party data fetched from Google Analytics" do diff --git a/test/support/test_utils.ex b/test/support/test_utils.ex index 698c4e11b..92b9cc18c 100644 --- a/test/support/test_utils.ex +++ b/test/support/test_utils.ex @@ -71,7 +71,7 @@ defmodule Plausible.TestUtils do Factory.build(:pageview, pageview) |> Map.from_struct() |> Map.delete(:__meta__) end) - Plausible.ClickhouseRepo.insert_all("events", pageviews) + Plausible.IngestRepo.insert_all("events", pageviews) end def create_events(events) do @@ -80,7 +80,7 @@ defmodule Plausible.TestUtils do Factory.build(:event, event) |> Map.from_struct() |> Map.delete(:__meta__) end) - Plausible.ClickhouseRepo.insert_all("events", events) + Plausible.IngestRepo.insert_all("events", events) end def create_sessions(sessions) do @@ -89,7 +89,7 @@ defmodule Plausible.TestUtils do Factory.build(:ch_session, session) |> Map.from_struct() |> Map.delete(:__meta__) end) - Plausible.ClickhouseRepo.insert_all("sessions", sessions) + Plausible.IngestRepo.insert_all("sessions", sessions) end def log_in(%{user: user, conn: conn}) do @@ -163,7 +163,7 @@ defmodule Plausible.TestUtils do defp populate_imported_stats(events) do Enum.group_by(events, &Map.fetch!(&1, :table), &Map.delete(&1, :table)) - |> Enum.map(fn {table, events} -> Plausible.ClickhouseRepo.insert_all(table, events) end) + |> Enum.map(fn 
{table, events} -> Plausible.IngestRepo.insert_all(table, events) end) end def relative_time(shifts) do